1 /* Statement Analysis and Transformation for Vectorization
2    Copyright (C) 2003-2021 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4    and Ira Rosen <irar@il.ibm.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h"		/* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
58 
59 /* For lang_hooks.types.type_for_mode.  */
60 #include "langhooks.h"
61 
62 /* Return the vectorized type for the given statement.  */
63 
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67   return STMT_VINFO_VECTYPE (stmt_info);
68 }
69 
70 /* Return TRUE iff the given statement is in an inner loop relative to
71    the loop being vectorized.  */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
74 {
75   gimple *stmt = STMT_VINFO_STMT (stmt_info);
76   basic_block bb = gimple_bb (stmt);
77   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78   class loop* loop;
79 
80   if (!loop_vinfo)
81     return false;
82 
83   loop = LOOP_VINFO_LOOP (loop_vinfo);
84 
85   return (bb->loop_father == loop->inner);
86 }
87 
88 /* Record the cost of a statement, either by directly informing the
89    target model or by saving it in a vector for later processing.
90    Return a preliminary estimate of the statement's cost.  */
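/* An illustrative sketch of how the callers below use this: costing NCOPIES
   copies of a vector statement in the loop body looks like

     inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
				      stmt_info, 0, vect_body);

   with the returned estimate accumulated into the caller's running total.  */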
91 
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 		  tree vectype, int misalign,
96 		  enum vect_cost_model_location where)
97 {
98   if ((kind == vector_load || kind == unaligned_load)
99       && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
100     kind = vector_gather_load;
101   if ((kind == vector_store || kind == unaligned_store)
102       && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103     kind = vector_scatter_store;
104 
105   stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
106   body_cost_vec->safe_push (si);
107 
108   return (unsigned)
109       (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 }
111 
112 /* Return a variable of type ELEM_TYPE[NELEMS].  */
113 
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 {
117   return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 			 "vect_array");
119 }
120 
121 /* ARRAY is an array of vectors created by create_vector_array.
122    Return an SSA_NAME for the vector in index N.  The reference
123    is part of the vectorization of STMT_INFO and the vector is associated
124    with scalar destination SCALAR_DEST.  */
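/* Illustrative sketch: for index N this emits the assignment

     vect_name = ARRAY[N];

   where vect_name is a fresh SSA name whose type is the vector element
   type of ARRAY.  */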
125 
126 static tree
127 read_vector_array (vec_info *vinfo,
128 		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 		   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
130 {
131   tree vect_type, vect, vect_name, array_ref;
132   gimple *new_stmt;
133 
134   gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135   vect_type = TREE_TYPE (TREE_TYPE (array));
136   vect = vect_create_destination_var (scalar_dest, vect_type);
137   array_ref = build4 (ARRAY_REF, vect_type, array,
138 		      build_int_cst (size_type_node, n),
139 		      NULL_TREE, NULL_TREE);
140 
141   new_stmt = gimple_build_assign (vect, array_ref);
142   vect_name = make_ssa_name (vect, new_stmt);
143   gimple_assign_set_lhs (new_stmt, vect_name);
144   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
145 
146   return vect_name;
147 }
148 
149 /* ARRAY is an array of vectors created by create_vector_array.
150    Emit code to store SSA_NAME VECT in index N of the array.
151    The store is part of the vectorization of STMT_INFO.  */
152 
153 static void
154 write_vector_array (vec_info *vinfo,
155 		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 		    tree vect, tree array, unsigned HOST_WIDE_INT n)
157 {
158   tree array_ref;
159   gimple *new_stmt;
160 
161   array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 		      build_int_cst (size_type_node, n),
163 		      NULL_TREE, NULL_TREE);
164 
165   new_stmt = gimple_build_assign (array_ref, vect);
166   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
167 }
168 
169 /* PTR is a pointer to an array of type TYPE.  Return a representation
170    of *PTR.  The memory reference replaces those in FIRST_DR
171    (and its group).  */
172 
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 {
176   tree mem_ref;
177 
178   mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179   /* Arrays have the same alignment as their type.  */
180   set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181   return mem_ref;
182 }
183 
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185    Emit the clobber before *GSI.  */
186 
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 		       gimple_stmt_iterator *gsi, tree var)
190 {
191   tree clobber = build_clobber (TREE_TYPE (var));
192   gimple *new_stmt = gimple_build_assign (var, clobber);
193   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
194 }
195 
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
197 
198 /* Function vect_mark_relevant.
199 
200    Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */
201 
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 		    enum vect_relevant relevant, bool live_p)
205 {
206   enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207   bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
208 
209   if (dump_enabled_p ())
210     dump_printf_loc (MSG_NOTE, vect_location,
211 		     "mark relevant %d, live %d: %G", relevant, live_p,
212 		     stmt_info->stmt);
213 
214   /* If this stmt is an original stmt in a pattern, we might need to mark its
215      related pattern stmt instead of the original stmt.  However, such stmts
216      may have their own uses that are not in any pattern; in such cases the
217      stmt itself should be marked.  */
218   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
219     {
220       /* This is the last stmt in a sequence that was detected as a
221 	 pattern that can potentially be vectorized.  Don't mark the stmt
222 	 as relevant/live because it's not going to be vectorized.
223 	 Instead mark the pattern-stmt that replaces it.  */
224 
225       if (dump_enabled_p ())
226 	dump_printf_loc (MSG_NOTE, vect_location,
227 			 "last stmt in pattern. don't mark"
228 			 " relevant/live.\n");
229       stmt_vec_info old_stmt_info = stmt_info;
230       stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232       save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233       save_live_p = STMT_VINFO_LIVE_P (stmt_info);
234     }
235 
236   STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237   if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238     STMT_VINFO_RELEVANT (stmt_info) = relevant;
239 
240   if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241       && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
242     {
243       if (dump_enabled_p ())
244         dump_printf_loc (MSG_NOTE, vect_location,
245                          "already marked relevant/live.\n");
246       return;
247     }
248 
249   worklist->safe_push (stmt_info);
250 }
251 
252 
253 /* Function is_simple_and_all_uses_invariant
254 
255    Return true if STMT_INFO is simple and all uses of it are invariant.  */
256 
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 				  loop_vec_info loop_vinfo)
260 {
261   tree op;
262   ssa_op_iter iter;
263 
264   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265   if (!stmt)
266     return false;
267 
268   FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
269     {
270       enum vect_def_type dt = vect_uninitialized_def;
271 
272       if (!vect_is_simple_use (op, loop_vinfo, &dt))
273 	{
274 	  if (dump_enabled_p ())
275 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 			     "use not simple.\n");
277 	  return false;
278 	}
279 
280       if (dt != vect_external_def && dt != vect_constant_def)
281 	return false;
282     }
283   return true;
284 }
285 
286 /* Function vect_stmt_relevant_p.
287 
288    Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289    is "relevant for vectorization".
290 
291    A stmt is considered "relevant for vectorization" if:
292    - it has uses outside the loop.
293    - it has vdefs (it alters memory).
294    - it is a control stmt in the loop (other than the loop exit condition).
295 
296    CHECKME: what other side effects would the vectorizer allow?  */
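/* Illustrative example (not taken from the testsuite):

     for (i = 0; i < n; i++)
       sum += a[i];
     ... = sum;

   The statement updating sum is relevant and live because its result is
   used after the loop, whereas the increment of i by itself is neither
   (loop control is handled separately).  */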
297 
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 		      enum vect_relevant *relevant, bool *live_p)
301 {
302   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303   ssa_op_iter op_iter;
304   imm_use_iterator imm_iter;
305   use_operand_p use_p;
306   def_operand_p def_p;
307 
308   *relevant = vect_unused_in_scope;
309   *live_p = false;
310 
311   /* cond stmt other than loop exit cond.  */
312   if (is_ctrl_stmt (stmt_info->stmt)
313       && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314     *relevant = vect_used_in_scope;
315 
316   /* changing memory.  */
317   if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318     if (gimple_vdef (stmt_info->stmt)
319 	&& !gimple_clobber_p (stmt_info->stmt))
320       {
321 	if (dump_enabled_p ())
322 	  dump_printf_loc (MSG_NOTE, vect_location,
323                            "vec_stmt_relevant_p: stmt has vdefs.\n");
324 	*relevant = vect_used_in_scope;
325       }
326 
327   /* uses outside the loop.  */
328   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
329     {
330       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
331 	{
332 	  basic_block bb = gimple_bb (USE_STMT (use_p));
333 	  if (!flow_bb_inside_loop_p (loop, bb))
334 	    {
335 	      if (is_gimple_debug (USE_STMT (use_p)))
336 		continue;
337 
338 	      if (dump_enabled_p ())
339 		dump_printf_loc (MSG_NOTE, vect_location,
340                                  "vec_stmt_relevant_p: used out of loop.\n");
341 
342 	      /* We expect all such uses to be in the loop exit phis
343 		 (because of loop-closed SSA form).  */
344 	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 	      gcc_assert (bb == single_exit (loop)->dest);
346 
347               *live_p = true;
348 	    }
349 	}
350     }
351 
352   if (*live_p && *relevant == vect_unused_in_scope
353       && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
354     {
355       if (dump_enabled_p ())
356 	dump_printf_loc (MSG_NOTE, vect_location,
357 			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358       *relevant = vect_used_only_live;
359     }
360 
361   return (*live_p || *relevant);
362 }
363 
364 
365 /* Function exist_non_indexing_operands_for_use_p
366 
367    USE is one of the uses attached to STMT_INFO.  Check if USE is
368    used in STMT_INFO for anything other than indexing an array.  */
369 
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
372 {
373   tree operand;
374 
375   /* USE corresponds to some operand in STMT.  If there is no data
376      reference in STMT, then any operand that corresponds to USE
377      is not indexing an array.  */
378   if (!STMT_VINFO_DATA_REF (stmt_info))
379     return true;
380 
381   /* STMT has a data_ref.  FORNOW this means that it's of one of
382      the following forms:
383      -1- ARRAY_REF = var
384      -2- var = ARRAY_REF
385      (This should have been verified in analyze_data_refs).
386 
387      'var' in the second case corresponds to a def, not a use,
388      so USE cannot correspond to any operands that are not used
389      for array indexing.
390 
391      Therefore, all we need to check is if STMT falls into the
392      first case, and whether var corresponds to USE.  */
393 
394   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395   if (!assign || !gimple_assign_copy_p (assign))
396     {
397       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398       if (call && gimple_call_internal_p (call))
399 	{
400 	  internal_fn ifn = gimple_call_internal_fn (call);
401 	  int mask_index = internal_fn_mask_index (ifn);
402 	  if (mask_index >= 0
403 	      && use == gimple_call_arg (call, mask_index))
404 	    return true;
405 	  int stored_value_index = internal_fn_stored_value_index (ifn);
406 	  if (stored_value_index >= 0
407 	      && use == gimple_call_arg (call, stored_value_index))
408 	    return true;
409 	  if (internal_gather_scatter_fn_p (ifn)
410 	      && use == gimple_call_arg (call, 1))
411 	    return true;
412 	}
413       return false;
414     }
415 
416   if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417     return false;
418   operand = gimple_assign_rhs1 (assign);
419   if (TREE_CODE (operand) != SSA_NAME)
420     return false;
421 
422   if (operand == use)
423     return true;
424 
425   return false;
426 }
427 
428 
429 /*
430    Function process_use.
431 
432    Inputs:
433    - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434    - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435      that defined USE.  This is done by calling mark_relevant and passing it
436      the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437    - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438      be performed.
439 
440    Outputs:
441    Generally, LIVE_P and RELEVANT are used to define the liveness and
442    relevance info of the DEF_STMT of this USE:
443        STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444        STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445    Exceptions:
446    - case 1: If USE is used only for address computations (e.g. array indexing),
447    which does not need to be directly vectorized, then the liveness/relevance
448    of the respective DEF_STMT is left unchanged.
449    - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450    we skip DEF_STMT because it has already been processed.
451    - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452    "relevant" will be modified accordingly.
453 
454    Return true if everything is as expected. Return false otherwise.  */
455 
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 	     enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 	     bool force)
460 {
461   stmt_vec_info dstmt_vinfo;
462   enum vect_def_type dt;
463 
464   /* case 1: we are only interested in uses that need to be vectorized.  Uses
465      that are used for address computation are not considered relevant.  */
466   if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467     return opt_result::success ();
468 
469   if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470     return opt_result::failure_at (stmt_vinfo->stmt,
471 				   "not vectorized:"
472 				   " unsupported use in stmt.\n");
473 
474   if (!dstmt_vinfo)
475     return opt_result::success ();
476 
477   basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478   basic_block bb = gimple_bb (stmt_vinfo->stmt);
479 
480   /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481      We have to force the stmt live since the epilogue loop needs it to
482      continue computing the reduction.  */
483   if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484       && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485       && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486       && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487       && bb->loop_father == def_bb->loop_father)
488     {
489       if (dump_enabled_p ())
490 	dump_printf_loc (MSG_NOTE, vect_location,
491 			 "reduc-stmt defining reduc-phi in the same nest.\n");
492       vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493       return opt_result::success ();
494     }
495 
496   /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 	outer-loop-header-bb:
498 		d = dstmt_vinfo
499 	inner-loop:
500 		stmt # use (d)
501 	outer-loop-tail-bb:
502 		...		  */
503   if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
504     {
505       if (dump_enabled_p ())
506 	dump_printf_loc (MSG_NOTE, vect_location,
507                          "outer-loop def-stmt defining inner-loop stmt.\n");
508 
509       switch (relevant)
510 	{
511 	case vect_unused_in_scope:
512 	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 		      vect_used_in_scope : vect_unused_in_scope;
514 	  break;
515 
516 	case vect_used_in_outer_by_reduction:
517           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 	  relevant = vect_used_by_reduction;
519 	  break;
520 
521 	case vect_used_in_outer:
522           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 	  relevant = vect_used_in_scope;
524 	  break;
525 
526 	case vect_used_in_scope:
527 	  break;
528 
529 	default:
530 	  gcc_unreachable ();
531 	}
532     }
533 
534   /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 	outer-loop-header-bb:
536 		...
537 	inner-loop:
538 		d = dstmt_vinfo
539 	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 		stmt # use (d)		*/
541   else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
542     {
543       if (dump_enabled_p ())
544 	dump_printf_loc (MSG_NOTE, vect_location,
545                          "inner-loop def-stmt defining outer-loop stmt.\n");
546 
547       switch (relevant)
548         {
549         case vect_unused_in_scope:
550           relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551             || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552                       vect_used_in_outer_by_reduction : vect_unused_in_scope;
553           break;
554 
555         case vect_used_by_reduction:
556 	case vect_used_only_live:
557           relevant = vect_used_in_outer_by_reduction;
558           break;
559 
560         case vect_used_in_scope:
561           relevant = vect_used_in_outer;
562           break;
563 
564         default:
565           gcc_unreachable ();
566         }
567     }
568   /* We are also not interested in uses on loop PHI backedges that are
569      inductions.  Otherwise we'll needlessly vectorize the IV increment
570      and cause hybrid SLP for SLP inductions.  Unless the PHI is live
571      of course.  */
572   else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 	   && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 				      loop_latch_edge (bb->loop_father))
577 	       == use))
578     {
579       if (dump_enabled_p ())
580 	dump_printf_loc (MSG_NOTE, vect_location,
581                          "induction value on backedge.\n");
582       return opt_result::success ();
583     }
584 
585 
586   vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587   return opt_result::success ();
588 }
589 
590 
591 /* Function vect_mark_stmts_to_be_vectorized.
592 
593    Not all stmts in the loop need to be vectorized. For example:
594 
595      for i...
596        for j...
597    1.    T0 = i + j
598    2.	 T1 = a[T0]
599 
600    3.    j = j + 1
601 
602    Stmts 1 and 3 do not need to be vectorized, because loop control and
603    addressing of vectorized data-refs are handled differently.
604 
605    This pass detects such stmts.  */
606 
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
609 {
610   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612   unsigned int nbbs = loop->num_nodes;
613   gimple_stmt_iterator si;
614   unsigned int i;
615   basic_block bb;
616   bool live_p;
617   enum vect_relevant relevant;
618 
619   DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
620 
621   auto_vec<stmt_vec_info, 64> worklist;
622 
623   /* 1. Init worklist.  */
624   for (i = 0; i < nbbs; i++)
625     {
626       bb = bbs[i];
627       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
628 	{
629 	  stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 	  if (dump_enabled_p ())
631 	    dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 			     phi_info->stmt);
633 
634 	  if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 	    vect_mark_relevant (&worklist, phi_info, relevant, live_p);
636 	}
637       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
638 	{
639 	  if (is_gimple_debug (gsi_stmt (si)))
640 	    continue;
641 	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
642 	  if (dump_enabled_p ())
643 	      dump_printf_loc (MSG_NOTE, vect_location,
644 			       "init: stmt relevant? %G", stmt_info->stmt);
645 
646 	  if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
647 	    vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
648 	}
649     }
650 
651   /* 2. Process_worklist */
652   while (worklist.length () > 0)
653     {
654       use_operand_p use_p;
655       ssa_op_iter iter;
656 
657       stmt_vec_info stmt_vinfo = worklist.pop ();
658       if (dump_enabled_p ())
659 	dump_printf_loc (MSG_NOTE, vect_location,
660 			 "worklist: examine stmt: %G", stmt_vinfo->stmt);
661 
662       /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 	 (DEF_STMT) as relevant/irrelevant according to the relevance property
664 	 of STMT.  */
665       relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
666 
667       /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 	 propagated as is to the DEF_STMTs of its USEs.
669 
670 	 One exception is when STMT has been identified as defining a reduction
671 	 variable; in this case we set the relevance to vect_used_by_reduction.
672 	 This is because we distinguish between two kinds of relevant stmts -
673 	 those that are used by a reduction computation, and those that are
674 	 (also) used by a regular computation.  This allows us later on to
675 	 identify stmts that are used solely by a reduction, and therefore the
676 	 order of the results that they produce does not have to be kept.  */
677 
678       switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
679         {
680           case vect_reduction_def:
681 	    gcc_assert (relevant != vect_unused_in_scope);
682 	    if (relevant != vect_unused_in_scope
683 		&& relevant != vect_used_in_scope
684 		&& relevant != vect_used_by_reduction
685 		&& relevant != vect_used_only_live)
686 	      return opt_result::failure_at
687 		(stmt_vinfo->stmt, "unsupported use of reduction.\n");
688 	    break;
689 
690           case vect_nested_cycle:
691 	    if (relevant != vect_unused_in_scope
692 		&& relevant != vect_used_in_outer_by_reduction
693 		&& relevant != vect_used_in_outer)
694 	      return opt_result::failure_at
695 		(stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
696             break;
697 
698           case vect_double_reduction_def:
699 	    if (relevant != vect_unused_in_scope
700 		&& relevant != vect_used_by_reduction
701 		&& relevant != vect_used_only_live)
702 	      return opt_result::failure_at
703 		(stmt_vinfo->stmt, "unsupported use of double reduction.\n");
704             break;
705 
706           default:
707             break;
708         }
709 
710       if (is_pattern_stmt_p (stmt_vinfo))
711         {
712           /* Pattern statements are not inserted into the code, so
713              FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714              have to scan the RHS or function arguments instead.  */
715 	  if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
716 	    {
717 	      enum tree_code rhs_code = gimple_assign_rhs_code (assign);
718 	      tree op = gimple_assign_rhs1 (assign);
719 
720 	      i = 1;
721 	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
722 		{
723 		  opt_result res
724 		    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
725 				   loop_vinfo, relevant, &worklist, false);
726 		  if (!res)
727 		    return res;
728 		  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
729 				     loop_vinfo, relevant, &worklist, false);
730 		  if (!res)
731 		    return res;
732 		  i = 2;
733 		}
734 	      for (; i < gimple_num_ops (assign); i++)
735 		{
736 		  op = gimple_op (assign, i);
737                   if (TREE_CODE (op) == SSA_NAME)
738 		    {
739 		      opt_result res
740 			= process_use (stmt_vinfo, op, loop_vinfo, relevant,
741 				       &worklist, false);
742 		      if (!res)
743 			return res;
744 		    }
745                  }
746             }
747 	  else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
748 	    {
749 	      for (i = 0; i < gimple_call_num_args (call); i++)
750 		{
751 		  tree arg = gimple_call_arg (call, i);
752 		  opt_result res
753 		    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
754 				   &worklist, false);
755 		  if (!res)
756 		    return res;
757 		}
758 	    }
759         }
760       else
761 	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
762           {
763             tree op = USE_FROM_PTR (use_p);
764 	    opt_result res
765 	      = process_use (stmt_vinfo, op, loop_vinfo, relevant,
766 			     &worklist, false);
767 	    if (!res)
768 	      return res;
769           }
770 
771       if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
772 	{
773 	  gather_scatter_info gs_info;
774 	  if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
775 	    gcc_unreachable ();
776 	  opt_result res
777 	    = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
778 			   &worklist, true);
779 	  if (!res)
780 	    {
781 	      if (fatal)
782 		*fatal = false;
783 	      return res;
784 	    }
785 	}
786     } /* while worklist */
787 
788   return opt_result::success ();
789 }
790 
791 /* Function vect_model_simple_cost.
792 
793    Models cost for simple operations, i.e. those that only emit ncopies of a
794    single op.  Right now, this does not account for multiple insns that could
795    be generated for the single vector op.  We will handle that shortly.  */
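/* Worked example (non-SLP case): with NCOPIES = 2 and one constant or
   external operand, this records one scalar_to_vec prologue cost and two
   vector_stmt (or KIND) body costs via record_stmt_cost.  */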
796 
797 static void
798 vect_model_simple_cost (vec_info *,
799 			stmt_vec_info stmt_info, int ncopies,
800 			enum vect_def_type *dt,
801 			int ndts,
802 			slp_tree node,
803 			stmt_vector_for_cost *cost_vec,
804 			vect_cost_for_stmt kind = vector_stmt)
805 {
806   int inside_cost = 0, prologue_cost = 0;
807 
808   gcc_assert (cost_vec != NULL);
809 
810   /* ???  Somehow we need to fix this at the callers.  */
811   if (node)
812     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
813 
814   if (!node)
815     /* Cost the "broadcast" of a scalar operand into a vector operand.
816        Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
817        cost model.  */
818     for (int i = 0; i < ndts; i++)
819       if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
820 	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
821 					   stmt_info, 0, vect_prologue);
822 
823   /* Pass the inside-of-loop statements to the target-specific cost model.  */
824   inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
825 				   stmt_info, 0, vect_body);
826 
827   if (dump_enabled_p ())
828     dump_printf_loc (MSG_NOTE, vect_location,
829                      "vect_model_simple_cost: inside_cost = %d, "
830                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
831 }
832 
833 
834 /* Model cost for type demotion and promotion operations.  PWR is
835    normally zero for single-step promotions and demotions.  It will be
836    one if two-step promotion/demotion is required, and so on.  NCOPIES
837    is the number of vector results (and thus number of instructions)
838    for the narrowest end of the operation chain.  Each additional
839    step doubles the number of instructions required.  If WIDEN_ARITH
840    is true the stmt is doing widening arithmetic.  */
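/* Worked example: a two-step promotion (PWR = 1) with NCOPIES = 1 records
   1 + 2 = 3 vec_promote_demote (or vector_stmt, if WIDEN_ARITH) body
   statements, since each additional step doubles the statement count.  */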
841 
842 static void
843 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
844 				    enum vect_def_type *dt,
845 				    unsigned int ncopies, int pwr,
846 				    stmt_vector_for_cost *cost_vec,
847 				    bool widen_arith)
848 {
849   int i;
850   int inside_cost = 0, prologue_cost = 0;
851 
852   for (i = 0; i < pwr + 1; i++)
853     {
854       inside_cost += record_stmt_cost (cost_vec, ncopies,
855 				       widen_arith
856 				       ? vector_stmt : vec_promote_demote,
857 				       stmt_info, 0, vect_body);
858       ncopies *= 2;
859     }
860 
861   /* FORNOW: Assuming maximum 2 args per stmts.  */
862   for (i = 0; i < 2; i++)
863     if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
864       prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
865 					 stmt_info, 0, vect_prologue);
866 
867   if (dump_enabled_p ())
868     dump_printf_loc (MSG_NOTE, vect_location,
869                      "vect_model_promotion_demotion_cost: inside_cost = %d, "
870                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
871 }
872 
873 /* Returns true if the current function returns DECL.  */
874 
875 static bool
876 cfun_returns (tree decl)
877 {
878   edge_iterator ei;
879   edge e;
880   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
881     {
882       greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
883       if (!ret)
884 	continue;
885       if (gimple_return_retval (ret) == decl)
886 	return true;
887       /* We often end up with an aggregate copy to the result decl;
888          handle that case as well.  First skip intermediate clobbers
889 	 though.  */
890       gimple *def = ret;
891       do
892 	{
893 	  def = SSA_NAME_DEF_STMT (gimple_vuse (def));
894 	}
895       while (gimple_clobber_p (def));
896       if (is_a <gassign *> (def)
897 	  && gimple_assign_lhs (def) == gimple_return_retval (ret)
898 	  && gimple_assign_rhs1 (def) == decl)
899 	return true;
900     }
901   return false;
902 }
903 
904 /* Function vect_model_store_cost
905 
906    Models cost for stores.  In the case of grouped accesses, one access
907    has the overhead of the grouped access attributed to it.  */
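/* For example, a VMAT_CONTIGUOUS_PERMUTE store group of size 4 costs
   NCOPIES * ceil_log2 (4) * 4 = NCOPIES * 8 vec_perm statements on top
   of the stores themselves (see the computation of NSTMTS below).  */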
908 
909 static void
910 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
911 		       vect_memory_access_type memory_access_type,
912 		       dr_alignment_support alignment_support_scheme,
913 		       int misalignment,
914 		       vec_load_store_type vls_type, slp_tree slp_node,
915 		       stmt_vector_for_cost *cost_vec)
916 {
917   unsigned int inside_cost = 0, prologue_cost = 0;
918   stmt_vec_info first_stmt_info = stmt_info;
919   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
920 
921   /* ???  Somehow we need to fix this at the callers.  */
922   if (slp_node)
923     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
924 
925   if (vls_type == VLS_STORE_INVARIANT)
926     {
927       if (!slp_node)
928 	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
929 					   stmt_info, 0, vect_prologue);
930     }
931 
932   /* Grouped stores update all elements in the group at once,
933      so we want the DR for the first statement.  */
934   if (!slp_node && grouped_access_p)
935     first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
936 
937   /* True if we should include any once-per-group costs as well as
938      the cost of the statement itself.  For SLP we only get called
939      once per group anyhow.  */
940   bool first_stmt_p = (first_stmt_info == stmt_info);
941 
942   /* We assume that the cost of a single store-lanes instruction is
943      equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
944      access is instead being provided by a permute-and-store operation,
945      include the cost of the permutes.  */
946   if (first_stmt_p
947       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
948     {
949       /* Uses high and low interleave or shuffle operations for each
950 	 needed permute.  */
951       int group_size = DR_GROUP_SIZE (first_stmt_info);
952       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
953       inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
954 				      stmt_info, 0, vect_body);
955 
956       if (dump_enabled_p ())
957         dump_printf_loc (MSG_NOTE, vect_location,
958                          "vect_model_store_cost: strided group_size = %d .\n",
959                          group_size);
960     }
961 
962   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
963   /* Costs of the stores.  */
964   if (memory_access_type == VMAT_ELEMENTWISE
965       || memory_access_type == VMAT_GATHER_SCATTER)
966     {
967       /* N scalar stores plus extracting the elements.  */
968       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
969       inside_cost += record_stmt_cost (cost_vec,
970 				       ncopies * assumed_nunits,
971 				       scalar_store, stmt_info, 0, vect_body);
972     }
973   else
974     vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
975 			 misalignment, &inside_cost, cost_vec);
976 
977   if (memory_access_type == VMAT_ELEMENTWISE
978       || memory_access_type == VMAT_STRIDED_SLP)
979     {
980       /* N scalar stores plus extracting the elements.  */
981       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
982       inside_cost += record_stmt_cost (cost_vec,
983 				       ncopies * assumed_nunits,
984 				       vec_to_scalar, stmt_info, 0, vect_body);
985     }
986 
987   /* When vectorizing a store into the function result assign
988      a penalty if the function returns in a multi-register location.
989      In this case we assume we'll end up with having to spill the
990      vector result and do piecewise loads as a conservative estimate.  */
991   tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
992   if (base
993       && (TREE_CODE (base) == RESULT_DECL
994 	  || (DECL_P (base) && cfun_returns (base)))
995       && !aggregate_value_p (base, cfun->decl))
996     {
997       rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
998       /* ???  Handle PARALLEL in some way.  */
999       if (REG_P (reg))
1000 	{
1001 	  int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1002 	  /* Assume that a single reg-reg move is possible and cheap,
1003 	     do not account for vector to gp register move cost.  */
1004 	  if (nregs > 1)
1005 	    {
1006 	      /* Spill.  */
1007 	      prologue_cost += record_stmt_cost (cost_vec, ncopies,
1008 						 vector_store,
1009 						 stmt_info, 0, vect_epilogue);
1010 	      /* Loads.  */
1011 	      prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1012 						 scalar_load,
1013 						 stmt_info, 0, vect_epilogue);
1014 	    }
1015 	}
1016     }
1017 
1018   if (dump_enabled_p ())
1019     dump_printf_loc (MSG_NOTE, vect_location,
1020                      "vect_model_store_cost: inside_cost = %d, "
1021                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
1022 }
1023 
1024 
1025 /* Calculate cost of DR's memory access.  */
1026 void
1027 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1028 		     dr_alignment_support alignment_support_scheme,
1029 		     int misalignment,
1030 		     unsigned int *inside_cost,
1031 		     stmt_vector_for_cost *body_cost_vec)
1032 {
1033   switch (alignment_support_scheme)
1034     {
1035     case dr_aligned:
1036       {
1037 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1038 					  vector_store, stmt_info, 0,
1039 					  vect_body);
1040 
1041         if (dump_enabled_p ())
1042           dump_printf_loc (MSG_NOTE, vect_location,
1043                            "vect_model_store_cost: aligned.\n");
1044         break;
1045       }
1046 
1047     case dr_unaligned_supported:
1048       {
1049         /* Here, we assign an additional cost for the unaligned store.  */
1050 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1051 					  unaligned_store, stmt_info,
1052 					  misalignment, vect_body);
1053         if (dump_enabled_p ())
1054           dump_printf_loc (MSG_NOTE, vect_location,
1055                            "vect_model_store_cost: unaligned supported by "
1056                            "hardware.\n");
1057         break;
1058       }
1059 
1060     case dr_unaligned_unsupported:
1061       {
1062         *inside_cost = VECT_MAX_COST;
1063 
1064         if (dump_enabled_p ())
1065           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1066                            "vect_model_store_cost: unsupported access.\n");
1067         break;
1068       }
1069 
1070     default:
1071       gcc_unreachable ();
1072     }
1073 }
1074 
1075 
1076 /* Function vect_model_load_cost
1077 
1078    Models cost for loads.  In the case of grouped accesses, one access has
1079    the overhead of the grouped access attributed to it.  Since unaligned
1080    accesses are supported for loads, we also account for the costs of the
1081    access scheme chosen.  */
1082 
1083 static void
1084 vect_model_load_cost (vec_info *vinfo,
1085 		      stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1086 		      vect_memory_access_type memory_access_type,
1087 		      dr_alignment_support alignment_support_scheme,
1088 		      int misalignment,
1089 		      gather_scatter_info *gs_info,
1090 		      slp_tree slp_node,
1091 		      stmt_vector_for_cost *cost_vec)
1092 {
1093   unsigned int inside_cost = 0, prologue_cost = 0;
1094   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1095 
1096   gcc_assert (cost_vec);
1097 
1098   /* ???  Somehow we need to fix this at the callers.  */
1099   if (slp_node)
1100     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1101 
1102   if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1103     {
1104       /* If the load is permuted then the alignment is determined by
1105 	 the first group element not by the first scalar stmt DR.  */
1106       stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1107       /* Record the cost for the permutation.  */
1108       unsigned n_perms, n_loads;
1109       vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1110 				    vf, true, &n_perms, &n_loads);
1111       inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1112 				       first_stmt_info, 0, vect_body);
1113 
1114       /* And adjust the number of loads performed.  This handles
1115 	 redundancies as well as loads that are later dead.  */
1116       ncopies = n_loads;
1117     }
1118 
1119   /* Grouped loads read all elements in the group at once,
1120      so we want the DR for the first statement.  */
1121   stmt_vec_info first_stmt_info = stmt_info;
1122   if (!slp_node && grouped_access_p)
1123     first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1124 
1125   /* True if we should include any once-per-group costs as well as
1126      the cost of the statement itself.  For SLP we only get called
1127      once per group anyhow.  */
1128   bool first_stmt_p = (first_stmt_info == stmt_info);
1129 
1130   /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1131      ones we actually need.  Account for the cost of unused results.  */
1132   if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
1133     {
1134       unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
1135       stmt_vec_info next_stmt_info = first_stmt_info;
1136       do
1137 	{
1138 	  gaps -= 1;
1139 	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
1140 	}
1141       while (next_stmt_info);
1142       if (gaps)
1143 	{
1144 	  if (dump_enabled_p ())
1145 	    dump_printf_loc (MSG_NOTE, vect_location,
1146 			     "vect_model_load_cost: %d unused vectors.\n",
1147 			     gaps);
1148 	  vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
1149 			      alignment_support_scheme, misalignment, false,
1150 			      &inside_cost, &prologue_cost,
1151 			      cost_vec, cost_vec, true);
1152 	}
1153     }
1154 
1155   /* We assume that the cost of a single load-lanes instruction is
1156      equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
1157      access is instead being provided by a load-and-permute operation,
1158      include the cost of the permutes.  */
1159   if (first_stmt_p
1160       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1161     {
1162       /* Uses even and odd extract operations or shuffle operations
1163 	 for each needed permute.  */
1164       int group_size = DR_GROUP_SIZE (first_stmt_info);
1165       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1166       inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1167 				       stmt_info, 0, vect_body);
1168 
1169       if (dump_enabled_p ())
1170         dump_printf_loc (MSG_NOTE, vect_location,
1171                          "vect_model_load_cost: strided group_size = %d .\n",
1172                          group_size);
1173     }
1174 
1175   /* The loads themselves.  */
1176   if (memory_access_type == VMAT_ELEMENTWISE
1177       || memory_access_type == VMAT_GATHER_SCATTER)
1178     {
1179       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1180       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1181       if (memory_access_type == VMAT_GATHER_SCATTER
1182 	  && gs_info->ifn == IFN_LAST && !gs_info->decl)
1183 	/* For emulated gathers N offset vector element extracts
1184 	   (we assume the scalar scaling and ptr + offset add is consumed by
1185 	   the load).  */
1186 	inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1187 					 vec_to_scalar, stmt_info, 0,
1188 					 vect_body);
1189       /* N scalar loads plus gathering them into a vector.  */
1190       inside_cost += record_stmt_cost (cost_vec,
1191 				       ncopies * assumed_nunits,
1192 				       scalar_load, stmt_info, 0, vect_body);
1193     }
1194   else if (memory_access_type == VMAT_INVARIANT)
1195     {
1196       /* Invariant loads will ideally be hoisted and splat to a vector.  */
1197       prologue_cost += record_stmt_cost (cost_vec, 1,
1198 					 scalar_load, stmt_info, 0,
1199 					 vect_prologue);
1200       prologue_cost += record_stmt_cost (cost_vec, 1,
1201 					 scalar_to_vec, stmt_info, 0,
1202 					 vect_prologue);
1203     }
1204   else
1205     vect_get_load_cost (vinfo, stmt_info, ncopies,
1206 			alignment_support_scheme, misalignment, first_stmt_p,
1207 			&inside_cost, &prologue_cost,
1208 			cost_vec, cost_vec, true);
1209   if (memory_access_type == VMAT_ELEMENTWISE
1210       || memory_access_type == VMAT_STRIDED_SLP
1211       || (memory_access_type == VMAT_GATHER_SCATTER
1212 	  && gs_info->ifn == IFN_LAST && !gs_info->decl))
1213     inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1214 				     stmt_info, 0, vect_body);
1215 
1216   if (dump_enabled_p ())
1217     dump_printf_loc (MSG_NOTE, vect_location,
1218                      "vect_model_load_cost: inside_cost = %d, "
1219                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
1220 }
1221 
1222 
1223 /* Calculate cost of DR's memory access.  */
1224 void
1225 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1226 		    dr_alignment_support alignment_support_scheme,
1227 		    int misalignment,
1228 		    bool add_realign_cost, unsigned int *inside_cost,
1229 		    unsigned int *prologue_cost,
1230 		    stmt_vector_for_cost *prologue_cost_vec,
1231 		    stmt_vector_for_cost *body_cost_vec,
1232 		    bool record_prologue_costs)
1233 {
1234   switch (alignment_support_scheme)
1235     {
1236     case dr_aligned:
1237       {
1238 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1239 					  stmt_info, 0, vect_body);
1240 
1241         if (dump_enabled_p ())
1242           dump_printf_loc (MSG_NOTE, vect_location,
1243                            "vect_model_load_cost: aligned.\n");
1244 
1245         break;
1246       }
1247     case dr_unaligned_supported:
1248       {
1249         /* Here, we assign an additional cost for the unaligned load.  */
1250 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1251 					  unaligned_load, stmt_info,
1252 					  misalignment, vect_body);
1253 
1254         if (dump_enabled_p ())
1255           dump_printf_loc (MSG_NOTE, vect_location,
1256                            "vect_model_load_cost: unaligned supported by "
1257                            "hardware.\n");
1258 
1259         break;
1260       }
1261     case dr_explicit_realign:
1262       {
1263 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1264 					  vector_load, stmt_info, 0, vect_body);
1265 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1266 					  vec_perm, stmt_info, 0, vect_body);
1267 
1268         /* FIXME: If the misalignment remains fixed across the iterations of
1269            the containing loop, the following cost should be added to the
1270            prologue costs.  */
1271         if (targetm.vectorize.builtin_mask_for_load)
1272 	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1273 					    stmt_info, 0, vect_body);
1274 
1275         if (dump_enabled_p ())
1276           dump_printf_loc (MSG_NOTE, vect_location,
1277                            "vect_model_load_cost: explicit realign\n");
1278 
1279         break;
1280       }
1281     case dr_explicit_realign_optimized:
1282       {
1283         if (dump_enabled_p ())
1284           dump_printf_loc (MSG_NOTE, vect_location,
1285                            "vect_model_load_cost: unaligned software "
1286                            "pipelined.\n");
1287 
1288         /* Unaligned software pipeline has a load of an address, an initial
1289            load, and possibly a mask operation to "prime" the loop.  However,
1290            if this is an access in a group of loads, which provide grouped
1291            access, then the above cost should only be considered for one
1292            access in the group.  Inside the loop, there is a load op
1293            and a realignment op.  */
1294 
1295         if (add_realign_cost && record_prologue_costs)
1296           {
1297 	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1298 						vector_stmt, stmt_info,
1299 						0, vect_prologue);
1300             if (targetm.vectorize.builtin_mask_for_load)
1301 	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1302 						  vector_stmt, stmt_info,
1303 						  0, vect_prologue);
1304           }
1305 
1306 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1307 					  stmt_info, 0, vect_body);
1308 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1309 					  stmt_info, 0, vect_body);
1310 
1311         if (dump_enabled_p ())
1312           dump_printf_loc (MSG_NOTE, vect_location,
1313                            "vect_model_load_cost: explicit realign optimized"
1314                            "\n");
1315 
1316         break;
1317       }
1318 
1319     case dr_unaligned_unsupported:
1320       {
1321         *inside_cost = VECT_MAX_COST;
1322 
1323         if (dump_enabled_p ())
1324           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1325                            "vect_model_load_cost: unsupported access.\n");
1326         break;
1327       }
1328 
1329     default:
1330       gcc_unreachable ();
1331     }
1332 }
1333 
1334 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1335    the loop preheader for the vectorized stmt STMT_VINFO.  */
1336 
1337 static void
1338 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1339 		    gimple_stmt_iterator *gsi)
1340 {
1341   if (gsi)
1342     vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1343   else
1344     vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1345 
1346   if (dump_enabled_p ())
1347     dump_printf_loc (MSG_NOTE, vect_location,
1348 		     "created new init_stmt: %G", new_stmt);
1349 }
1350 
1351 /* Function vect_init_vector.
1352 
1353    Insert a new stmt (INIT_STMT) that initializes a new variable of type
1354    TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
1355    vector type, a vector with all elements equal to VAL is created first.
1356    Place the initialization at GSI if it is not NULL.  Otherwise, place the
1357    initialization at the loop preheader.
1358    Return the DEF of INIT_STMT.
1359    It will be used in the vectorization of STMT_INFO.  */
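/* Illustrative sketch: initializing a four-element integer vector from the
   scalar constant 5 emits roughly

     cst_N = { 5, 5, 5, 5 };

   at GSI, or in the loop preheader when GSI is NULL.  */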
1360 
1361 tree
1362 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1363 		  gimple_stmt_iterator *gsi)
1364 {
1365   gimple *init_stmt;
1366   tree new_temp;
1367 
1368   /* We abuse this function to push something to an SSA name with initial 'val'.  */
1369   if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1370     {
1371       gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1372       if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1373 	{
1374 	  /* Scalar boolean value should be transformed into
1375 	     all zeros or all ones value before building a vector.  */
1376 	  if (VECTOR_BOOLEAN_TYPE_P (type))
1377 	    {
1378 	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
1379 	      tree false_val = build_zero_cst (TREE_TYPE (type));
1380 
1381 	      if (CONSTANT_CLASS_P (val))
1382 		val = integer_zerop (val) ? false_val : true_val;
1383 	      else
1384 		{
1385 		  new_temp = make_ssa_name (TREE_TYPE (type));
1386 		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1387 						   val, true_val, false_val);
1388 		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1389 		  val = new_temp;
1390 		}
1391 	    }
1392 	  else
1393 	    {
1394 	      gimple_seq stmts = NULL;
1395 	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1396 		val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1397 				    TREE_TYPE (type), val);
1398 	      else
1399 		/* ???  Condition vectorization expects us to do
1400 		   promotion of invariant/external defs.  */
1401 		val = gimple_convert (&stmts, TREE_TYPE (type), val);
1402 	      for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1403 		   !gsi_end_p (gsi2); )
1404 		{
1405 		  init_stmt = gsi_stmt (gsi2);
1406 		  gsi_remove (&gsi2, false);
1407 		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1408 		}
1409 	    }
1410 	}
1411       val = build_vector_from_val (type, val);
1412     }
1413 
1414   new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1415   init_stmt = gimple_build_assign (new_temp, val);
1416   vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1417   return new_temp;
1418 }
1419 
1420 
1421 /* Function vect_get_vec_defs_for_operand.
1422 
1423    OP is an operand in STMT_VINFO.  This function returns a vector of
1424    NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1425 
1426    In the case that OP is an SSA_NAME which is defined in the loop, then
1427    STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1428 
1429    In case OP is an invariant or constant, a new stmt that creates a vector def
1430    needs to be introduced.  VECTYPE may be used to specify a required type for
1431    vector invariant.  */
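/* For an invariant or constant OP a single vector def is created via
   vect_init_vector (a splat emitted on entry) and pushed NCOPIES times
   into *VEC_OPRNDS; otherwise the defs are taken from
   STMT_VINFO_VEC_STMTS of the defining statement.  */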
1432 
1433 void
1434 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1435 			       unsigned ncopies,
1436 			       tree op, vec<tree> *vec_oprnds, tree vectype)
1437 {
1438   gimple *def_stmt;
1439   enum vect_def_type dt;
1440   bool is_simple_use;
1441   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1442 
1443   if (dump_enabled_p ())
1444     dump_printf_loc (MSG_NOTE, vect_location,
1445 		     "vect_get_vec_defs_for_operand: %T\n", op);
1446 
1447   stmt_vec_info def_stmt_info;
1448   is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1449 				      &def_stmt_info, &def_stmt);
1450   gcc_assert (is_simple_use);
1451   if (def_stmt && dump_enabled_p ())
1452     dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  %G", def_stmt);
1453 
1454   vec_oprnds->create (ncopies);
1455   if (dt == vect_constant_def || dt == vect_external_def)
1456     {
1457       tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1458       tree vector_type;
1459 
1460       if (vectype)
1461 	vector_type = vectype;
1462       else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1463 	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1464 	vector_type = truth_type_for (stmt_vectype);
1465       else
1466 	vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1467 
1468       gcc_assert (vector_type);
1469       tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1470       while (ncopies--)
1471 	vec_oprnds->quick_push (vop);
1472     }
1473   else
1474     {
1475       def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1476       gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1477       for (unsigned i = 0; i < ncopies; ++i)
1478 	vec_oprnds->quick_push (gimple_get_lhs
1479 				  (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1480     }
1481 }
1482 
1483 
1484 /* Get vectorized definitions for OP0 and OP1.  */
1485 
1486 void
1487 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1488 		   unsigned ncopies,
1489 		   tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1490 		   tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1491 		   tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1492 		   tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1493 {
1494   if (slp_node)
1495     {
1496       if (op0)
1497 	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1498       if (op1)
1499 	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1500       if (op2)
1501 	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1502       if (op3)
1503 	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1504     }
1505   else
1506     {
1507       if (op0)
1508 	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1509 				       op0, vec_oprnds0, vectype0);
1510       if (op1)
1511 	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1512 				       op1, vec_oprnds1, vectype1);
1513       if (op2)
1514 	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1515 				       op2, vec_oprnds2, vectype2);
1516       if (op3)
1517 	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1518 				       op3, vec_oprnds3, vectype3);
1519     }
1520 }
1521 
1522 void
1523 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1524 		   unsigned ncopies,
1525 		   tree op0, vec<tree> *vec_oprnds0,
1526 		   tree op1, vec<tree> *vec_oprnds1,
1527 		   tree op2, vec<tree> *vec_oprnds2,
1528 		   tree op3, vec<tree> *vec_oprnds3)
1529 {
1530   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1531 		     op0, vec_oprnds0, NULL_TREE,
1532 		     op1, vec_oprnds1, NULL_TREE,
1533 		     op2, vec_oprnds2, NULL_TREE,
1534 		     op3, vec_oprnds3, NULL_TREE);
1535 }
1536 
1537 /* Helper function called by vect_finish_replace_stmt and
1538    vect_finish_stmt_generation.  Set the location of the new statement
1539    and, if it can throw, add it to the EH region of STMT_INFO's stmt.  */
1540 
1541 static void
1542 vect_finish_stmt_generation_1 (vec_info *,
1543 			       stmt_vec_info stmt_info, gimple *vec_stmt)
1544 {
1545   if (dump_enabled_p ())
1546     dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1547 
1548   if (stmt_info)
1549     {
1550       gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1551 
1552       /* While EH edges will generally prevent vectorization, stmt might
1553 	 e.g. be in a must-not-throw region.  Ensure newly created stmts
1554 	 that could throw are part of the same region.  */
1555       int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1556       if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1557 	add_stmt_to_eh_lp (vec_stmt, lp_nr);
1558     }
1559   else
1560     gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1561 }
1562 
1563 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1564    which sets the same scalar result as STMT_INFO did.  Finish generating
1565    VEC_STMT by setting its location and EH information.  */
1566 
1567 void
1568 vect_finish_replace_stmt (vec_info *vinfo,
1569 			  stmt_vec_info stmt_info, gimple *vec_stmt)
1570 {
1571   gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1572   gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1573 
1574   gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1575   gsi_replace (&gsi, vec_stmt, true);
1576 
1577   vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1578 }
1579 
1580 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1581    before *GSI, keeping virtual SSA form and EH information up to date.  */
1582 
1583 void
1584 vect_finish_stmt_generation (vec_info *vinfo,
1585 			     stmt_vec_info stmt_info, gimple *vec_stmt,
1586 			     gimple_stmt_iterator *gsi)
1587 {
1588   gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1589 
1590   if (!gsi_end_p (*gsi)
1591       && gimple_has_mem_ops (vec_stmt))
1592     {
1593       gimple *at_stmt = gsi_stmt (*gsi);
1594       tree vuse = gimple_vuse (at_stmt);
1595       if (vuse && TREE_CODE (vuse) == SSA_NAME)
1596 	{
1597 	  tree vdef = gimple_vdef (at_stmt);
1598 	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1599 	  gimple_set_modified (vec_stmt, true);
1600 	  /* If we have an SSA vuse and insert a store, update virtual
1601 	     SSA form to avoid triggering the renamer.  Do so only
1602 	     if we can easily see all uses - which is what almost always
1603 	     happens with the way vectorized stmts are inserted.  */
1604 	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1605 	      && ((is_gimple_assign (vec_stmt)
1606 		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1607 		  || (is_gimple_call (vec_stmt)
1608 		      && !(gimple_call_flags (vec_stmt)
1609 			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1610 	    {
1611 	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1612 	      gimple_set_vdef (vec_stmt, new_vdef);
1613 	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1614 	    }
1615 	}
1616     }
1617   gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1618   vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1619 }
1620 
1621 /* We want to vectorize a call to combined function CFN with function
1622    decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1623    as the types of all inputs.  Check whether this is possible using
1624    an internal function, returning its code if so or IFN_LAST if not.  */
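/* For example (illustrative): a scalar call to sqrtf is seen as
   CFN_BUILT_IN_SQRTF and maps to IFN_SQRT; if the target implements the
   corresponding vector optab for VECTYPE_OUT, IFN_SQRT is returned,
   otherwise IFN_LAST.  */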
1625 
1626 static internal_fn
1627 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1628 				tree vectype_out, tree vectype_in)
1629 {
1630   internal_fn ifn;
1631   if (internal_fn_p (cfn))
1632     ifn = as_internal_fn (cfn);
1633   else
1634     ifn = associated_internal_fn (fndecl);
1635   if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1636     {
1637       const direct_internal_fn_info &info = direct_internal_fn (ifn);
1638       if (info.vectorizable)
1639 	{
1640 	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1641 	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1642 	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1643 					      OPTIMIZE_FOR_SPEED))
1644 	    return ifn;
1645 	}
1646     }
1647   return IFN_LAST;
1648 }
1649 
1650 
1651 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1652 				  gimple_stmt_iterator *);
1653 
1654 /* Check whether a load or store statement in the loop described by
1655    LOOP_VINFO is possible in a loop using partial vectors.  This is
1656    testing whether the vectorizer pass has the appropriate support,
1657    as well as whether the target does.
1658 
1659    VLS_TYPE says whether the statement is a load or store and VECTYPE
1660    is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
1661    says how the load or store is going to be implemented and GROUP_SIZE
1662    is the number of load or store statements in the containing group.
1663    If the access is a gather load or scatter store, GS_INFO describes
1664    its arguments.  If the load or store is conditional, SCALAR_MASK is the
1665    condition under which it occurs.
1666 
1667    Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1668    vectors is not supported, otherwise record the required rgroup control
1669    types.  */
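/* Worked example (a sketch, not tied to a specific target): for an
   unmasked contiguous V8HI load with GROUP_SIZE == 1 and VF == 8, a target
   with masked loads gets one rgroup mask recorded via
   vect_record_loop_mask; a target with only length-controlled loads gets
   one rgroup length via vect_record_loop_len; if neither is available the
   loop gives up on partial vectors.  */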
1670 
1671 static void
1672 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1673 				      vec_load_store_type vls_type,
1674 				      int group_size,
1675 				      vect_memory_access_type
1676 				      memory_access_type,
1677 				      unsigned int ncopies,
1678 				      gather_scatter_info *gs_info,
1679 				      tree scalar_mask)
1680 {
1681   /* Invariant loads need no special support.  */
1682   if (memory_access_type == VMAT_INVARIANT)
1683     return;
1684 
1685   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1686   machine_mode vecmode = TYPE_MODE (vectype);
1687   bool is_load = (vls_type == VLS_LOAD);
1688   if (memory_access_type == VMAT_LOAD_STORE_LANES)
1689     {
1690       if (is_load
1691 	  ? !vect_load_lanes_supported (vectype, group_size, true)
1692 	  : !vect_store_lanes_supported (vectype, group_size, true))
1693 	{
1694 	  if (dump_enabled_p ())
1695 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1696 			     "can't operate on partial vectors because"
1697 			     " the target doesn't have an appropriate"
1698 			     " load/store-lanes instruction.\n");
1699 	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1700 	  return;
1701 	}
1702       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1703       return;
1704     }
1705 
1706   if (memory_access_type == VMAT_GATHER_SCATTER)
1707     {
1708       internal_fn ifn = (is_load
1709 			 ? IFN_MASK_GATHER_LOAD
1710 			 : IFN_MASK_SCATTER_STORE);
1711       if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1712 						   gs_info->memory_type,
1713 						   gs_info->offset_vectype,
1714 						   gs_info->scale))
1715 	{
1716 	  if (dump_enabled_p ())
1717 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1718 			     "can't operate on partial vectors because"
1719 			     " the target doesn't have an appropriate"
1720 			     " gather load or scatter store instruction.\n");
1721 	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1722 	  return;
1723 	}
1724       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1725       return;
1726     }
1727 
1728   if (memory_access_type != VMAT_CONTIGUOUS
1729       && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1730     {
1731       /* Element X of the data must come from iteration i * VF + X of the
1732 	 scalar loop.  We need more work to support other mappings.  */
1733       if (dump_enabled_p ())
1734 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1735 			 "can't operate on partial vectors because an"
1736 			 " access isn't contiguous.\n");
1737       LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1738       return;
1739     }
1740 
1741   if (!VECTOR_MODE_P (vecmode))
1742     {
1743       if (dump_enabled_p ())
1744 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1745 			 "can't operate on partial vectors when emulating"
1746 			 " vector operations.\n");
1747       LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1748       return;
1749     }
1750 
1751   /* We might load more scalars than we need for permuting SLP loads.
1752      We checked in get_group_load_store_type that the extra elements
1753      don't leak into a new vector.  */
1754   auto get_valid_nvectors = [] (poly_uint64 size, poly_uint64 nunits)
1755   {
1756     unsigned int nvectors;
1757     if (can_div_away_from_zero_p (size, nunits, &nvectors))
1758       return nvectors;
1759     gcc_unreachable ();
1760   };
1761 
1762   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1763   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1764   machine_mode mask_mode;
1765   bool using_partial_vectors_p = false;
1766   if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1767       && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1768     {
1769       unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1770       vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1771       using_partial_vectors_p = true;
1772     }
1773 
1774   machine_mode vmode;
1775   if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1776     {
1777       unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1778       vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1779       unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1780       vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1781       using_partial_vectors_p = true;
1782     }
1783 
1784   if (!using_partial_vectors_p)
1785     {
1786       if (dump_enabled_p ())
1787 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1788 			 "can't operate on partial vectors because the"
1789 			 " target doesn't have the appropriate partial"
1790 			 " vectorization load or store.\n");
1791       LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1792     }
1793 }
1794 
1795 /* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
1796    form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1797    that needs to be applied to all loads and stores in a vectorized loop.
1798    Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1799    otherwise return VEC_MASK & LOOP_MASK.
1800 
1801    MASK_TYPE is the type of both masks.  If new statements are needed,
1802    insert them before GSI.  */
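/* For instance (SSA names are illustrative): given LOOP_MASK loop_mask_5
   and VEC_MASK vec_mask_6, this emits

     vec_mask_and_7 = vec_mask_6 & loop_mask_5;

   before GSI and returns vec_mask_and_7, unless VEC_MASK is already known
   to be masked by LOOP_MASK via vec_cond_masked_set.  */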
1803 
1804 static tree
1805 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1806 		  tree vec_mask, gimple_stmt_iterator *gsi)
1807 {
1808   gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1809   if (!loop_mask)
1810     return vec_mask;
1811 
1812   gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1813 
1814   if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1815     return vec_mask;
1816 
1817   tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1818   gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1819 					  vec_mask, loop_mask);
1820 
1821   gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1822   return and_res;
1823 }
1824 
1825 /* Determine whether we can use a gather load or scatter store to vectorize
1826    strided load or store STMT_INFO by truncating the current offset to a
1827    smaller width.  We need to be able to construct an offset vector:
1828 
1829      { 0, X, X*2, X*3, ... }
1830 
1831    without loss of precision, where X is STMT_INFO's DR_STEP.
1832 
1833    Return true if this is possible, describing the gather load or scatter
1834    store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */
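/* Worked example (a sketch): with DR_STEP == 32 and an element size of 4,
   trying SCALE == 4 gives FACTOR == 8, so for an upper bound COUNT on the
   number of elements the offsets { 0, 8, 16, ... } need
   wi::min_precision (COUNT * 8) bits, and the narrowest power-of-two
   integer type with at least that many bits is offered to
   vect_gather_scatter_fn_p.  */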
1835 
1836 static bool
1837 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1838 				     loop_vec_info loop_vinfo, bool masked_p,
1839 				     gather_scatter_info *gs_info)
1840 {
1841   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1842   data_reference *dr = dr_info->dr;
1843   tree step = DR_STEP (dr);
1844   if (TREE_CODE (step) != INTEGER_CST)
1845     {
1846       /* ??? Perhaps we could use range information here?  */
1847       if (dump_enabled_p ())
1848 	dump_printf_loc (MSG_NOTE, vect_location,
1849 			 "cannot truncate variable step.\n");
1850       return false;
1851     }
1852 
1853   /* Get the number of bits in an element.  */
1854   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1855   scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1856   unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1857 
1858   /* Set COUNT to the upper limit on the number of elements - 1.
1859      Start with the maximum vectorization factor.  */
1860   unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1861 
1862   /* Try lowering COUNT to the number of scalar latch iterations.  */
1863   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1864   widest_int max_iters;
1865   if (max_loop_iterations (loop, &max_iters)
1866       && max_iters < count)
1867     count = max_iters.to_shwi ();
1868 
1869   /* Try scales of 1 and the element size.  */
1870   int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1871   wi::overflow_type overflow = wi::OVF_NONE;
1872   for (int i = 0; i < 2; ++i)
1873     {
1874       int scale = scales[i];
1875       widest_int factor;
1876       if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1877 	continue;
1878 
1879       /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE.  */
1880       widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1881       if (overflow)
1882 	continue;
1883       signop sign = range >= 0 ? UNSIGNED : SIGNED;
1884       unsigned int min_offset_bits = wi::min_precision (range, sign);
1885 
1886       /* Find the narrowest viable offset type.  */
1887       unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1888       tree offset_type = build_nonstandard_integer_type (offset_bits,
1889 							 sign == UNSIGNED);
1890 
1891       /* See whether the target supports the operation with an offset
1892 	 no narrower than OFFSET_TYPE.  */
1893       tree memory_type = TREE_TYPE (DR_REF (dr));
1894       if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1895 				     vectype, memory_type, offset_type, scale,
1896 				     &gs_info->ifn, &gs_info->offset_vectype)
1897 	  || gs_info->ifn == IFN_LAST)
1898 	continue;
1899 
1900       gs_info->decl = NULL_TREE;
1901       /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1902 	 but we don't need to store that here.  */
1903       gs_info->base = NULL_TREE;
1904       gs_info->element_type = TREE_TYPE (vectype);
1905       gs_info->offset = fold_convert (offset_type, step);
1906       gs_info->offset_dt = vect_constant_def;
1907       gs_info->scale = scale;
1908       gs_info->memory_type = memory_type;
1909       return true;
1910     }
1911 
1912   if (overflow && dump_enabled_p ())
1913     dump_printf_loc (MSG_NOTE, vect_location,
1914 		     "truncating gather/scatter offset to %d bits"
1915 		     " might change its value.\n", element_bits);
1916 
1917   return false;
1918 }
1919 
1920 /* Return true if we can use gather/scatter internal functions to
1921    vectorize STMT_INFO, which is a grouped or strided load or store.
1922    MASKED_P is true if the load or store is conditional.  When returning
1923    true, fill in GS_INFO with the information required to perform the
1924    operation.  */
1925 
1926 static bool
1927 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1928 				    loop_vec_info loop_vinfo, bool masked_p,
1929 				    gather_scatter_info *gs_info)
1930 {
1931   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1932       || gs_info->ifn == IFN_LAST)
1933     return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1934 						masked_p, gs_info);
1935 
1936   tree old_offset_type = TREE_TYPE (gs_info->offset);
1937   tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1938 
1939   gcc_assert (TYPE_PRECISION (new_offset_type)
1940 	      >= TYPE_PRECISION (old_offset_type));
1941   gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1942 
1943   if (dump_enabled_p ())
1944     dump_printf_loc (MSG_NOTE, vect_location,
1945 		     "using gather/scatter for strided/grouped access,"
1946 		     " scale = %d\n", gs_info->scale);
1947 
1948   return true;
1949 }
1950 
1951 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1952    elements with a known constant step.  Return -1 if that step
1953    is negative, 0 if it is zero, and 1 if it is greater than zero.  */
1954 
1955 static int
1956 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1957 {
1958   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1959   return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1960 			       size_zero_node);
1961 }
1962 
1963 /* If the target supports a permute mask that reverses the elements in
1964    a vector of type VECTYPE, return that mask, otherwise return null.  */
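/* For example, for V4SI the reversing permutation is { 3, 2, 1, 0 }; the
   builder below encodes it as the single stepped pattern { 3, 2, 1 } so
   that the same mask description also works for variable-length vectors.  */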
1965 
1966 static tree
1967 perm_mask_for_reverse (tree vectype)
1968 {
1969   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1970 
1971   /* The encoding has a single stepped pattern.  */
1972   vec_perm_builder sel (nunits, 1, 3);
1973   for (int i = 0; i < 3; ++i)
1974     sel.quick_push (nunits - 1 - i);
1975 
1976   vec_perm_indices indices (sel, 1, nunits);
1977   if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1978     return NULL_TREE;
1979   return vect_gen_perm_mask_checked (vectype, indices);
1980 }
1981 
1982 /* A subroutine of get_load_store_type, with a subset of the same
1983    arguments.  Handle the case where STMT_INFO is a load or store that
1984    accesses consecutive elements with a negative step.  Sets *POFFSET
1985    to the offset to be applied to the DR for the first access.  */
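/* Sketch: for a V4SI access whose DR_STEP is -4 bytes, the first vector
   access covers the four elements that end at the DR's address, so
   *POFFSET becomes (-4 + 1) * 4 == -12 bytes; if the target can reverse
   a V4SI vector, VMAT_CONTIGUOUS_REVERSE is returned.  */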
1986 
1987 static vect_memory_access_type
1988 get_negative_load_store_type (vec_info *vinfo,
1989 			      stmt_vec_info stmt_info, tree vectype,
1990 			      vec_load_store_type vls_type,
1991 			      unsigned int ncopies, poly_int64 *poffset)
1992 {
1993   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1994   dr_alignment_support alignment_support_scheme;
1995 
1996   if (ncopies > 1)
1997     {
1998       if (dump_enabled_p ())
1999 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2000 			 "multiple types with negative step.\n");
2001       return VMAT_ELEMENTWISE;
2002     }
2003 
2004   /* For backward running DRs the first access in vectype actually is
2005      N-1 elements before the address of the DR.  */
2006   *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
2007 	      * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
2008 
2009   int misalignment = dr_misalignment (dr_info, vectype, *poffset);
2010   alignment_support_scheme
2011     = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
2012   if (alignment_support_scheme != dr_aligned
2013       && alignment_support_scheme != dr_unaligned_supported)
2014     {
2015       if (dump_enabled_p ())
2016 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2017 			 "negative step but alignment required.\n");
2018       *poffset = 0;
2019       return VMAT_ELEMENTWISE;
2020     }
2021 
2022   if (vls_type == VLS_STORE_INVARIANT)
2023     {
2024       if (dump_enabled_p ())
2025 	dump_printf_loc (MSG_NOTE, vect_location,
2026 			 "negative step with invariant source;"
2027 			 " no permute needed.\n");
2028       return VMAT_CONTIGUOUS_DOWN;
2029     }
2030 
2031   if (!perm_mask_for_reverse (vectype))
2032     {
2033       if (dump_enabled_p ())
2034 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2035 			 "negative step and reversing not supported.\n");
2036       *poffset = 0;
2037       return VMAT_ELEMENTWISE;
2038     }
2039 
2040   return VMAT_CONTIGUOUS_REVERSE;
2041 }
2042 
2043 /* STMT_INFO is either a masked or unconditional store.  Return the value
2044    being stored.  */
2045 
2046 tree
2047 vect_get_store_rhs (stmt_vec_info stmt_info)
2048 {
2049   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2050     {
2051       gcc_assert (gimple_assign_single_p (assign));
2052       return gimple_assign_rhs1 (assign);
2053     }
2054   if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2055     {
2056       internal_fn ifn = gimple_call_internal_fn (call);
2057       int index = internal_fn_stored_value_index (ifn);
2058       gcc_assert (index >= 0);
2059       return gimple_call_arg (call, index);
2060     }
2061   gcc_unreachable ();
2062 }
2063 
2064 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2065 
2066    This function returns a vector type which can be composed from NELTS
2067    pieces, whose type is recorded in PTYPE.  VTYPE should be a vector type
2068    and has the same vector size as the returned vector.  The function first
2069    checks whether the target supports construction from piece-sized vector
2070    modes; if that fails, it checks construction from a piece-sized scalar
2071    mode.  It returns NULL_TREE if no usable composition can be found.
2072 
2073    For example, for (vtype=V16QI, nelts=4), we can probably get:
2074      - V16QI with PTYPE V4QI.
2075      - V4SI with PTYPE SI.
2076      - NULL_TREE.  */
2077 
2078 static tree
2079 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2080 {
2081   gcc_assert (VECTOR_TYPE_P (vtype));
2082   gcc_assert (known_gt (nelts, 0U));
2083 
2084   machine_mode vmode = TYPE_MODE (vtype);
2085   if (!VECTOR_MODE_P (vmode))
2086     return NULL_TREE;
2087 
2088   poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2089   unsigned int pbsize;
2090   if (constant_multiple_p (vbsize, nelts, &pbsize))
2091     {
2092       /* First check if vec_init optab supports construction from
2093 	 vector pieces directly.  */
2094       scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2095       poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2096       machine_mode rmode;
2097       if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2098 	  && (convert_optab_handler (vec_init_optab, vmode, rmode)
2099 	      != CODE_FOR_nothing))
2100 	{
2101 	  *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2102 	  return vtype;
2103 	}
2104 
2105       /* Otherwise check whether an integer mode of the same piece size
2106 	 exists and whether the vec_init optab supports construction from it.  */
2107       if (int_mode_for_size (pbsize, 0).exists (&elmode)
2108 	  && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2109 	  && (convert_optab_handler (vec_init_optab, rmode, elmode)
2110 	      != CODE_FOR_nothing))
2111 	{
2112 	  *ptype = build_nonstandard_integer_type (pbsize, 1);
2113 	  return build_vector_type (*ptype, nelts);
2114 	}
2115     }
2116 
2117   return NULL_TREE;
2118 }
2119 
2120 /* A subroutine of get_load_store_type, with a subset of the same
2121    arguments.  Handle the case where STMT_INFO is part of a grouped load
2122    or store.
2123 
2124    For stores, the statements in the group are all consecutive
2125    and there is no gap at the end.  For loads, the statements in the
2126    group might not be consecutive; there can be gaps between statements
2127    as well as at the end.  */
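/* Illustrative example: a single-element load group with DR_GROUP_GAP == 3
   vectorized with V4SI reads three elements beyond the group each
   iteration; the code below either proves the excess stays within an
   aligned block, loads only half a vector, or records that a scalar
   epilogue is needed via LOOP_VINFO_PEELING_FOR_GAPS.  */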
2128 
2129 static bool
2130 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2131 			   tree vectype, slp_tree slp_node,
2132 			   bool masked_p, vec_load_store_type vls_type,
2133 			   vect_memory_access_type *memory_access_type,
2134 			   poly_int64 *poffset,
2135 			   dr_alignment_support *alignment_support_scheme,
2136 			   int *misalignment,
2137 			   gather_scatter_info *gs_info)
2138 {
2139   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2140   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2141   stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2142   dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2143   unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2144   bool single_element_p = (stmt_info == first_stmt_info
2145 			   && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2146   unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2147   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2148 
2149   /* True if the vectorized statements would access beyond the last
2150      statement in the group.  */
2151   bool overrun_p = false;
2152 
2153   /* True if we can cope with such overrun by peeling for gaps, so that
2154      there is at least one final scalar iteration after the vector loop.  */
2155   bool can_overrun_p = (!masked_p
2156 			&& vls_type == VLS_LOAD
2157 			&& loop_vinfo
2158 			&& !loop->inner);
2159 
2160   /* There can only be a gap at the end of the group if the stride is
2161      known at compile time.  */
2162   gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2163 
2164   /* Stores can't yet have gaps.  */
2165   gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2166 
2167   if (slp_node)
2168     {
2169       /* For SLP vectorization we directly vectorize a subchain
2170 	 without permutation.  */
2171       if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2172 	first_dr_info
2173 	  = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2174       if (STMT_VINFO_STRIDED_P (first_stmt_info))
2175 	{
2176 	  /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2177 	     separated by the stride, until we have a complete vector.
2178 	     Fall back to scalar accesses if that isn't possible.  */
2179 	  if (multiple_p (nunits, group_size))
2180 	    *memory_access_type = VMAT_STRIDED_SLP;
2181 	  else
2182 	    *memory_access_type = VMAT_ELEMENTWISE;
2183 	}
2184       else
2185 	{
2186 	  overrun_p = loop_vinfo && gap != 0;
2187 	  if (overrun_p && vls_type != VLS_LOAD)
2188 	    {
2189 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2190 			       "Grouped store with gaps requires"
2191 			       " non-consecutive accesses\n");
2192 	      return false;
2193 	    }
2194 	  /* An overrun is fine if the trailing elements are smaller
2195 	     than the alignment boundary B.  Every vector access will
2196 	     be a multiple of B and so we are guaranteed to access a
2197 	     non-gap element in the same B-sized block.  */
2198 	  if (overrun_p
2199 	      && gap < (vect_known_alignment_in_bytes (first_dr_info,
2200 						       vectype)
2201 			/ vect_get_scalar_dr_size (first_dr_info)))
2202 	    overrun_p = false;
2203 
2204 	  /* If the gap splits the vector in half and the target
2205 	     can do half-vector operations avoid the epilogue peeling
2206 	     by simply loading half of the vector only.  Usually
2207 	     the construction with an upper zero half will be elided.  */
2208 	  dr_alignment_support alss;
2209 	  int misalign = dr_misalignment (first_dr_info, vectype);
2210 	  tree half_vtype;
2211 	  if (overrun_p
2212 	      && !masked_p
2213 	      && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2214 							  vectype, misalign)))
2215 		   == dr_aligned
2216 		  || alss == dr_unaligned_supported)
2217 	      && known_eq (nunits, (group_size - gap) * 2)
2218 	      && known_eq (nunits, group_size)
2219 	      && (vector_vector_composition_type (vectype, 2, &half_vtype)
2220 		  != NULL_TREE))
2221 	    overrun_p = false;
2222 
2223 	  if (overrun_p && !can_overrun_p)
2224 	    {
2225 	      if (dump_enabled_p ())
2226 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2227 				 "Peeling for outer loop is not supported\n");
2228 	      return false;
2229 	    }
2230 	  int cmp = compare_step_with_zero (vinfo, stmt_info);
2231 	  if (cmp < 0)
2232 	    {
2233 	      if (single_element_p)
2234 		/* ???  The VMAT_CONTIGUOUS_REVERSE code generation is
2235 		   only correct for single element "interleaving" SLP.  */
2236 		*memory_access_type = get_negative_load_store_type
2237 			     (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2238 	      else
2239 		{
2240 		  /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2241 		     separated by the stride, until we have a complete vector.
2242 		     Fall back to scalar accesses if that isn't possible.  */
2243 		  if (multiple_p (nunits, group_size))
2244 		    *memory_access_type = VMAT_STRIDED_SLP;
2245 		  else
2246 		    *memory_access_type = VMAT_ELEMENTWISE;
2247 		}
2248 	    }
2249 	  else
2250 	    {
2251 	      gcc_assert (!loop_vinfo || cmp > 0);
2252 	      *memory_access_type = VMAT_CONTIGUOUS;
2253 	    }
2254 	}
2255     }
2256   else
2257     {
2258       /* We can always handle this case using elementwise accesses,
2259 	 but see if something more efficient is available.  */
2260       *memory_access_type = VMAT_ELEMENTWISE;
2261 
2262       /* If there is a gap at the end of the group then these optimizations
2263 	 would access excess elements in the last iteration.  */
2264       bool would_overrun_p = (gap != 0);
2265       /* An overrun is fine if the trailing elements are smaller than the
2266 	 alignment boundary B.  Every vector access will be a multiple of B
2267 	 and so we are guaranteed to access a non-gap element in the
2268 	 same B-sized block.  */
2269       if (would_overrun_p
2270 	  && !masked_p
2271 	  && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2272 		    / vect_get_scalar_dr_size (first_dr_info)))
2273 	would_overrun_p = false;
2274 
2275       if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2276 	  && (can_overrun_p || !would_overrun_p)
2277 	  && compare_step_with_zero (vinfo, stmt_info) > 0)
2278 	{
2279 	  /* First cope with the degenerate case of a single-element
2280 	     vector.  */
2281 	  if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2282 	    ;
2283 
2284 	  /* Otherwise try using LOAD/STORE_LANES.  */
2285 	  else if (vls_type == VLS_LOAD
2286 		   ? vect_load_lanes_supported (vectype, group_size, masked_p)
2287 		   : vect_store_lanes_supported (vectype, group_size,
2288 						 masked_p))
2289 	    {
2290 	      *memory_access_type = VMAT_LOAD_STORE_LANES;
2291 	      overrun_p = would_overrun_p;
2292 	    }
2293 
2294 	  /* If that fails, try using permuting loads.  */
2295 	  else if (vls_type == VLS_LOAD
2296 		   ? vect_grouped_load_supported (vectype, single_element_p,
2297 						  group_size)
2298 		   : vect_grouped_store_supported (vectype, group_size))
2299 	    {
2300 	      *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2301 	      overrun_p = would_overrun_p;
2302 	    }
2303 	}
2304 
2305       /* As a last resort, try using a gather load or scatter store.
2306 
2307 	 ??? Although the code can handle all group sizes correctly,
2308 	 it probably isn't a win to use separate strided accesses based
2309 	 on nearby locations.  Or, even if it's a win over scalar code,
2310 	 it might not be a win over vectorizing at a lower VF, if that
2311 	 allows us to use contiguous accesses.  */
2312       if (*memory_access_type == VMAT_ELEMENTWISE
2313 	  && single_element_p
2314 	  && loop_vinfo
2315 	  && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2316 						 masked_p, gs_info))
2317 	*memory_access_type = VMAT_GATHER_SCATTER;
2318     }
2319 
2320   if (*memory_access_type == VMAT_GATHER_SCATTER
2321       || *memory_access_type == VMAT_ELEMENTWISE)
2322     {
2323       *alignment_support_scheme = dr_unaligned_supported;
2324       *misalignment = DR_MISALIGNMENT_UNKNOWN;
2325     }
2326   else
2327     {
2328       *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2329       *alignment_support_scheme
2330 	= vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2331 					 *misalignment);
2332     }
2333 
2334   if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2335     {
2336       /* STMT is the leader of the group. Check the operands of all the
2337 	 stmts of the group.  */
2338       stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2339       while (next_stmt_info)
2340 	{
2341 	  tree op = vect_get_store_rhs (next_stmt_info);
2342 	  enum vect_def_type dt;
2343 	  if (!vect_is_simple_use (op, vinfo, &dt))
2344 	    {
2345 	      if (dump_enabled_p ())
2346 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2347 				 "use not simple.\n");
2348 	      return false;
2349 	    }
2350 	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2351 	}
2352     }
2353 
2354   if (overrun_p)
2355     {
2356       gcc_assert (can_overrun_p);
2357       if (dump_enabled_p ())
2358 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2359 			 "Data access with gaps requires scalar "
2360 			 "epilogue loop\n");
2361       LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2362     }
2363 
2364   return true;
2365 }
2366 
2367 /* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
2368    if there is a memory access type that the vectorized form can use,
2369    storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
2370    or scatters, fill in GS_INFO accordingly.  In addition
2371    *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2372    the target does not support the alignment scheme.  *MISALIGNMENT
2373    is set according to the alignment of the access (including
2374    DR_MISALIGNMENT_UNKNOWN when it is unknown).
2375 
2376    SLP says whether we're performing SLP rather than loop vectorization.
2377    MASKED_P is true if the statement is conditional on a vectorized mask.
2378    VECTYPE is the vector type that the vectorized statements will use.
2379    NCOPIES is the number of vector statements that will be needed.  */
2380 
2381 static bool
2382 get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
2383 		     tree vectype, slp_tree slp_node,
2384 		     bool masked_p, vec_load_store_type vls_type,
2385 		     unsigned int ncopies,
2386 		     vect_memory_access_type *memory_access_type,
2387 		     poly_int64 *poffset,
2388 		     dr_alignment_support *alignment_support_scheme,
2389 		     int *misalignment,
2390 		     gather_scatter_info *gs_info)
2391 {
2392   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2393   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2394   *misalignment = DR_MISALIGNMENT_UNKNOWN;
2395   *poffset = 0;
2396   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2397     {
2398       *memory_access_type = VMAT_GATHER_SCATTER;
2399       if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2400 	gcc_unreachable ();
2401       else if (!vect_is_simple_use (gs_info->offset, vinfo,
2402 				    &gs_info->offset_dt,
2403 				    &gs_info->offset_vectype))
2404 	{
2405 	  if (dump_enabled_p ())
2406 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2407 			     "%s index use not simple.\n",
2408 			     vls_type == VLS_LOAD ? "gather" : "scatter");
2409 	  return false;
2410 	}
2411       else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2412 	{
2413 	  if (vls_type != VLS_LOAD)
2414 	    {
2415 	      if (dump_enabled_p ())
2416 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2417 				 "unsupported emulated scatter.\n");
2418 	      return false;
2419 	    }
2420 	  else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2421 		   || !TYPE_VECTOR_SUBPARTS
2422 			 (gs_info->offset_vectype).is_constant ()
2423 		   || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2424 					      (gs_info->offset_vectype),
2425 					    TYPE_VECTOR_SUBPARTS (vectype)))
2426 	    {
2427 	      if (dump_enabled_p ())
2428 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2429 				 "unsupported vector types for emulated "
2430 				 "gather.\n");
2431 	      return false;
2432 	    }
2433 	}
2434       /* Gather-scatter accesses perform only component accesses, alignment
2435 	 is irrelevant for them.  */
2436       *alignment_support_scheme = dr_unaligned_supported;
2437     }
2438   else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2439     {
2440       if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2441 				      masked_p,
2442 				      vls_type, memory_access_type, poffset,
2443 				      alignment_support_scheme,
2444 				      misalignment, gs_info))
2445 	return false;
2446     }
2447   else if (STMT_VINFO_STRIDED_P (stmt_info))
2448     {
2449       gcc_assert (!slp_node);
2450       if (loop_vinfo
2451 	  && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2452 						 masked_p, gs_info))
2453 	*memory_access_type = VMAT_GATHER_SCATTER;
2454       else
2455 	*memory_access_type = VMAT_ELEMENTWISE;
2456       /* Alignment is irrelevant here.  */
2457       *alignment_support_scheme = dr_unaligned_supported;
2458     }
2459   else
2460     {
2461       int cmp = compare_step_with_zero (vinfo, stmt_info);
2462       if (cmp == 0)
2463 	{
2464 	  gcc_assert (vls_type == VLS_LOAD);
2465 	  *memory_access_type = VMAT_INVARIANT;
2466 	  /* Invariant accesses perform only component accesses, alignment
2467 	     is irrelevant for them.  */
2468 	  *alignment_support_scheme = dr_unaligned_supported;
2469 	}
2470       else
2471 	{
2472 	  if (cmp < 0)
2473 	    *memory_access_type = get_negative_load_store_type
2474 	       (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2475 	  else
2476 	    *memory_access_type = VMAT_CONTIGUOUS;
2477 	  *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2478 					   vectype, *poffset);
2479 	  *alignment_support_scheme
2480 	    = vect_supportable_dr_alignment (vinfo,
2481 					     STMT_VINFO_DR_INFO (stmt_info),
2482 					     vectype, *misalignment);
2483 	}
2484     }
2485 
2486   if ((*memory_access_type == VMAT_ELEMENTWISE
2487        || *memory_access_type == VMAT_STRIDED_SLP)
2488       && !nunits.is_constant ())
2489     {
2490       if (dump_enabled_p ())
2491 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2492 			 "Not using elementwise accesses due to variable "
2493 			 "vectorization factor.\n");
2494       return false;
2495     }
2496 
2497   if (*alignment_support_scheme == dr_unaligned_unsupported)
2498     {
2499       if (dump_enabled_p ())
2500 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2501 			 "unsupported unaligned access\n");
2502       return false;
2503     }
2504 
2505   /* FIXME: At the moment the cost model seems to underestimate the
2506      cost of using elementwise accesses.  This check preserves the
2507      traditional behavior until that can be fixed.  */
2508   stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2509   if (!first_stmt_info)
2510     first_stmt_info = stmt_info;
2511   if (*memory_access_type == VMAT_ELEMENTWISE
2512       && !STMT_VINFO_STRIDED_P (first_stmt_info)
2513       && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2514 	   && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2515 	   && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2516     {
2517       if (dump_enabled_p ())
2518 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2519 			 "not falling back to elementwise accesses\n");
2520       return false;
2521     }
2522   return true;
2523 }
2524 
2525 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2526    conditional operation STMT_INFO.  When returning true, store the mask
2527    in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2528    vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2529    to the mask in *MASK_NODE if MASK_NODE is not NULL.  */
2530 
2531 static bool
2532 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2533 			slp_tree slp_node, unsigned mask_index,
2534 			tree *mask, slp_tree *mask_node,
2535 			vect_def_type *mask_dt_out, tree *mask_vectype_out)
2536 {
2537   enum vect_def_type mask_dt;
2538   tree mask_vectype;
2539   slp_tree mask_node_1;
2540   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2541 			   mask, &mask_node_1, &mask_dt, &mask_vectype))
2542     {
2543       if (dump_enabled_p ())
2544 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2545 			 "mask use not simple.\n");
2546       return false;
2547     }
2548 
2549   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2550     {
2551       if (dump_enabled_p ())
2552 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2553 			 "mask argument is not a boolean.\n");
2554       return false;
2555     }
2556 
2557   /* If the caller is not prepared for adjusting an external/constant
2558      SLP mask vector type fail.  */
2559   if (slp_node
2560       && !mask_node
2561       && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2562     {
2563       if (dump_enabled_p ())
2564 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2565 			 "SLP mask argument is not vectorized.\n");
2566       return false;
2567     }
2568 
2569   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2570   if (!mask_vectype)
2571     mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2572 
2573   if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2574     {
2575       if (dump_enabled_p ())
2576 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2577 			 "could not find an appropriate vector mask type.\n");
2578       return false;
2579     }
2580 
2581   if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2582 		TYPE_VECTOR_SUBPARTS (vectype)))
2583     {
2584       if (dump_enabled_p ())
2585 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2586 			 "vector mask type %T"
2587 			 " does not match vector data type %T.\n",
2588 			 mask_vectype, vectype);
2589 
2590       return false;
2591     }
2592 
2593   *mask_dt_out = mask_dt;
2594   *mask_vectype_out = mask_vectype;
2595   if (mask_node)
2596     *mask_node = mask_node_1;
2597   return true;
2598 }
2599 
2600 /* Return true if stored value RHS is suitable for vectorizing store
2601    statement STMT_INFO.  When returning true, store the type of the
2602    definition in *RHS_DT_OUT, the type of the vectorized store value in
2603    *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */
2604 
2605 static bool
2606 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2607 		      slp_tree slp_node, tree rhs,
2608 		      vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2609 		      vec_load_store_type *vls_type_out)
2610 {
2611   /* In case this is a store from a constant, make sure
2612      native_encode_expr can handle it.  */
2613   if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2614     {
2615       if (dump_enabled_p ())
2616 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2617 			 "cannot encode constant as a byte sequence.\n");
2618       return false;
2619     }
2620 
2621   unsigned op_no = 0;
2622   if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2623     {
2624       if (gimple_call_internal_p (call)
2625 	  && internal_store_fn_p (gimple_call_internal_fn (call)))
2626 	op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2627     }
2628 
2629   enum vect_def_type rhs_dt;
2630   tree rhs_vectype;
2631   slp_tree slp_op;
2632   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2633 			   &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2634     {
2635       if (dump_enabled_p ())
2636 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2637 			 "use not simple.\n");
2638       return false;
2639     }
2640 
2641   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2642   if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2643     {
2644       if (dump_enabled_p ())
2645 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2646 			 "incompatible vector types.\n");
2647       return false;
2648     }
2649 
2650   *rhs_dt_out = rhs_dt;
2651   *rhs_vectype_out = rhs_vectype;
2652   if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2653     *vls_type_out = VLS_STORE_INVARIANT;
2654   else
2655     *vls_type_out = VLS_STORE;
2656   return true;
2657 }
2658 
2659 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2660    Note that we support masks with floating-point type, in which case the
2661    floats are interpreted as a bitmask.  */
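/* For instance, for a V4SF MASKTYPE (as used by target gather built-ins
   that take a floating-point mask) the mask is four floats whose bit
   patterns are all ones, built via real_from_target below; for an integer
   MASKTYPE it is simply -1.  */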
2662 
2663 static tree
2664 vect_build_all_ones_mask (vec_info *vinfo,
2665 			  stmt_vec_info stmt_info, tree masktype)
2666 {
2667   if (TREE_CODE (masktype) == INTEGER_TYPE)
2668     return build_int_cst (masktype, -1);
2669   else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2670     {
2671       tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2672       mask = build_vector_from_val (masktype, mask);
2673       return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2674     }
2675   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2676     {
2677       REAL_VALUE_TYPE r;
2678       long tmp[6];
2679       for (int j = 0; j < 6; ++j)
2680 	tmp[j] = -1;
2681       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2682       tree mask = build_real (TREE_TYPE (masktype), r);
2683       mask = build_vector_from_val (masktype, mask);
2684       return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2685     }
2686   gcc_unreachable ();
2687 }
2688 
2689 /* Build an all-zero merge value of type VECTYPE while vectorizing
2690    STMT_INFO as a gather load.  */
2691 
2692 static tree
2693 vect_build_zero_merge_argument (vec_info *vinfo,
2694 				stmt_vec_info stmt_info, tree vectype)
2695 {
2696   tree merge;
2697   if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2698     merge = build_int_cst (TREE_TYPE (vectype), 0);
2699   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2700     {
2701       REAL_VALUE_TYPE r;
2702       long tmp[6];
2703       for (int j = 0; j < 6; ++j)
2704 	tmp[j] = 0;
2705       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2706       merge = build_real (TREE_TYPE (vectype), r);
2707     }
2708   else
2709     gcc_unreachable ();
2710   merge = build_vector_from_val (vectype, merge);
2711   return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2712 }
2713 
2714 /* Build a gather load call while vectorizing STMT_INFO.  Insert new
2715    instructions before GSI and add them to VEC_STMT.  GS_INFO describes
2716    the gather load operation.  If the load is conditional, MASK is the
2717    unvectorized condition and MASK_DT is its definition type, otherwise
2718    MASK is null.  */
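/* Overview of the MODIFIER handling below (a sketch): if the data vector
   and the offset vector have the same number of elements, one call is
   emitted per copy (NONE).  If the offset vector has twice as many
   elements (WIDEN), every other copy reuses the upper half of the previous
   offset vector via PERM_MASK.  If the data vector has twice as many
   elements (NARROW), NCOPIES is doubled and pairs of half-width gather
   results are combined with PERM_MASK.  */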
2719 
2720 static void
2721 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2722 			      gimple_stmt_iterator *gsi,
2723 			      gimple **vec_stmt,
2724 			      gather_scatter_info *gs_info,
2725 			      tree mask)
2726 {
2727   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2728   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2729   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2730   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2731   int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2732   edge pe = loop_preheader_edge (loop);
2733   enum { NARROW, NONE, WIDEN } modifier;
2734   poly_uint64 gather_off_nunits
2735     = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2736 
2737   tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2738   tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2739   tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2740   tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2741   tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2742   tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2743   tree scaletype = TREE_VALUE (arglist);
2744   tree real_masktype = masktype;
2745   gcc_checking_assert (types_compatible_p (srctype, rettype)
2746 		       && (!mask
2747 			   || TREE_CODE (masktype) == INTEGER_TYPE
2748 			   || types_compatible_p (srctype, masktype)));
2749   if (mask)
2750     masktype = truth_type_for (srctype);
2751 
2752   tree mask_halftype = masktype;
2753   tree perm_mask = NULL_TREE;
2754   tree mask_perm_mask = NULL_TREE;
2755   if (known_eq (nunits, gather_off_nunits))
2756     modifier = NONE;
2757   else if (known_eq (nunits * 2, gather_off_nunits))
2758     {
2759       modifier = WIDEN;
2760 
2761       /* Currently widening gathers and scatters are only supported for
2762 	 fixed-length vectors.  */
2763       int count = gather_off_nunits.to_constant ();
2764       vec_perm_builder sel (count, count, 1);
2765       for (int i = 0; i < count; ++i)
2766 	sel.quick_push (i | (count / 2));
2767 
2768       vec_perm_indices indices (sel, 1, count);
2769       perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2770 					      indices);
2771     }
2772   else if (known_eq (nunits, gather_off_nunits * 2))
2773     {
2774       modifier = NARROW;
2775 
2776       /* Currently narrowing gathers and scatters are only supported for
2777 	 fixed-length vectors.  */
2778       int count = nunits.to_constant ();
2779       vec_perm_builder sel (count, count, 1);
2780       sel.quick_grow (count);
2781       for (int i = 0; i < count; ++i)
2782 	sel[i] = i < count / 2 ? i : i + count / 2;
2783       vec_perm_indices indices (sel, 2, count);
2784       perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2785 
2786       ncopies *= 2;
2787 
2788       if (mask && masktype == real_masktype)
2789 	{
2790 	  for (int i = 0; i < count; ++i)
2791 	    sel[i] = i | (count / 2);
2792 	  indices.new_vector (sel, 2, count);
2793 	  mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2794 	}
2795       else if (mask)
2796 	mask_halftype = truth_type_for (gs_info->offset_vectype);
2797     }
2798   else
2799     gcc_unreachable ();
2800 
2801   tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2802   tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2803 
2804   tree ptr = fold_convert (ptrtype, gs_info->base);
2805   if (!is_gimple_min_invariant (ptr))
2806     {
2807       gimple_seq seq;
2808       ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2809       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2810       gcc_assert (!new_bb);
2811     }
2812 
2813   tree scale = build_int_cst (scaletype, gs_info->scale);
2814 
2815   tree vec_oprnd0 = NULL_TREE;
2816   tree vec_mask = NULL_TREE;
2817   tree src_op = NULL_TREE;
2818   tree mask_op = NULL_TREE;
2819   tree prev_res = NULL_TREE;
2820 
2821   if (!mask)
2822     {
2823       src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2824       mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2825     }
2826 
2827   auto_vec<tree> vec_oprnds0;
2828   auto_vec<tree> vec_masks;
2829   vect_get_vec_defs_for_operand (vinfo, stmt_info,
2830 				 modifier == WIDEN ? ncopies / 2 : ncopies,
2831 				 gs_info->offset, &vec_oprnds0);
2832   if (mask)
2833     vect_get_vec_defs_for_operand (vinfo, stmt_info,
2834 				   modifier == NARROW ? ncopies / 2 : ncopies,
2835 				   mask, &vec_masks, masktype);
2836   for (int j = 0; j < ncopies; ++j)
2837     {
2838       tree op, var;
2839       if (modifier == WIDEN && (j & 1))
2840 	op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2841 				   perm_mask, stmt_info, gsi);
2842       else
2843 	op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2844 
2845       if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2846 	{
2847 	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2848 				TYPE_VECTOR_SUBPARTS (idxtype)));
2849 	  var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2850 	  op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2851 	  gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2852 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2853 	  op = var;
2854 	}
2855 
2856       if (mask)
2857 	{
2858 	  if (mask_perm_mask && (j & 1))
2859 	    mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2860 					    mask_perm_mask, stmt_info, gsi);
2861 	  else
2862 	    {
2863 	      if (modifier == NARROW)
2864 		{
2865 		  if ((j & 1) == 0)
2866 		    vec_mask = vec_masks[j / 2];
2867 		}
2868 	      else
2869 		vec_mask = vec_masks[j];
2870 
2871 	      mask_op = vec_mask;
2872 	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2873 		{
2874 		  poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2875 		  poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2876 		  gcc_assert (known_eq (sub1, sub2));
2877 		  var = vect_get_new_ssa_name (masktype, vect_simple_var);
2878 		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2879 		  gassign *new_stmt
2880 		    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2881 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2882 		  mask_op = var;
2883 		}
2884 	    }
2885 	  if (modifier == NARROW && masktype != real_masktype)
2886 	    {
2887 	      var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2888 	      gassign *new_stmt
2889 		= gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2890 						    : VEC_UNPACK_LO_EXPR,
2891 				       mask_op);
2892 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2893 	      mask_op = var;
2894 	    }
2895 	  src_op = mask_op;
2896 	}
2897 
2898       tree mask_arg = mask_op;
2899       if (masktype != real_masktype)
2900 	{
2901 	  tree utype, optype = TREE_TYPE (mask_op);
2902 	  if (VECTOR_TYPE_P (real_masktype)
2903 	      || TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2904 	    utype = real_masktype;
2905 	  else
2906 	    utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2907 	  var = vect_get_new_ssa_name (utype, vect_scalar_var);
2908 	  mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2909 	  gassign *new_stmt
2910 	    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2911 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2912 	  mask_arg = var;
2913 	  if (!useless_type_conversion_p (real_masktype, utype))
2914 	    {
2915 	      gcc_assert (TYPE_PRECISION (utype)
2916 			  <= TYPE_PRECISION (real_masktype));
2917 	      var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2918 	      new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2919 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2920 	      mask_arg = var;
2921 	    }
2922 	  src_op = build_zero_cst (srctype);
2923 	}
2924       gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2925 					    mask_arg, scale);
2926 
2927       if (!useless_type_conversion_p (vectype, rettype))
2928 	{
2929 	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2930 				TYPE_VECTOR_SUBPARTS (rettype)));
2931 	  op = vect_get_new_ssa_name (rettype, vect_simple_var);
2932 	  gimple_call_set_lhs (new_stmt, op);
2933 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2934 	  var = make_ssa_name (vec_dest);
2935 	  op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2936 	  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2937 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2938 	}
2939       else
2940 	{
2941 	  var = make_ssa_name (vec_dest, new_stmt);
2942 	  gimple_call_set_lhs (new_stmt, var);
2943 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2944 	}
2945 
2946       if (modifier == NARROW)
2947 	{
2948 	  if ((j & 1) == 0)
2949 	    {
2950 	      prev_res = var;
2951 	      continue;
2952 	    }
2953 	  var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2954 				      stmt_info, gsi);
2955 	  new_stmt = SSA_NAME_DEF_STMT (var);
2956 	}
2957 
2958       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2959     }
2960   *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2961 }
2962 
2963 /* Prepare the base and offset in GS_INFO for vectorization.
2964    Set *DATAREF_PTR to the loop-invariant base address and fill
2965    *VEC_OFFSET with the vectorized offset operands for each copy of STMT_INFO.
2966    STMT_INFO is the statement described by GS_INFO and LOOP is the
2967    containing loop.  */
2968 
2969 static void
2970 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
2971 			     class loop *loop, stmt_vec_info stmt_info,
2972 			     slp_tree slp_node, gather_scatter_info *gs_info,
2973 			     tree *dataref_ptr, vec<tree> *vec_offset)
2974 {
2975   gimple_seq stmts = NULL;
2976   *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2977   if (stmts != NULL)
2978     {
2979       basic_block new_bb;
2980       edge pe = loop_preheader_edge (loop);
2981       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2982       gcc_assert (!new_bb);
2983     }
2984   if (slp_node)
2985     vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
2986   else
2987     {
2988       unsigned ncopies
2989 	= vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
2990       vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
2991 				     gs_info->offset, vec_offset,
2992 				     gs_info->offset_vectype);
2993     }
2994 }
2995 
2996 /* Prepare to implement a grouped or strided load or store using
2997    the gather load or scatter store operation described by GS_INFO.
2998    STMT_INFO is the load or store statement.
2999 
3000    Set *DATAREF_BUMP to the amount that should be added to the base
3001    address after each copy of the vectorized statement.  Set *VEC_OFFSET
3002    to an invariant offset vector in which element I has the value
3003    I * DR_STEP / SCALE.  */
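/* For example (hypothetical values): with DR_STEP == 16, SCALE == 4 and a
   4-element vector type, *VEC_OFFSET is { 0, 4, 8, 12 } and *DATAREF_BUMP
   is 64.  */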
3004 
3005 static void
3006 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3007 				 loop_vec_info loop_vinfo,
3008 				 gather_scatter_info *gs_info,
3009 				 tree *dataref_bump, tree *vec_offset)
3010 {
3011   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3012   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3013 
3014   tree bump = size_binop (MULT_EXPR,
3015 			  fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3016 			  size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3017   *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3018 
3019   /* The offset given in GS_INFO can have pointer type, so use the element
3020      type of the vector instead.  */
3021   tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3022 
3023   /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
3024   tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3025 			  ssize_int (gs_info->scale));
3026   step = fold_convert (offset_type, step);
3027 
3028   /* Create {0, X, X*2, X*3, ...}.  */
3029   tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3030 			     build_zero_cst (offset_type), step);
3031   *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
3032 }
3033 
3034 /* Return the amount that should be added to a vector pointer to move
3035    to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
3036    being vectorized and MEMORY_ACCESS_TYPE describes the type of
3037    vectorization.  */
3038 
3039 static tree
3040 vect_get_data_ptr_increment (vec_info *vinfo,
3041 			     dr_vec_info *dr_info, tree aggr_type,
3042 			     vect_memory_access_type memory_access_type)
3043 {
3044   if (memory_access_type == VMAT_INVARIANT)
3045     return size_zero_node;
3046 
3047   tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3048   tree step = vect_dr_behavior (vinfo, dr_info)->step;
3049   if (tree_int_cst_sgn (step) == -1)
3050     iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3051   return iv_step;
3052 }
3053 
3054 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}.  */
3055 
3056 static bool
3057 vectorizable_bswap (vec_info *vinfo,
3058 		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3059 		    gimple **vec_stmt, slp_tree slp_node,
3060 		    slp_tree *slp_op,
3061 		    tree vectype_in, stmt_vector_for_cost *cost_vec)
3062 {
3063   tree op, vectype;
3064   gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3065   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3066   unsigned ncopies;
3067 
3068   op = gimple_call_arg (stmt, 0);
3069   vectype = STMT_VINFO_VECTYPE (stmt_info);
3070   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3071 
3072   /* Multiple types in SLP are handled by creating the appropriate number of
3073      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
3074      case of SLP.  */
3075   if (slp_node)
3076     ncopies = 1;
3077   else
3078     ncopies = vect_get_num_copies (loop_vinfo, vectype);
3079 
3080   gcc_assert (ncopies >= 1);
3081 
3082   tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3083   if (! char_vectype)
3084     return false;
3085 
3086   poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3087   unsigned word_bytes;
3088   if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3089     return false;
3090 
3091   /* The encoding uses one stepped pattern for each byte in the word.  */
3092   vec_perm_builder elts (num_bytes, word_bytes, 3);
3093   for (unsigned i = 0; i < 3; ++i)
3094     for (unsigned j = 0; j < word_bytes; ++j)
3095       elts.quick_push ((i + 1) * word_bytes - j - 1);
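  /* For example, for a 16-byte vector and a 4-byte word (bswap32) the
     resulting permutation is { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8,
     15, 14, 13, 12 }.  */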
3096 
3097   vec_perm_indices indices (elts, 1, num_bytes);
3098   if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3099     return false;
3100 
3101   if (! vec_stmt)
3102     {
3103       if (slp_node
3104 	  && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3105 	{
3106 	  if (dump_enabled_p ())
3107 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3108 			     "incompatible vector types for invariants\n");
3109 	  return false;
3110 	}
3111 
3112       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3113       DUMP_VECT_SCOPE ("vectorizable_bswap");
3114       record_stmt_cost (cost_vec,
3115 			1, vector_stmt, stmt_info, 0, vect_prologue);
3116       record_stmt_cost (cost_vec,
3117 			slp_node
3118 			? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3119 			vec_perm, stmt_info, 0, vect_body);
3120       return true;
3121     }
3122 
3123   tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3124 
3125   /* Transform.  */
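  /* The bswap becomes a byte permutation: view-convert each operand to a
     vector of chars, apply bswap_vconst with a VEC_PERM_EXPR, and
     view-convert the result back to VECTYPE.  */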
3126   vec<tree> vec_oprnds = vNULL;
3127   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3128 		     op, &vec_oprnds);
3129   /* Arguments are ready.  Create the new vector stmt.  */
3130   unsigned i;
3131   tree vop;
3132   FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3133     {
3134       gimple *new_stmt;
3135       tree tem = make_ssa_name (char_vectype);
3136       new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3137 						   char_vectype, vop));
3138       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3139       tree tem2 = make_ssa_name (char_vectype);
3140       new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3141 				      tem, tem, bswap_vconst);
3142       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3143       tem = make_ssa_name (vectype);
3144       new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3145 						   vectype, tem2));
3146       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3147       if (slp_node)
3148 	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3149       else
3150 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3151     }
3152 
3153   if (!slp_node)
3154     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3155 
3156   vec_oprnds.release ();
3157   return true;
3158 }
3159 
3160 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3161    integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3162    in a single step.  On success, store the binary pack code in
3163    *CONVERT_CODE.  */
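/* For example, if a call produces two wide half-results (say V4DI) that must
   be narrowed into a single V8SI vector, the pack code returned here is
   typically VEC_PACK_TRUNC_EXPR.  */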
3164 
3165 static bool
3166 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3167 			  tree_code *convert_code)
3168 {
3169   if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3170       || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3171     return false;
3172 
3173   tree_code code;
3174   int multi_step_cvt = 0;
3175   auto_vec <tree, 8> interm_types;
3176   if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3177 					&code, &multi_step_cvt, &interm_types)
3178       || multi_step_cvt)
3179     return false;
3180 
3181   *convert_code = code;
3182   return true;
3183 }
3184 
3185 /* Function vectorizable_call.
3186 
3187    Check if STMT_INFO performs a function call that can be vectorized.
3188    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3189    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3190    Return true if STMT_INFO is vectorizable in this way.  */
3191 
3192 static bool
3193 vectorizable_call (vec_info *vinfo,
3194 		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3195 		   gimple **vec_stmt, slp_tree slp_node,
3196 		   stmt_vector_for_cost *cost_vec)
3197 {
3198   gcall *stmt;
3199   tree vec_dest;
3200   tree scalar_dest;
3201   tree op;
3202   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3203   tree vectype_out, vectype_in;
3204   poly_uint64 nunits_in;
3205   poly_uint64 nunits_out;
3206   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3207   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3208   tree fndecl, new_temp, rhs_type;
3209   enum vect_def_type dt[4]
3210     = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3211 	vect_unknown_def_type };
3212   tree vectypes[ARRAY_SIZE (dt)] = {};
3213   slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3214   int ndts = ARRAY_SIZE (dt);
3215   int ncopies, j;
3216   auto_vec<tree, 8> vargs;
3217   enum { NARROW, NONE, WIDEN } modifier;
3218   size_t i, nargs;
3219   tree lhs;
3220 
3221   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3222     return false;
3223 
3224   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3225       && ! vec_stmt)
3226     return false;
3227 
3228   /* Is STMT_INFO a vectorizable call?   */
3229   stmt = dyn_cast <gcall *> (stmt_info->stmt);
3230   if (!stmt)
3231     return false;
3232 
3233   if (gimple_call_internal_p (stmt)
3234       && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3235 	  || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3236     /* Handled by vectorizable_load and vectorizable_store.  */
3237     return false;
3238 
3239   if (gimple_call_lhs (stmt) == NULL_TREE
3240       || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3241     return false;
3242 
3243   gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3244 
3245   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3246 
3247   /* Process function arguments.  */
3248   rhs_type = NULL_TREE;
3249   vectype_in = NULL_TREE;
3250   nargs = gimple_call_num_args (stmt);
3251 
3252   /* Bail out if the function has more than four arguments; we do not have
3253      interesting builtin functions to vectorize with more than two arguments
3254      except for fma.  Calls with no arguments are not handled either.  */
3255   if (nargs == 0 || nargs > 4)
3256     return false;
3257 
3258   /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic.  */
3259   combined_fn cfn = gimple_call_combined_fn (stmt);
3260   if (cfn == CFN_GOMP_SIMD_LANE)
3261     {
3262       nargs = 0;
3263       rhs_type = unsigned_type_node;
3264     }
3265 
3266   int mask_opno = -1;
3267   if (internal_fn_p (cfn))
3268     mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3269 
3270   for (i = 0; i < nargs; i++)
3271     {
3272       if ((int) i == mask_opno)
3273 	{
3274 	  if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3275 				       &op, &slp_op[i], &dt[i], &vectypes[i]))
3276 	    return false;
3277 	  continue;
3278 	}
3279 
3280       if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3281 			       i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3282 	{
3283 	  if (dump_enabled_p ())
3284 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3285 			     "use not simple.\n");
3286 	  return false;
3287 	}
3288 
3289       /* We can only handle calls with arguments of the same type.  */
3290       if (rhs_type
3291 	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3292 	{
3293 	  if (dump_enabled_p ())
3294 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3295                              "argument types differ.\n");
3296 	  return false;
3297 	}
3298       if (!rhs_type)
3299 	rhs_type = TREE_TYPE (op);
3300 
3301       if (!vectype_in)
3302 	vectype_in = vectypes[i];
3303       else if (vectypes[i]
3304 	       && !types_compatible_p (vectypes[i], vectype_in))
3305 	{
3306 	  if (dump_enabled_p ())
3307 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3308                              "argument vector types differ.\n");
3309 	  return false;
3310 	}
3311     }
3312   /* If all arguments are external or constant defs, infer the vector type
3313      from the scalar type.  */
3314   if (!vectype_in)
3315     vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3316   if (vec_stmt)
3317     gcc_assert (vectype_in);
3318   if (!vectype_in)
3319     {
3320       if (dump_enabled_p ())
3321 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3322 			 "no vectype for scalar type %T\n", rhs_type);
3323 
3324       return false;
3325     }
3326   /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3327      just mixtures of nunits.  E.g. DI->SI versions of __builtin_ctz*
3328      are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3329      by a pack of the two vectors into an SI vector.  We would need
3330      separate code to handle direct VnDI->VnSI IFN_CTZs.  */
3331   if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3332     {
3333       if (dump_enabled_p ())
3334 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3335 			 "mismatched vector sizes %T and %T\n",
3336 			 vectype_in, vectype_out);
3337       return false;
3338     }
3339 
3340   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3341       != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3342     {
3343       if (dump_enabled_p ())
3344 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3345 			 "mixed mask and nonmask vector types\n");
3346       return false;
3347     }
3348 
3349   /* FORNOW */
3350   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3351   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3352   if (known_eq (nunits_in * 2, nunits_out))
3353     modifier = NARROW;
3354   else if (known_eq (nunits_out, nunits_in))
3355     modifier = NONE;
3356   else if (known_eq (nunits_out * 2, nunits_in))
3357     modifier = WIDEN;
3358   else
3359     return false;
3360 
3361   /* We only handle functions that do not read or clobber memory.  */
3362   if (gimple_vuse (stmt))
3363     {
3364       if (dump_enabled_p ())
3365 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3366 			 "function reads from or writes to memory.\n");
3367       return false;
3368     }
3369 
3370   /* For now, we only vectorize functions if a target specific builtin
3371      is available.  TODO -- in some cases, it might be profitable to
3372      insert the calls for pieces of the vector, in order to be able
3373      to vectorize other operations in the loop.  */
3374   fndecl = NULL_TREE;
3375   internal_fn ifn = IFN_LAST;
3376   tree callee = gimple_call_fndecl (stmt);
3377 
3378   /* First try using an internal function.  */
3379   tree_code convert_code = ERROR_MARK;
3380   if (cfn != CFN_LAST
3381       && (modifier == NONE
3382 	  || (modifier == NARROW
3383 	      && simple_integer_narrowing (vectype_out, vectype_in,
3384 					   &convert_code))))
3385     ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3386 					  vectype_in);
3387 
3388   /* If that fails, try asking for a target-specific built-in function.  */
3389   if (ifn == IFN_LAST)
3390     {
3391       if (cfn != CFN_LAST)
3392 	fndecl = targetm.vectorize.builtin_vectorized_function
3393 	  (cfn, vectype_out, vectype_in);
3394       else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3395 	fndecl = targetm.vectorize.builtin_md_vectorized_function
3396 	  (callee, vectype_out, vectype_in);
3397     }
3398 
3399   if (ifn == IFN_LAST && !fndecl)
3400     {
3401       if (cfn == CFN_GOMP_SIMD_LANE
3402 	  && !slp_node
3403 	  && loop_vinfo
3404 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3405 	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3406 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3407 	     == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3408 	{
3409 	  /* We can handle IFN_GOMP_SIMD_LANE by returning a
3410 	     { 0, 1, 2, ... vf - 1 } vector.  */
3411 	  gcc_assert (nargs == 0);
3412 	}
3413       else if (modifier == NONE
3414 	       && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3415 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3416 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3417 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3418 	return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3419 				   slp_op, vectype_in, cost_vec);
3420       else
3421 	{
3422 	  if (dump_enabled_p ())
3423 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3424 			     "function is not vectorizable.\n");
3425 	  return false;
3426 	}
3427     }
3428 
3429   if (slp_node)
3430     ncopies = 1;
3431   else if (modifier == NARROW && ifn == IFN_LAST)
3432     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3433   else
3434     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3435 
3436   /* Sanity check: make sure that at least one copy of the vectorized stmt
3437      needs to be generated.  */
3438   gcc_assert (ncopies >= 1);
3439 
3440   int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3441   internal_fn cond_fn = get_conditional_internal_fn (ifn);
3442   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3443   if (!vec_stmt) /* transformation not required.  */
3444     {
3445       if (slp_node)
3446 	for (i = 0; i < nargs; ++i)
3447 	  if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3448 	    {
3449 	      if (dump_enabled_p ())
3450 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3451 				 "incompatible vector types for invariants\n");
3452 	      return false;
3453 	    }
3454       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3455       DUMP_VECT_SCOPE ("vectorizable_call");
3456       vect_model_simple_cost (vinfo, stmt_info,
3457 			      ncopies, dt, ndts, slp_node, cost_vec);
3458       if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3459 	record_stmt_cost (cost_vec, ncopies / 2,
3460 			  vec_promote_demote, stmt_info, 0, vect_body);
3461 
3462       if (loop_vinfo
3463 	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3464 	  && (reduc_idx >= 0 || mask_opno >= 0))
3465 	{
3466 	  if (reduc_idx >= 0
3467 	      && (cond_fn == IFN_LAST
3468 		  || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3469 						      OPTIMIZE_FOR_SPEED)))
3470 	    {
3471 	      if (dump_enabled_p ())
3472 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3473 				 "can't use a fully-masked loop because no"
3474 				 " conditional operation is available.\n");
3475 	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3476 	    }
3477 	  else
3478 	    {
3479 	      unsigned int nvectors
3480 		= (slp_node
3481 		   ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3482 		   : ncopies);
3483 	      tree scalar_mask = NULL_TREE;
3484 	      if (mask_opno >= 0)
3485 		scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3486 	      vect_record_loop_mask (loop_vinfo, masks, nvectors,
3487 				     vectype_out, scalar_mask);
3488 	    }
3489 	}
3490       return true;
3491     }
3492 
3493   /* Transform.  */
3494 
3495   if (dump_enabled_p ())
3496     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3497 
3498   /* Handle def.  */
3499   scalar_dest = gimple_call_lhs (stmt);
3500   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3501 
3502   bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3503   unsigned int vect_nargs = nargs;
3504   if (masked_loop_p && reduc_idx >= 0)
3505     {
3506       ifn = cond_fn;
3507       vect_nargs += 2;
3508     }
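  /* With a conditional internal function the vectorized call takes the loop
     mask as its first argument and the reduction input as a trailing "else"
     value, e.g. .COND_FMA (mask, a, b, c, else).  */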
3509 
3510   if (modifier == NONE || ifn != IFN_LAST)
3511     {
3512       tree prev_res = NULL_TREE;
3513       vargs.safe_grow (vect_nargs, true);
3514       auto_vec<vec<tree> > vec_defs (nargs);
3515       for (j = 0; j < ncopies; ++j)
3516 	{
3517 	  /* Build argument list for the vectorized call.  */
3518 	  if (slp_node)
3519 	    {
3520 	      vec<tree> vec_oprnds0;
3521 
3522 	      vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3523 	      vec_oprnds0 = vec_defs[0];
3524 
3525 	      /* Arguments are ready.  Create the new vector stmt.  */
3526 	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3527 		{
3528 		  int varg = 0;
3529 		  if (masked_loop_p && reduc_idx >= 0)
3530 		    {
3531 		      unsigned int vec_num = vec_oprnds0.length ();
3532 		      /* Always true for SLP.  */
3533 		      gcc_assert (ncopies == 1);
3534 		      vargs[varg++] = vect_get_loop_mask (gsi, masks, vec_num,
3535 							  vectype_out, i);
3536 		    }
3537 		  size_t k;
3538 		  for (k = 0; k < nargs; k++)
3539 		    {
3540 		      vec<tree> vec_oprndsk = vec_defs[k];
3541 		      vargs[varg++] = vec_oprndsk[i];
3542 		    }
3543 		  if (masked_loop_p && reduc_idx >= 0)
3544 		    vargs[varg++] = vargs[reduc_idx + 1];
3545 		  gimple *new_stmt;
3546 		  if (modifier == NARROW)
3547 		    {
3548 		      /* We don't define any narrowing conditional functions
3549 			 at present.  */
3550 		      gcc_assert (mask_opno < 0);
3551 		      tree half_res = make_ssa_name (vectype_in);
3552 		      gcall *call
3553 			= gimple_build_call_internal_vec (ifn, vargs);
3554 		      gimple_call_set_lhs (call, half_res);
3555 		      gimple_call_set_nothrow (call, true);
3556 		      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3557 		      if ((i & 1) == 0)
3558 			{
3559 			  prev_res = half_res;
3560 			  continue;
3561 			}
3562 		      new_temp = make_ssa_name (vec_dest);
3563 		      new_stmt = gimple_build_assign (new_temp, convert_code,
3564 						      prev_res, half_res);
3565 		      vect_finish_stmt_generation (vinfo, stmt_info,
3566 						   new_stmt, gsi);
3567 		    }
3568 		  else
3569 		    {
3570 		      if (mask_opno >= 0 && masked_loop_p)
3571 			{
3572 			  unsigned int vec_num = vec_oprnds0.length ();
3573 			  /* Always true for SLP.  */
3574 			  gcc_assert (ncopies == 1);
3575 			  tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3576 							  vectype_out, i);
3577 			  vargs[mask_opno] = prepare_vec_mask
3578 			    (loop_vinfo, TREE_TYPE (mask), mask,
3579 			     vargs[mask_opno], gsi);
3580 			}
3581 
3582 		      gcall *call;
3583 		      if (ifn != IFN_LAST)
3584 			call = gimple_build_call_internal_vec (ifn, vargs);
3585 		      else
3586 			call = gimple_build_call_vec (fndecl, vargs);
3587 		      new_temp = make_ssa_name (vec_dest, call);
3588 		      gimple_call_set_lhs (call, new_temp);
3589 		      gimple_call_set_nothrow (call, true);
3590 		      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3591 		      new_stmt = call;
3592 		    }
3593 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3594 		}
3595 	      continue;
3596 	    }
3597 
3598 	  int varg = 0;
3599 	  if (masked_loop_p && reduc_idx >= 0)
3600 	    vargs[varg++] = vect_get_loop_mask (gsi, masks, ncopies,
3601 						vectype_out, j);
3602 	  for (i = 0; i < nargs; i++)
3603 	    {
3604 	      op = gimple_call_arg (stmt, i);
3605 	      if (j == 0)
3606 		{
3607 		  vec_defs.quick_push (vNULL);
3608 		  vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3609 						 op, &vec_defs[i],
3610 						 vectypes[i]);
3611 		}
3612 	      vargs[varg++] = vec_defs[i][j];
3613 	    }
3614 	  if (masked_loop_p && reduc_idx >= 0)
3615 	    vargs[varg++] = vargs[reduc_idx + 1];
3616 
3617 	  if (mask_opno >= 0 && masked_loop_p)
3618 	    {
3619 	      tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3620 					      vectype_out, j);
3621 	      vargs[mask_opno]
3622 		= prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3623 				    vargs[mask_opno], gsi);
3624 	    }
3625 
3626 	  gimple *new_stmt;
3627 	  if (cfn == CFN_GOMP_SIMD_LANE)
3628 	    {
3629 	      tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3630 	      tree new_var
3631 		= vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3632 	      gimple *init_stmt = gimple_build_assign (new_var, cst);
3633 	      vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3634 	      new_temp = make_ssa_name (vec_dest);
3635 	      new_stmt = gimple_build_assign (new_temp, new_var);
3636 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3637 	    }
3638 	  else if (modifier == NARROW)
3639 	    {
3640 	      /* We don't define any narrowing conditional functions at
3641 		 present.  */
3642 	      gcc_assert (mask_opno < 0);
3643 	      tree half_res = make_ssa_name (vectype_in);
3644 	      gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3645 	      gimple_call_set_lhs (call, half_res);
3646 	      gimple_call_set_nothrow (call, true);
3647 	      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3648 	      if ((j & 1) == 0)
3649 		{
3650 		  prev_res = half_res;
3651 		  continue;
3652 		}
3653 	      new_temp = make_ssa_name (vec_dest);
3654 	      new_stmt = gimple_build_assign (new_temp, convert_code,
3655 					      prev_res, half_res);
3656 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3657 	    }
3658 	  else
3659 	    {
3660 	      gcall *call;
3661 	      if (ifn != IFN_LAST)
3662 		call = gimple_build_call_internal_vec (ifn, vargs);
3663 	      else
3664 		call = gimple_build_call_vec (fndecl, vargs);
3665 	      new_temp = make_ssa_name (vec_dest, call);
3666 	      gimple_call_set_lhs (call, new_temp);
3667 	      gimple_call_set_nothrow (call, true);
3668 	      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3669 	      new_stmt = call;
3670 	    }
3671 
3672 	  if (j == (modifier == NARROW ? 1 : 0))
3673 	    *vec_stmt = new_stmt;
3674 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3675 	}
3676       for (i = 0; i < nargs; i++)
3677 	{
3678 	  vec<tree> vec_oprndsi = vec_defs[i];
3679 	  vec_oprndsi.release ();
3680 	}
3681     }
3682   else if (modifier == NARROW)
3683     {
3684       auto_vec<vec<tree> > vec_defs (nargs);
3685       /* We don't define any narrowing conditional functions at present.  */
3686       gcc_assert (mask_opno < 0);
3687       for (j = 0; j < ncopies; ++j)
3688 	{
3689 	  /* Build argument list for the vectorized call.  */
3690 	  if (j == 0)
3691 	    vargs.create (nargs * 2);
3692 	  else
3693 	    vargs.truncate (0);
3694 
3695 	  if (slp_node)
3696 	    {
3697 	      vec<tree> vec_oprnds0;
3698 
3699 	      vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3700 	      vec_oprnds0 = vec_defs[0];
3701 
3702 	      /* Arguments are ready.  Create the new vector stmt.  */
3703 	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3704 		{
3705 		  size_t k;
3706 		  vargs.truncate (0);
3707 		  for (k = 0; k < nargs; k++)
3708 		    {
3709 		      vec<tree> vec_oprndsk = vec_defs[k];
3710 		      vargs.quick_push (vec_oprndsk[i]);
3711 		      vargs.quick_push (vec_oprndsk[i + 1]);
3712 		    }
3713 		  gcall *call;
3714 		  if (ifn != IFN_LAST)
3715 		    call = gimple_build_call_internal_vec (ifn, vargs);
3716 		  else
3717 		    call = gimple_build_call_vec (fndecl, vargs);
3718 		  new_temp = make_ssa_name (vec_dest, call);
3719 		  gimple_call_set_lhs (call, new_temp);
3720 		  gimple_call_set_nothrow (call, true);
3721 		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3722 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3723 		}
3724 	      continue;
3725 	    }
3726 
3727 	  for (i = 0; i < nargs; i++)
3728 	    {
3729 	      op = gimple_call_arg (stmt, i);
3730 	      if (j == 0)
3731 		{
3732 		  vec_defs.quick_push (vNULL);
3733 		  vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3734 						 op, &vec_defs[i], vectypes[i]);
3735 		}
3736 	      vec_oprnd0 = vec_defs[i][2*j];
3737 	      vec_oprnd1 = vec_defs[i][2*j+1];
3738 
3739 	      vargs.quick_push (vec_oprnd0);
3740 	      vargs.quick_push (vec_oprnd1);
3741 	    }
3742 
3743 	  gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3744 	  new_temp = make_ssa_name (vec_dest, new_stmt);
3745 	  gimple_call_set_lhs (new_stmt, new_temp);
3746 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3747 
3748 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3749 	}
3750 
3751       if (!slp_node)
3752 	*vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3753 
3754       for (i = 0; i < nargs; i++)
3755 	{
3756 	  vec<tree> vec_oprndsi = vec_defs[i];
3757 	  vec_oprndsi.release ();
3758 	}
3759     }
3760   else
3761     /* No current target implements this case.  */
3762     return false;
3763 
3764   vargs.release ();
3765 
3766   /* The call in STMT might prevent it from being removed in DCE.
3767      We however cannot remove it here, due to the way the SSA name
3768      it defines is mapped to the new definition.  So just replace the
3769      rhs of the statement with something harmless.  */
3770 
3771   if (slp_node)
3772     return true;
3773 
3774   stmt_info = vect_orig_stmt (stmt_info);
3775   lhs = gimple_get_lhs (stmt_info->stmt);
3776 
3777   gassign *new_stmt
3778     = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3779   vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3780 
3781   return true;
3782 }
3783 
3784 
3785 struct simd_call_arg_info
3786 {
3787   tree vectype;
3788   tree op;
3789   HOST_WIDE_INT linear_step;
3790   enum vect_def_type dt;
3791   unsigned int align;
3792   bool simd_lane_linear;
3793 };
3794 
3795 /* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
3796    is linear within a simd lane (but not within the whole loop), note it in
3797    *ARGINFO.  */
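/* A sketch of the kind of definition chain this recognizes (hypothetical
   GIMPLE):

     _1 = .GOMP_SIMD_LANE (simduid.0);
     _2 = _1 * 4;
     _3 = (sizetype) _2;
     op = &base + _3;

   which records a base of &base and a linear step of 4.  */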
3798 
3799 static void
3800 vect_simd_lane_linear (tree op, class loop *loop,
3801 		       struct simd_call_arg_info *arginfo)
3802 {
3803   gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3804 
3805   if (!is_gimple_assign (def_stmt)
3806       || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3807       || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3808     return;
3809 
3810   tree base = gimple_assign_rhs1 (def_stmt);
3811   HOST_WIDE_INT linear_step = 0;
3812   tree v = gimple_assign_rhs2 (def_stmt);
3813   while (TREE_CODE (v) == SSA_NAME)
3814     {
3815       tree t;
3816       def_stmt = SSA_NAME_DEF_STMT (v);
3817       if (is_gimple_assign (def_stmt))
3818 	switch (gimple_assign_rhs_code (def_stmt))
3819 	  {
3820 	  case PLUS_EXPR:
3821 	    t = gimple_assign_rhs2 (def_stmt);
3822 	    if (linear_step || TREE_CODE (t) != INTEGER_CST)
3823 	      return;
3824 	    base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3825 	    v = gimple_assign_rhs1 (def_stmt);
3826 	    continue;
3827 	  case MULT_EXPR:
3828 	    t = gimple_assign_rhs2 (def_stmt);
3829 	    if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3830 	      return;
3831 	    linear_step = tree_to_shwi (t);
3832 	    v = gimple_assign_rhs1 (def_stmt);
3833 	    continue;
3834 	  CASE_CONVERT:
3835 	    t = gimple_assign_rhs1 (def_stmt);
3836 	    if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3837 		|| (TYPE_PRECISION (TREE_TYPE (v))
3838 		    < TYPE_PRECISION (TREE_TYPE (t))))
3839 	      return;
3840 	    if (!linear_step)
3841 	      linear_step = 1;
3842 	    v = t;
3843 	    continue;
3844 	  default:
3845 	    return;
3846 	  }
3847       else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3848 	       && loop->simduid
3849 	       && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3850 	       && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3851 		   == loop->simduid))
3852 	{
3853 	  if (!linear_step)
3854 	    linear_step = 1;
3855 	  arginfo->linear_step = linear_step;
3856 	  arginfo->op = base;
3857 	  arginfo->simd_lane_linear = true;
3858 	  return;
3859 	}
3860     }
3861 }
3862 
3863 /* Return the number of elements in vector type VECTYPE, which is associated
3864    with a SIMD clone.  At present these vectors always have a constant
3865    length.  */
3866 
3867 static unsigned HOST_WIDE_INT
3868 simd_clone_subparts (tree vectype)
3869 {
3870   return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3871 }
3872 
3873 /* Function vectorizable_simd_clone_call.
3874 
3875    Check if STMT_INFO performs a function call that can be vectorized
3876    by calling a simd clone of the function.
3877    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3878    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3879    Return true if STMT_INFO is vectorizable in this way.  */
3880 
3881 static bool
3882 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3883 			      gimple_stmt_iterator *gsi,
3884 			      gimple **vec_stmt, slp_tree slp_node,
3885 			      stmt_vector_for_cost *)
3886 {
3887   tree vec_dest;
3888   tree scalar_dest;
3889   tree op, type;
3890   tree vec_oprnd0 = NULL_TREE;
3891   tree vectype;
3892   poly_uint64 nunits;
3893   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3894   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3895   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3896   tree fndecl, new_temp;
3897   int ncopies, j;
3898   auto_vec<simd_call_arg_info> arginfo;
3899   vec<tree> vargs = vNULL;
3900   size_t i, nargs;
3901   tree lhs, rtype, ratype;
3902   vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3903 
3904   /* Is STMT a vectorizable call?   */
3905   gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3906   if (!stmt)
3907     return false;
3908 
3909   fndecl = gimple_call_fndecl (stmt);
3910   if (fndecl == NULL_TREE)
3911     return false;
3912 
3913   struct cgraph_node *node = cgraph_node::get (fndecl);
3914   if (node == NULL || node->simd_clones == NULL)
3915     return false;
3916 
3917   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3918     return false;
3919 
3920   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3921       && ! vec_stmt)
3922     return false;
3923 
3924   if (gimple_call_lhs (stmt)
3925       && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3926     return false;
3927 
3928   gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3929 
3930   vectype = STMT_VINFO_VECTYPE (stmt_info);
3931 
3932   if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3933     return false;
3934 
3935   /* FORNOW */
3936   if (slp_node)
3937     return false;
3938 
3939   /* Process function arguments.  */
3940   nargs = gimple_call_num_args (stmt);
3941 
3942   /* Bail out if the function has zero arguments.  */
3943   if (nargs == 0)
3944     return false;
3945 
3946   arginfo.reserve (nargs, true);
3947 
3948   for (i = 0; i < nargs; i++)
3949     {
3950       simd_call_arg_info thisarginfo;
3951       affine_iv iv;
3952 
3953       thisarginfo.linear_step = 0;
3954       thisarginfo.align = 0;
3955       thisarginfo.op = NULL_TREE;
3956       thisarginfo.simd_lane_linear = false;
3957 
3958       op = gimple_call_arg (stmt, i);
3959       if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3960 			       &thisarginfo.vectype)
3961 	  || thisarginfo.dt == vect_uninitialized_def)
3962 	{
3963 	  if (dump_enabled_p ())
3964 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3965 			     "use not simple.\n");
3966 	  return false;
3967 	}
3968 
3969       if (thisarginfo.dt == vect_constant_def
3970 	  || thisarginfo.dt == vect_external_def)
3971 	gcc_assert (thisarginfo.vectype == NULL_TREE);
3972       else
3973 	{
3974 	  gcc_assert (thisarginfo.vectype != NULL_TREE);
3975 	  if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3976 	    {
3977 	      if (dump_enabled_p ())
3978 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3979 				 "vector mask arguments are not supported\n");
3980 	      return false;
3981 	    }
3982 	}
3983 
3984       /* For linear arguments, the analyze phase should have saved
3985 	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
3986       if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3987 	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3988 	{
3989 	  gcc_assert (vec_stmt);
3990 	  thisarginfo.linear_step
3991 	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3992 	  thisarginfo.op
3993 	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3994 	  thisarginfo.simd_lane_linear
3995 	    = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3996 	       == boolean_true_node);
3997 	  /* If loop has been peeled for alignment, we need to adjust it.  */
3998 	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3999 	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4000 	  if (n1 != n2 && !thisarginfo.simd_lane_linear)
4001 	    {
4002 	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4003 	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4004 	      tree opt = TREE_TYPE (thisarginfo.op);
4005 	      bias = fold_convert (TREE_TYPE (step), bias);
4006 	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4007 	      thisarginfo.op
4008 		= fold_build2 (POINTER_TYPE_P (opt)
4009 			       ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4010 			       thisarginfo.op, bias);
4011 	    }
4012 	}
4013       else if (!vec_stmt
4014 	       && thisarginfo.dt != vect_constant_def
4015 	       && thisarginfo.dt != vect_external_def
4016 	       && loop_vinfo
4017 	       && TREE_CODE (op) == SSA_NAME
4018 	       && simple_iv (loop, loop_containing_stmt (stmt), op,
4019 			     &iv, false)
4020 	       && tree_fits_shwi_p (iv.step))
4021 	{
4022 	  thisarginfo.linear_step = tree_to_shwi (iv.step);
4023 	  thisarginfo.op = iv.base;
4024 	}
4025       else if ((thisarginfo.dt == vect_constant_def
4026 		|| thisarginfo.dt == vect_external_def)
4027 	       && POINTER_TYPE_P (TREE_TYPE (op)))
4028 	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4029       /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4030 	 linear too.  */
4031       if (POINTER_TYPE_P (TREE_TYPE (op))
4032 	  && !thisarginfo.linear_step
4033 	  && !vec_stmt
4034 	  && thisarginfo.dt != vect_constant_def
4035 	  && thisarginfo.dt != vect_external_def
4036 	  && loop_vinfo
4037 	  && !slp_node
4038 	  && TREE_CODE (op) == SSA_NAME)
4039 	vect_simd_lane_linear (op, loop, &thisarginfo);
4040 
4041       arginfo.quick_push (thisarginfo);
4042     }
4043 
4044   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4045   if (!vf.is_constant ())
4046     {
4047       if (dump_enabled_p ())
4048 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4049 			 "not considering SIMD clones; not yet supported"
4050 			 " for variable-width vectors.\n");
4051       return false;
4052     }
4053 
4054   unsigned int badness = 0;
4055   struct cgraph_node *bestn = NULL;
4056   if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4057     bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4058   else
4059     for (struct cgraph_node *n = node->simd_clones; n != NULL;
4060 	 n = n->simdclone->next_clone)
4061       {
4062 	unsigned int this_badness = 0;
4063 	unsigned int num_calls;
4064 	if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
4065 	    || n->simdclone->nargs != nargs)
4066 	  continue;
4067 	if (num_calls != 1)
4068 	  this_badness += exact_log2 (num_calls) * 4096;
4069 	if (n->simdclone->inbranch)
4070 	  this_badness += 8192;
4071 	int target_badness = targetm.simd_clone.usable (n);
4072 	if (target_badness < 0)
4073 	  continue;
4074 	this_badness += target_badness * 512;
4075 	/* FORNOW: Have to add code to add the mask argument.  */
4076 	if (n->simdclone->inbranch)
4077 	  continue;
4078 	for (i = 0; i < nargs; i++)
4079 	  {
4080 	    switch (n->simdclone->args[i].arg_type)
4081 	      {
4082 	      case SIMD_CLONE_ARG_TYPE_VECTOR:
4083 		if (!useless_type_conversion_p
4084 			(n->simdclone->args[i].orig_type,
4085 			 TREE_TYPE (gimple_call_arg (stmt, i))))
4086 		  i = -1;
4087 		else if (arginfo[i].dt == vect_constant_def
4088 			 || arginfo[i].dt == vect_external_def
4089 			 || arginfo[i].linear_step)
4090 		  this_badness += 64;
4091 		break;
4092 	      case SIMD_CLONE_ARG_TYPE_UNIFORM:
4093 		if (arginfo[i].dt != vect_constant_def
4094 		    && arginfo[i].dt != vect_external_def)
4095 		  i = -1;
4096 		break;
4097 	      case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4098 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4099 		if (arginfo[i].dt == vect_constant_def
4100 		    || arginfo[i].dt == vect_external_def
4101 		    || (arginfo[i].linear_step
4102 			!= n->simdclone->args[i].linear_step))
4103 		  i = -1;
4104 		break;
4105 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4106 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4107 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4108 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4109 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4110 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4111 		/* FORNOW */
4112 		i = -1;
4113 		break;
4114 	      case SIMD_CLONE_ARG_TYPE_MASK:
4115 		gcc_unreachable ();
4116 	      }
4117 	    if (i == (size_t) -1)
4118 	      break;
4119 	    if (n->simdclone->args[i].alignment > arginfo[i].align)
4120 	      {
4121 		i = -1;
4122 		break;
4123 	      }
4124 	    if (arginfo[i].align)
4125 	      this_badness += (exact_log2 (arginfo[i].align)
4126 			       - exact_log2 (n->simdclone->args[i].alignment));
4127 	  }
4128 	if (i == (size_t) -1)
4129 	  continue;
4130 	if (bestn == NULL || this_badness < badness)
4131 	  {
4132 	    bestn = n;
4133 	    badness = this_badness;
4134 	  }
4135       }
4136 
4137   if (bestn == NULL)
4138     return false;
4139 
4140   for (i = 0; i < nargs; i++)
4141     if ((arginfo[i].dt == vect_constant_def
4142 	 || arginfo[i].dt == vect_external_def)
4143 	&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4144       {
4145 	tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4146 	arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4147 							  slp_node);
4148 	if (arginfo[i].vectype == NULL
4149 	    || !constant_multiple_p (bestn->simdclone->simdlen,
4150 				     simd_clone_subparts (arginfo[i].vectype)))
4151 	  return false;
4152       }
4153 
4154   fndecl = bestn->decl;
4155   nunits = bestn->simdclone->simdlen;
4156   ncopies = vector_unroll_factor (vf, nunits);
4157 
4158   /* If the function isn't const, only allow it in simd loops where the
4159      user has asserted that at least nunits consecutive iterations can be
4160      performed using SIMD instructions.  */
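  /* E.g. a loop annotated with "#pragma omp simd safelen(8)" allows clones
     with a simdlen of up to 8 to be used even if the function reads or
     writes memory.  */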
4161   if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4162       && gimple_vuse (stmt))
4163     return false;
4164 
4165   /* Sanity check: make sure that at least one copy of the vectorized stmt
4166      needs to be generated.  */
4167   gcc_assert (ncopies >= 1);
4168 
4169   if (!vec_stmt) /* transformation not required.  */
4170     {
4171       STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4172       for (i = 0; i < nargs; i++)
4173 	if ((bestn->simdclone->args[i].arg_type
4174 	     == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4175 	    || (bestn->simdclone->args[i].arg_type
4176 		== SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4177 	  {
4178 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4179 									+ 1,
4180 								      true);
4181 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4182 	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4183 		       ? size_type_node : TREE_TYPE (arginfo[i].op);
4184 	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
4185 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4186 	    tree sll = arginfo[i].simd_lane_linear
4187 		       ? boolean_true_node : boolean_false_node;
4188 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4189 	  }
4190       STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4191       DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4192 /*      vect_model_simple_cost (vinfo, stmt_info, ncopies,
4193 				dt, slp_node, cost_vec); */
4194       return true;
4195     }
4196 
4197   /* Transform.  */
4198 
4199   if (dump_enabled_p ())
4200     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4201 
4202   /* Handle def.  */
4203   scalar_dest = gimple_call_lhs (stmt);
4204   vec_dest = NULL_TREE;
4205   rtype = NULL_TREE;
4206   ratype = NULL_TREE;
4207   if (scalar_dest)
4208     {
4209       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4210       rtype = TREE_TYPE (TREE_TYPE (fndecl));
4211       if (TREE_CODE (rtype) == ARRAY_TYPE)
4212 	{
4213 	  ratype = rtype;
4214 	  rtype = TREE_TYPE (ratype);
4215 	}
4216     }
4217 
4218   auto_vec<vec<tree> > vec_oprnds;
4219   auto_vec<unsigned> vec_oprnds_i;
4220   vec_oprnds.safe_grow_cleared (nargs, true);
4221   vec_oprnds_i.safe_grow_cleared (nargs, true);
4222   for (j = 0; j < ncopies; ++j)
4223     {
4224       /* Build argument list for the vectorized call.  */
4225       if (j == 0)
4226 	vargs.create (nargs);
4227       else
4228 	vargs.truncate (0);
4229 
4230       for (i = 0; i < nargs; i++)
4231 	{
4232 	  unsigned int k, l, m, o;
4233 	  tree atype;
4234 	  op = gimple_call_arg (stmt, i);
4235 	  switch (bestn->simdclone->args[i].arg_type)
4236 	    {
4237 	    case SIMD_CLONE_ARG_TYPE_VECTOR:
4238 	      atype = bestn->simdclone->args[i].vector_type;
4239 	      o = vector_unroll_factor (nunits,
4240 					simd_clone_subparts (atype));
4241 	      for (m = j * o; m < (j + 1) * o; m++)
4242 		{
4243 		  if (simd_clone_subparts (atype)
4244 		      < simd_clone_subparts (arginfo[i].vectype))
4245 		    {
4246 		      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4247 		      k = (simd_clone_subparts (arginfo[i].vectype)
4248 			   / simd_clone_subparts (atype));
4249 		      gcc_assert ((k & (k - 1)) == 0);
4250 		      if (m == 0)
4251 			{
4252 			  vect_get_vec_defs_for_operand (vinfo, stmt_info,
4253 							 ncopies * o / k, op,
4254 							 &vec_oprnds[i]);
4255 			  vec_oprnds_i[i] = 0;
4256 			  vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4257 			}
4258 		      else
4259 			{
4260 			  vec_oprnd0 = arginfo[i].op;
4261 			  if ((m & (k - 1)) == 0)
4262 			    vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4263 			}
4264 		      arginfo[i].op = vec_oprnd0;
4265 		      vec_oprnd0
4266 			= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4267 				  bitsize_int (prec),
4268 				  bitsize_int ((m & (k - 1)) * prec));
4269 		      gassign *new_stmt
4270 			= gimple_build_assign (make_ssa_name (atype),
4271 					       vec_oprnd0);
4272 		      vect_finish_stmt_generation (vinfo, stmt_info,
4273 						   new_stmt, gsi);
4274 		      vargs.safe_push (gimple_assign_lhs (new_stmt));
4275 		    }
4276 		  else
4277 		    {
4278 		      k = (simd_clone_subparts (atype)
4279 			   / simd_clone_subparts (arginfo[i].vectype));
4280 		      gcc_assert ((k & (k - 1)) == 0);
4281 		      vec<constructor_elt, va_gc> *ctor_elts;
4282 		      if (k != 1)
4283 			vec_alloc (ctor_elts, k);
4284 		      else
4285 			ctor_elts = NULL;
4286 		      for (l = 0; l < k; l++)
4287 			{
4288 			  if (m == 0 && l == 0)
4289 			    {
4290 			      vect_get_vec_defs_for_operand (vinfo, stmt_info,
4291 							     k * o * ncopies,
4292 							     op,
4293 							     &vec_oprnds[i]);
4294 			      vec_oprnds_i[i] = 0;
4295 			      vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4296 			    }
4297 			  else
4298 			    vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4299 			  arginfo[i].op = vec_oprnd0;
4300 			  if (k == 1)
4301 			    break;
4302 			  CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4303 						  vec_oprnd0);
4304 			}
4305 		      if (k == 1)
4306 			if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4307 						       atype))
4308 			  {
4309 			    vec_oprnd0
4310 			      = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4311 			    gassign *new_stmt
4312 			      = gimple_build_assign (make_ssa_name (atype),
4313 						     vec_oprnd0);
4314 			    vect_finish_stmt_generation (vinfo, stmt_info,
4315 							 new_stmt, gsi);
4316 			    vargs.safe_push (gimple_assign_lhs (new_stmt));
4317 			  }
4318 			else
4319 			  vargs.safe_push (vec_oprnd0);
4320 		      else
4321 			{
4322 			  vec_oprnd0 = build_constructor (atype, ctor_elts);
4323 			  gassign *new_stmt
4324 			    = gimple_build_assign (make_ssa_name (atype),
4325 						   vec_oprnd0);
4326 			  vect_finish_stmt_generation (vinfo, stmt_info,
4327 						       new_stmt, gsi);
4328 			  vargs.safe_push (gimple_assign_lhs (new_stmt));
4329 			}
4330 		    }
4331 		}
4332 	      break;
4333 	    case SIMD_CLONE_ARG_TYPE_UNIFORM:
4334 	      vargs.safe_push (op);
4335 	      break;
4336 	    case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4337 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4338 	      if (j == 0)
4339 		{
4340 		  gimple_seq stmts;
4341 		  arginfo[i].op
4342 		    = force_gimple_operand (unshare_expr (arginfo[i].op),
4343 					    &stmts, true, NULL_TREE);
4344 		  if (stmts != NULL)
4345 		    {
4346 		      basic_block new_bb;
4347 		      edge pe = loop_preheader_edge (loop);
4348 		      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4349 		      gcc_assert (!new_bb);
4350 		    }
4351 		  if (arginfo[i].simd_lane_linear)
4352 		    {
4353 		      vargs.safe_push (arginfo[i].op);
4354 		      break;
4355 		    }
4356 		  tree phi_res = copy_ssa_name (op);
4357 		  gphi *new_phi = create_phi_node (phi_res, loop->header);
4358 		  add_phi_arg (new_phi, arginfo[i].op,
4359 			       loop_preheader_edge (loop), UNKNOWN_LOCATION);
4360 		  enum tree_code code
4361 		    = POINTER_TYPE_P (TREE_TYPE (op))
4362 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
4363 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
4364 			      ? sizetype : TREE_TYPE (op);
4365 		  poly_widest_int cst
4366 		    = wi::mul (bestn->simdclone->args[i].linear_step,
4367 			       ncopies * nunits);
4368 		  tree tcst = wide_int_to_tree (type, cst);
4369 		  tree phi_arg = copy_ssa_name (op);
4370 		  gassign *new_stmt
4371 		    = gimple_build_assign (phi_arg, code, phi_res, tcst);
4372 		  gimple_stmt_iterator si = gsi_after_labels (loop->header);
4373 		  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4374 		  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4375 			       UNKNOWN_LOCATION);
4376 		  arginfo[i].op = phi_res;
4377 		  vargs.safe_push (phi_res);
4378 		}
4379 	      else
4380 		{
4381 		  enum tree_code code
4382 		    = POINTER_TYPE_P (TREE_TYPE (op))
4383 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
4384 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
4385 			      ? sizetype : TREE_TYPE (op);
4386 		  poly_widest_int cst
4387 		    = wi::mul (bestn->simdclone->args[i].linear_step,
4388 			       j * nunits);
4389 		  tree tcst = wide_int_to_tree (type, cst);
4390 		  new_temp = make_ssa_name (TREE_TYPE (op));
4391 		  gassign *new_stmt
4392 		    = gimple_build_assign (new_temp, code,
4393 					   arginfo[i].op, tcst);
4394 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4395 		  vargs.safe_push (new_temp);
4396 		}
4397 	      break;
4398 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4399 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4400 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4401 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4402 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4403 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4404 	    default:
4405 	      gcc_unreachable ();
4406 	    }
4407 	}
4408 
4409       gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4410       if (vec_dest)
4411 	{
4412 	  gcc_assert (ratype
4413 		      || known_eq (simd_clone_subparts (rtype), nunits));
4414 	  if (ratype)
4415 	    new_temp = create_tmp_var (ratype);
4416 	  else if (useless_type_conversion_p (vectype, rtype))
4417 	    new_temp = make_ssa_name (vec_dest, new_call);
4418 	  else
4419 	    new_temp = make_ssa_name (rtype, new_call);
4420 	  gimple_call_set_lhs (new_call, new_temp);
4421 	}
4422       vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4423       gimple *new_stmt = new_call;
4424 
4425       if (vec_dest)
4426 	{
4427 	  if (!multiple_p (simd_clone_subparts (vectype), nunits))
4428 	    {
4429 	      unsigned int k, l;
4430 	      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4431 	      poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4432 	      k = vector_unroll_factor (nunits,
4433 					simd_clone_subparts (vectype));
4434 	      gcc_assert ((k & (k - 1)) == 0);
4435 	      for (l = 0; l < k; l++)
4436 		{
4437 		  tree t;
4438 		  if (ratype)
4439 		    {
4440 		      t = build_fold_addr_expr (new_temp);
4441 		      t = build2 (MEM_REF, vectype, t,
4442 				  build_int_cst (TREE_TYPE (t), l * bytes));
4443 		    }
4444 		  else
4445 		    t = build3 (BIT_FIELD_REF, vectype, new_temp,
4446 				bitsize_int (prec), bitsize_int (l * prec));
4447 		  new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4448 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4449 
4450 		  if (j == 0 && l == 0)
4451 		    *vec_stmt = new_stmt;
4452 		  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4453 		}
4454 
4455 	      if (ratype)
4456 		vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4457 	      continue;
4458 	    }
4459 	  else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4460 	    {
4461 	      unsigned int k = (simd_clone_subparts (vectype)
4462 				/ simd_clone_subparts (rtype));
4463 	      gcc_assert ((k & (k - 1)) == 0);
4464 	      if ((j & (k - 1)) == 0)
4465 		vec_alloc (ret_ctor_elts, k);
4466 	      if (ratype)
4467 		{
4468 		  unsigned int m, o;
4469 		  o = vector_unroll_factor (nunits,
4470 					    simd_clone_subparts (rtype));
4471 		  for (m = 0; m < o; m++)
4472 		    {
4473 		      tree tem = build4 (ARRAY_REF, rtype, new_temp,
4474 					 size_int (m), NULL_TREE, NULL_TREE);
4475 		      new_stmt = gimple_build_assign (make_ssa_name (rtype),
4476 						      tem);
4477 		      vect_finish_stmt_generation (vinfo, stmt_info,
4478 						   new_stmt, gsi);
4479 		      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4480 					      gimple_assign_lhs (new_stmt));
4481 		    }
4482 		  vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4483 		}
4484 	      else
4485 		CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4486 	      if ((j & (k - 1)) != k - 1)
4487 		continue;
4488 	      vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4489 	      new_stmt
4490 		= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4491 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4492 
4493 	      if ((unsigned) j == k - 1)
4494 		*vec_stmt = new_stmt;
4495 	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4496 	      continue;
4497 	    }
4498 	  else if (ratype)
4499 	    {
4500 	      tree t = build_fold_addr_expr (new_temp);
4501 	      t = build2 (MEM_REF, vectype, t,
4502 			  build_int_cst (TREE_TYPE (t), 0));
4503 	      new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4504 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4505 	      vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4506 	    }
4507 	  else if (!useless_type_conversion_p (vectype, rtype))
4508 	    {
4509 	      vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4510 	      new_stmt
4511 		= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4512 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4513 	    }
4514 	}
4515 
4516       if (j == 0)
4517 	*vec_stmt = new_stmt;
4518       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4519     }
4520 
4521   for (i = 0; i < nargs; ++i)
4522     {
4523       vec<tree> oprndsi = vec_oprnds[i];
4524       oprndsi.release ();
4525     }
4526   vargs.release ();
4527 
4528   /* The call in STMT might prevent it from being removed in dce.
4529      We however cannot remove it here, due to the way the ssa name
4530      it defines is mapped to the new definition.  So just replace the
4531      rhs of the statement with something harmless.  */
4532 
4533   if (slp_node)
4534     return true;
4535 
4536   gimple *new_stmt;
4537   if (scalar_dest)
4538     {
4539       type = TREE_TYPE (scalar_dest);
4540       lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4541       new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4542     }
4543   else
4544     new_stmt = gimple_build_nop ();
4545   vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4546   unlink_stmt_vdef (stmt);
4547 
4548   return true;
4549 }
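
/* For illustration only (hypothetical scalar statement, not taken from
   this file): given an original scalar call

       res_1 = foo (x_2);

   the vector calls to the selected simd clone are emitted above, and the
   scalar statement is merely neutralized to

       res_1 = 0;

   so that DCE can delete it once the mapped SSA definition has no
   remaining uses.  */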
4550 
4551 
4552 /* Function vect_gen_widened_results_half
4553 
4554    Create a vector stmt whose code, number of operands, and result
4555    variable are CODE, OP_TYPE, and VEC_DEST, and whose operands are
4556    VEC_OPRND0 and VEC_OPRND1 (the latter is used only when OP_TYPE is
4557    binary_op).  The new vector stmt is inserted at GSI.
4558    STMT_INFO is the original scalar stmt that we are vectorizing.  */
4560 
4561 static gimple *
4562 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4563                                tree vec_oprnd0, tree vec_oprnd1, int op_type,
4564 			       tree vec_dest, gimple_stmt_iterator *gsi,
4565 			       stmt_vec_info stmt_info)
4566 {
4567   gimple *new_stmt;
4568   tree new_temp;
4569 
4570   /* Generate half of the widened result:  */
4571   gcc_assert (op_type == TREE_CODE_LENGTH (code));
4572   if (op_type != binary_op)
4573     vec_oprnd1 = NULL;
4574   new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4575   new_temp = make_ssa_name (vec_dest, new_stmt);
4576   gimple_assign_set_lhs (new_stmt, new_temp);
4577   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4578 
4579   return new_stmt;
4580 }
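
/* For illustration only (hypothetical vector types): callers invoke the
   function above twice, once per half, e.g. for a widening multiply of
   two V8HI operands into V4SI results:

       lo_3 = VEC_WIDEN_MULT_LO_EXPR <vect_a_1, vect_b_2>;
       hi_4 = VEC_WIDEN_MULT_HI_EXPR <vect_a_1, vect_b_2>;

   each half holds half as many, twice as wide, elements as the inputs.  */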
4581 
4582 
4583 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4584    For multi-step conversions store the resulting vectors and call the function
4585    recursively.  */
4586 
4587 static void
4588 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4589 				       int multi_step_cvt,
4590 				       stmt_vec_info stmt_info,
4591 				       vec<tree> &vec_dsts,
4592 				       gimple_stmt_iterator *gsi,
4593 				       slp_tree slp_node, enum tree_code code)
4594 {
4595   unsigned int i;
4596   tree vop0, vop1, new_tmp, vec_dest;
4597 
4598   vec_dest = vec_dsts.pop ();
4599 
4600   for (i = 0; i < vec_oprnds->length (); i += 2)
4601     {
4602       /* Create demotion operation.  */
4603       vop0 = (*vec_oprnds)[i];
4604       vop1 = (*vec_oprnds)[i + 1];
4605       gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4606       new_tmp = make_ssa_name (vec_dest, new_stmt);
4607       gimple_assign_set_lhs (new_stmt, new_tmp);
4608       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4609 
4610       if (multi_step_cvt)
4611 	/* Store the resulting vector for the next recursive call.  */
4612 	(*vec_oprnds)[i/2] = new_tmp;
4613       else
4614 	{
4615 	  /* This is the last step of the conversion sequence.  Store the
4616 	     vectors in SLP_NODE or in the vector info of the scalar statement
4617 	     (or in the STMT_VINFO_RELATED_STMT chain).  */
4618 	  if (slp_node)
4619 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4620 	  else
4621 	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4622 	}
4623     }
4624 
4625   /* For multi-step demotion operations we first generate demotion operations
4626      from the source type to the intermediate types, and then combine the
4627      results (stored in VEC_OPRNDS) with a further demotion operation to the
4628      destination type.  */
4629   if (multi_step_cvt)
4630     {
4631       /* At each level of recursion we have half of the operands we had at the
4632 	 previous level.  */
4633       vec_oprnds->truncate ((i+1)/2);
4634       vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4635 					     multi_step_cvt - 1,
4636 					     stmt_info, vec_dsts, gsi,
4637 					     slp_node, VEC_PACK_TRUNC_EXPR);
4638     }
4639 
4640   vec_dsts.quick_push (vec_dest);
4641 }
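
/* For illustration only (hypothetical 128-bit vectors): demoting int to
   char proceeds in two steps; the first level packs four V4SI inputs into
   two V8HI vectors using the caller-supplied narrowing code, and the
   recursive call packs those into one V16QI vector with
   VEC_PACK_TRUNC_EXPR.  */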
4642 
4643 
4644 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4645    and VEC_OPRNDS1, for a binary operation associated with scalar statement
4646    STMT_INFO.  For multi-step conversions store the resulting vectors and
4647    call the function recursively.  */
4648 
4649 static void
4650 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4651 					vec<tree> *vec_oprnds0,
4652 					vec<tree> *vec_oprnds1,
4653 					stmt_vec_info stmt_info, tree vec_dest,
4654 					gimple_stmt_iterator *gsi,
4655 					enum tree_code code1,
4656 					enum tree_code code2, int op_type)
4657 {
4658   int i;
4659   tree vop0, vop1, new_tmp1, new_tmp2;
4660   gimple *new_stmt1, *new_stmt2;
4661   vec<tree> vec_tmp = vNULL;
4662 
4663   vec_tmp.create (vec_oprnds0->length () * 2);
4664   FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4665     {
4666       if (op_type == binary_op)
4667 	vop1 = (*vec_oprnds1)[i];
4668       else
4669 	vop1 = NULL_TREE;
4670 
4671       /* Generate the two halves of promotion operation.  */
4672       new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4673 						 op_type, vec_dest, gsi,
4674 						 stmt_info);
4675       new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4676 						 op_type, vec_dest, gsi,
4677 						 stmt_info);
4678       if (is_gimple_call (new_stmt1))
4679 	{
4680 	  new_tmp1 = gimple_call_lhs (new_stmt1);
4681 	  new_tmp2 = gimple_call_lhs (new_stmt2);
4682 	}
4683       else
4684 	{
4685 	  new_tmp1 = gimple_assign_lhs (new_stmt1);
4686 	  new_tmp2 = gimple_assign_lhs (new_stmt2);
4687 	}
4688 
4689       /* Store the results for the next step.  */
4690       vec_tmp.quick_push (new_tmp1);
4691       vec_tmp.quick_push (new_tmp2);
4692     }
4693 
4694   vec_oprnds0->release ();
4695   *vec_oprnds0 = vec_tmp;
4696 }
4697 
4698 /* Create vectorized promotion stmts for widening stmts using only half the
4699    potential vector size for input.  */
4700 static void
4701 vect_create_half_widening_stmts (vec_info *vinfo,
4702 					vec<tree> *vec_oprnds0,
4703 					vec<tree> *vec_oprnds1,
4704 					stmt_vec_info stmt_info, tree vec_dest,
4705 					gimple_stmt_iterator *gsi,
4706 					enum tree_code code1,
4707 					int op_type)
4708 {
4709   int i;
4710   tree vop0, vop1;
4711   gimple *new_stmt1;
4712   gimple *new_stmt2;
4713   gimple *new_stmt3;
4714   vec<tree> vec_tmp = vNULL;
4715 
4716   vec_tmp.create (vec_oprnds0->length ());
4717   FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4718     {
4719       tree new_tmp1, new_tmp2, new_tmp3, out_type;
4720 
4721       gcc_assert (op_type == binary_op);
4722       vop1 = (*vec_oprnds1)[i];
4723 
4724       /* Widen the first vector input.  */
4725       out_type = TREE_TYPE (vec_dest);
4726       new_tmp1 = make_ssa_name (out_type);
4727       new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4728       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4729       if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4730 	{
4731 	  /* Widen the second vector input.  */
4732 	  new_tmp2 = make_ssa_name (out_type);
4733 	  new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4734 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4735 	  /* Perform the operation with both vector inputs widened.  */
4736 	  new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
4737 	}
4738       else
4739 	{
4740 	  /* Perform the operation with only the first vector input widened.  */
4741 	  new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
4742 	}
4743 
4744       new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4745       gimple_assign_set_lhs (new_stmt3, new_tmp3);
4746       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4747 
4748       /* Store the results for the next step.  */
4749       vec_tmp.quick_push (new_tmp3);
4750     }
4751 
4752   vec_oprnds0->release ();
4753   *vec_oprnds0 = vec_tmp;
4754 }
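
/* For illustration only (hypothetical types): a WIDEN_PLUS_EXPR whose
   input and output vectors have the same number of lanes, say V4HI
   operands and a V4SI result, is emitted as explicit widening
   conversions followed by a regular addition:

       a_w = (V4SI) vect_a;
       b_w = (V4SI) vect_b;
       res = a_w + b_w;  */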
4755 
4756 
4757 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4758    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4759    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4760    Return true if STMT_INFO is vectorizable in this way.  */
4761 
4762 static bool
4763 vectorizable_conversion (vec_info *vinfo,
4764 			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4765 			 gimple **vec_stmt, slp_tree slp_node,
4766 			 stmt_vector_for_cost *cost_vec)
4767 {
4768   tree vec_dest;
4769   tree scalar_dest;
4770   tree op0, op1 = NULL_TREE;
4771   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4772   enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4773   enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4774   tree new_temp;
4775   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4776   int ndts = 2;
4777   poly_uint64 nunits_in;
4778   poly_uint64 nunits_out;
4779   tree vectype_out, vectype_in;
4780   int ncopies, i;
4781   tree lhs_type, rhs_type;
4782   enum { NARROW, NONE, WIDEN } modifier;
4783   vec<tree> vec_oprnds0 = vNULL;
4784   vec<tree> vec_oprnds1 = vNULL;
4785   tree vop0;
4786   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4787   int multi_step_cvt = 0;
4788   vec<tree> interm_types = vNULL;
4789   tree intermediate_type, cvt_type = NULL_TREE;
4790   int op_type;
4791   unsigned short fltsz;
4792 
4793   /* Is STMT a vectorizable conversion?   */
4794 
4795   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4796     return false;
4797 
4798   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4799       && ! vec_stmt)
4800     return false;
4801 
4802   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4803   if (!stmt)
4804     return false;
4805 
4806   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4807     return false;
4808 
4809   code = gimple_assign_rhs_code (stmt);
4810   if (!CONVERT_EXPR_CODE_P (code)
4811       && code != FIX_TRUNC_EXPR
4812       && code != FLOAT_EXPR
4813       && code != WIDEN_PLUS_EXPR
4814       && code != WIDEN_MINUS_EXPR
4815       && code != WIDEN_MULT_EXPR
4816       && code != WIDEN_LSHIFT_EXPR)
4817     return false;
4818 
4819   bool widen_arith = (code == WIDEN_PLUS_EXPR
4820 		      || code == WIDEN_MINUS_EXPR
4821 		      || code == WIDEN_MULT_EXPR
4822 		      || code == WIDEN_LSHIFT_EXPR);
4823   op_type = TREE_CODE_LENGTH (code);
4824 
4825   /* Check types of lhs and rhs.  */
4826   scalar_dest = gimple_assign_lhs (stmt);
4827   lhs_type = TREE_TYPE (scalar_dest);
4828   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4829 
4830   /* Check the operands of the operation.  */
4831   slp_tree slp_op0, slp_op1 = NULL;
4832   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4833 			   0, &op0, &slp_op0, &dt[0], &vectype_in))
4834     {
4835       if (dump_enabled_p ())
4836 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4837 			 "use not simple.\n");
4838       return false;
4839     }
4840 
4841   rhs_type = TREE_TYPE (op0);
4842   if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4843       && !((INTEGRAL_TYPE_P (lhs_type)
4844 	    && INTEGRAL_TYPE_P (rhs_type))
4845 	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
4846 	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
4847     return false;
4848 
4849   if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4850       && ((INTEGRAL_TYPE_P (lhs_type)
4851 	   && !type_has_mode_precision_p (lhs_type))
4852 	  || (INTEGRAL_TYPE_P (rhs_type)
4853 	      && !type_has_mode_precision_p (rhs_type))))
4854     {
4855       if (dump_enabled_p ())
4856 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4857                          "type conversion to/from bit-precision unsupported."
4858                          "\n");
4859       return false;
4860     }
4861 
4862   if (op_type == binary_op)
4863     {
4864       gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
4865 		  || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
4866 
4867       op1 = gimple_assign_rhs2 (stmt);
4868       tree vectype1_in;
4869       if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4870 			       &op1, &slp_op1, &dt[1], &vectype1_in))
4871 	{
4872           if (dump_enabled_p ())
4873             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4874                              "use not simple.\n");
4875 	  return false;
4876 	}
4877       /* If OP0 is a constant, no vector type is derived from it;
4878 	 use the vector type derived from OP1 instead.  */
4879       if (!vectype_in)
4880 	vectype_in = vectype1_in;
4881     }
4882 
4883   /* If op0 is an external or constant def, infer the vector type
4884      from the scalar type.  */
4885   if (!vectype_in)
4886     vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4887   if (vec_stmt)
4888     gcc_assert (vectype_in);
4889   if (!vectype_in)
4890     {
4891       if (dump_enabled_p ())
4892 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4893 			 "no vectype for scalar type %T\n", rhs_type);
4894 
4895       return false;
4896     }
4897 
4898   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4899       && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4900     {
4901       if (dump_enabled_p ())
4902 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4903 			 "can't convert between boolean and non "
4904 			 "boolean vectors %T\n", rhs_type);
4905 
4906       return false;
4907     }
4908 
4909   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4910   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4911   if (known_eq (nunits_out, nunits_in))
4912     if (widen_arith)
4913       modifier = WIDEN;
4914     else
4915       modifier = NONE;
4916   else if (multiple_p (nunits_out, nunits_in))
4917     modifier = NARROW;
4918   else
4919     {
4920       gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4921       modifier = WIDEN;
4922     }
4923 
4924   /* Multiple types in SLP are handled by creating the appropriate number of
4925      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
4926      case of SLP.  */
4927   if (slp_node)
4928     ncopies = 1;
4929   else if (modifier == NARROW)
4930     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4931   else
4932     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4933 
4934   /* Sanity check: make sure that at least one copy of the vectorized stmt
4935      needs to be generated.  */
4936   gcc_assert (ncopies >= 1);
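
  /* For example (hypothetical 128-bit vectors): converting float to
     double has vectype_in V4SF (4 lanes) and vectype_out V2DF (2 lanes),
     so nunits_in is a multiple of nunits_out, the modifier is WIDEN and
     ncopies is derived from vectype_in.  Converting int to short has
     V4SI in and V8HI out, giving NARROW and ncopies derived from
     vectype_out.  */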
4937 
4938   bool found_mode = false;
4939   scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4940   scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4941   opt_scalar_mode rhs_mode_iter;
4942 
4943   /* Supportable by target?  */
4944   switch (modifier)
4945     {
4946     case NONE:
4947       if (code != FIX_TRUNC_EXPR
4948 	  && code != FLOAT_EXPR
4949 	  && !CONVERT_EXPR_CODE_P (code))
4950 	return false;
4951       if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4952 	break;
4953       /* FALLTHRU */
4954     unsupported:
4955       if (dump_enabled_p ())
4956 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4957                          "conversion not supported by target.\n");
4958       return false;
4959 
4960     case WIDEN:
4961       if (known_eq (nunits_in, nunits_out))
4962 	{
4963 	  if (!supportable_half_widening_operation (code, vectype_out,
4964 						   vectype_in, &code1))
4965 	    goto unsupported;
4966 	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
4967 	  break;
4968 	}
4969       if (supportable_widening_operation (vinfo, code, stmt_info,
4970 					       vectype_out, vectype_in, &code1,
4971 					       &code2, &multi_step_cvt,
4972 					       &interm_types))
4973 	{
4974 	  /* Binary widening operation can only be supported directly by the
4975 	     architecture.  */
4976 	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
4977 	  break;
4978 	}
4979 
4980       if (code != FLOAT_EXPR
4981 	  || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4982 	goto unsupported;
4983 
4984       fltsz = GET_MODE_SIZE (lhs_mode);
4985       FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4986 	{
4987 	  rhs_mode = rhs_mode_iter.require ();
4988 	  if (GET_MODE_SIZE (rhs_mode) > fltsz)
4989 	    break;
4990 
4991 	  cvt_type
4992 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4993 	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4994 	  if (cvt_type == NULL_TREE)
4995 	    goto unsupported;
4996 
4997 	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
4998 	    {
4999 	      if (!supportable_convert_operation (code, vectype_out,
5000 						  cvt_type, &codecvt1))
5001 		goto unsupported;
5002 	    }
5003 	  else if (!supportable_widening_operation (vinfo, code, stmt_info,
5004 						    vectype_out, cvt_type,
5005 						    &codecvt1, &codecvt2,
5006 						    &multi_step_cvt,
5007 						    &interm_types))
5008 	    continue;
5009 	  else
5010 	    gcc_assert (multi_step_cvt == 0);
5011 
5012 	  if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5013 					      cvt_type,
5014 					      vectype_in, &code1, &code2,
5015 					      &multi_step_cvt, &interm_types))
5016 	    {
5017 	      found_mode = true;
5018 	      break;
5019 	    }
5020 	}
5021 
5022       if (!found_mode)
5023 	goto unsupported;
5024 
5025       if (GET_MODE_SIZE (rhs_mode) == fltsz)
5026 	codecvt2 = ERROR_MARK;
5027       else
5028 	{
5029 	  multi_step_cvt++;
5030 	  interm_types.safe_push (cvt_type);
5031 	  cvt_type = NULL_TREE;
5032 	}
5033       break;
5034 
5035     case NARROW:
5036       gcc_assert (op_type == unary_op);
5037       if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5038 					   &code1, &multi_step_cvt,
5039 					   &interm_types))
5040 	break;
5041 
5042       if (code != FIX_TRUNC_EXPR
5043 	  || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5044 	goto unsupported;
5045 
5046       cvt_type
5047 	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5048       cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5049       if (cvt_type == NULL_TREE)
5050 	goto unsupported;
5051       if (!supportable_convert_operation (code, cvt_type, vectype_in,
5052 					  &codecvt1))
5053 	goto unsupported;
5054       if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5055 					   &code1, &multi_step_cvt,
5056 					   &interm_types))
5057 	break;
5058       goto unsupported;
5059 
5060     default:
5061       gcc_unreachable ();
5062     }
5063 
5064   if (!vec_stmt)		/* transformation not required.  */
5065     {
5066       if (slp_node
5067 	  && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5068 	      || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5069 	{
5070 	  if (dump_enabled_p ())
5071 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5072 			     "incompatible vector types for invariants\n");
5073 	  return false;
5074 	}
5075       DUMP_VECT_SCOPE ("vectorizable_conversion");
5076       if (modifier == NONE)
5077         {
5078 	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5079 	  vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5080 				  cost_vec);
5081 	}
5082       else if (modifier == NARROW)
5083 	{
5084 	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5085 	  /* The final packing step produces one vector result per copy.  */
5086 	  unsigned int nvectors
5087 	    = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5088 	  vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5089 					      multi_step_cvt, cost_vec,
5090 					      widen_arith);
5091 	}
5092       else
5093 	{
5094 	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5095 	  /* The initial unpacking step produces two vector results
5096 	     per copy.  MULTI_STEP_CVT is 0 for a single conversion,
5097 	     so >> MULTI_STEP_CVT divides by 2^(number of steps - 1).  */
5098 	  unsigned int nvectors
5099 	    = (slp_node
5100 	       ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5101 	       : ncopies * 2);
5102 	  vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5103 					      multi_step_cvt, cost_vec,
5104 					      widen_arith);
5105 	}
5106       interm_types.release ();
5107       return true;
5108     }
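
  /* For example (hypothetical counts, no SLP): with ncopies == 2 a
     single-step promotion (multi_step_cvt == 0) is costed as
     ncopies * 2 == 4 result vectors, while a narrowing is costed as
     ncopies == 2 result vectors plus whatever the extra packing steps
     recorded in multi_step_cvt add.  */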
5109 
5110   /* Transform.  */
5111   if (dump_enabled_p ())
5112     dump_printf_loc (MSG_NOTE, vect_location,
5113                      "transform conversion. ncopies = %d.\n", ncopies);
5114 
5115   if (op_type == binary_op)
5116     {
5117       if (CONSTANT_CLASS_P (op0))
5118 	op0 = fold_convert (TREE_TYPE (op1), op0);
5119       else if (CONSTANT_CLASS_P (op1))
5120 	op1 = fold_convert (TREE_TYPE (op0), op1);
5121     }
5122 
5123   /* In case of multi-step conversion, we first generate conversion operations
5124      to the intermediate types, and then from those types to the final one.
5125      We create vector destinations for the intermediate type (TYPES) received
5126      from supportable_*_operation, and store them in the correct order
5127      for future use in vect_create_vectorized_*_stmts ().  */
5128   auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5129   vec_dest = vect_create_destination_var (scalar_dest,
5130 					  (cvt_type && modifier == WIDEN)
5131 					  ? cvt_type : vectype_out);
5132   vec_dsts.quick_push (vec_dest);
5133 
5134   if (multi_step_cvt)
5135     {
5136       for (i = interm_types.length () - 1;
5137 	   interm_types.iterate (i, &intermediate_type); i--)
5138 	{
5139 	  vec_dest = vect_create_destination_var (scalar_dest,
5140 						  intermediate_type);
5141 	  vec_dsts.quick_push (vec_dest);
5142 	}
5143     }
5144 
5145   if (cvt_type)
5146     vec_dest = vect_create_destination_var (scalar_dest,
5147 					    modifier == WIDEN
5148 					    ? vectype_out : cvt_type);
5149 
5150   int ninputs = 1;
5151   if (!slp_node)
5152     {
5153       if (modifier == WIDEN)
5154 	;
5155       else if (modifier == NARROW)
5156 	{
5157 	  if (multi_step_cvt)
5158 	    ninputs = vect_pow2 (multi_step_cvt);
5159 	  ninputs *= 2;
5160 	}
5161     }
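
  /* For example (hypothetical count): a NARROW conversion with one
     intermediate step has multi_step_cvt == 1, so ninputs becomes
     vect_pow2 (1) * 2 == 4 input vectors per copy, reflecting that each
     packing step halves the number of vectors.  */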
5162 
5163   switch (modifier)
5164     {
5165     case NONE:
5166       vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5167 			 op0, &vec_oprnds0);
5168       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5169 	{
5170 	  /* Arguments are ready, create the new vector stmt.  */
5171 	  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5172 	  gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5173 	  new_temp = make_ssa_name (vec_dest, new_stmt);
5174 	  gimple_assign_set_lhs (new_stmt, new_temp);
5175 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5176 
5177 	  if (slp_node)
5178 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5179 	  else
5180 	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5181 	}
5182       break;
5183 
5184     case WIDEN:
5185       /* In case the vectorization factor (VF) is bigger than the number
5186 	 of elements that we can fit in a vectype (nunits), we have to
5187 	 generate more than one vector stmt, i.e., we need to "unroll"
5188 	 the vector stmt by a factor of VF/nunits.  */
5189       vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5190 			 op0, &vec_oprnds0,
5191 			 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5192 			 &vec_oprnds1);
5193       if (code == WIDEN_LSHIFT_EXPR)
5194 	{
5195 	  int oprnds_size = vec_oprnds0.length ();
5196 	  vec_oprnds1.create (oprnds_size);
5197 	  for (i = 0; i < oprnds_size; ++i)
5198 	    vec_oprnds1.quick_push (op1);
5199 	}
5200       /* Arguments are ready.  Create the new vector stmts.  */
5201       for (i = multi_step_cvt; i >= 0; i--)
5202 	{
5203 	  tree this_dest = vec_dsts[i];
5204 	  enum tree_code c1 = code1, c2 = code2;
5205 	  if (i == 0 && codecvt2 != ERROR_MARK)
5206 	    {
5207 	      c1 = codecvt1;
5208 	      c2 = codecvt2;
5209 	    }
5210 	  if (known_eq (nunits_out, nunits_in))
5211 	    vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
5212 						    &vec_oprnds1, stmt_info,
5213 						    this_dest, gsi,
5214 						    c1, op_type);
5215 	  else
5216 	    vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5217 						    &vec_oprnds1, stmt_info,
5218 						    this_dest, gsi,
5219 						    c1, c2, op_type);
5220 	}
5221 
5222       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5223 	{
5224 	  gimple *new_stmt;
5225 	  if (cvt_type)
5226 	    {
5227 	      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5228 	      new_temp = make_ssa_name (vec_dest);
5229 	      new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
5230 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5231 	    }
5232 	  else
5233 	    new_stmt = SSA_NAME_DEF_STMT (vop0);
5234 
5235 	  if (slp_node)
5236 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5237 	  else
5238 	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5239 	}
5240       break;
5241 
5242     case NARROW:
5243       /* In case the vectorization factor (VF) is bigger than the number
5244 	 of elements that we can fit in a vectype (nunits), we have to
5245 	 generate more than one vector stmt, i.e., we need to "unroll"
5246 	 the vector stmt by a factor of VF/nunits.  */
5247       vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5248 			 op0, &vec_oprnds0);
5249       /* Arguments are ready.  Create the new vector stmts.  */
5250       if (cvt_type)
5251 	FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5252 	  {
5253 	    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5254 	    new_temp = make_ssa_name (vec_dest);
5255 	    gassign *new_stmt
5256 	      = gimple_build_assign (new_temp, codecvt1, vop0);
5257 	    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5258 	    vec_oprnds0[i] = new_temp;
5259 	  }
5260 
5261       vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5262 					     multi_step_cvt,
5263 					     stmt_info, vec_dsts, gsi,
5264 					     slp_node, code1);
5265       break;
5266     }
5267   if (!slp_node)
5268     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5269 
5270   vec_oprnds0.release ();
5271   vec_oprnds1.release ();
5272   interm_types.release ();
5273 
5274   return true;
5275 }
5276 
5277 /* Return true if we can assume from the scalar form of STMT_INFO that
5278    neither the scalar nor the vector forms will generate code.  STMT_INFO
5279    is known not to involve a data reference.  */
5280 
5281 bool
5282 vect_nop_conversion_p (stmt_vec_info stmt_info)
5283 {
5284   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5285   if (!stmt)
5286     return false;
5287 
5288   tree lhs = gimple_assign_lhs (stmt);
5289   tree_code code = gimple_assign_rhs_code (stmt);
5290   tree rhs = gimple_assign_rhs1 (stmt);
5291 
5292   if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5293     return true;
5294 
5295   if (CONVERT_EXPR_CODE_P (code))
5296     return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5297 
5298   return false;
5299 }
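
/* For illustration only (hypothetical statement): a conversion such as

       i_7 = (int) u_5;

   between types of the same precision and mode generates no code in
   either scalar or vector form, so it is treated as a nop above.  */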
5300 
5301 /* Function vectorizable_assignment.
5302 
5303    Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5304    If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5305    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5306    Return true if STMT_INFO is vectorizable in this way.  */
5307 
5308 static bool
5309 vectorizable_assignment (vec_info *vinfo,
5310 			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5311 			 gimple **vec_stmt, slp_tree slp_node,
5312 			 stmt_vector_for_cost *cost_vec)
5313 {
5314   tree vec_dest;
5315   tree scalar_dest;
5316   tree op;
5317   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5318   tree new_temp;
5319   enum vect_def_type dt[1] = {vect_unknown_def_type};
5320   int ndts = 1;
5321   int ncopies;
5322   int i;
5323   vec<tree> vec_oprnds = vNULL;
5324   tree vop;
5325   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5326   enum tree_code code;
5327   tree vectype_in;
5328 
5329   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5330     return false;
5331 
5332   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5333       && ! vec_stmt)
5334     return false;
5335 
5336   /* Is vectorizable assignment?  */
5337   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5338   if (!stmt)
5339     return false;
5340 
5341   scalar_dest = gimple_assign_lhs (stmt);
5342   if (TREE_CODE (scalar_dest) != SSA_NAME)
5343     return false;
5344 
5345   if (STMT_VINFO_DATA_REF (stmt_info))
5346     return false;
5347 
5348   code = gimple_assign_rhs_code (stmt);
5349   if (!(gimple_assign_single_p (stmt)
5350 	|| code == PAREN_EXPR
5351 	|| CONVERT_EXPR_CODE_P (code)))
5352     return false;
5353 
5354   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5355   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5356 
5357   /* Multiple types in SLP are handled by creating the appropriate number of
5358      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5359      case of SLP.  */
5360   if (slp_node)
5361     ncopies = 1;
5362   else
5363     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5364 
5365   gcc_assert (ncopies >= 1);
5366 
5367   slp_tree slp_op;
5368   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5369 			   &dt[0], &vectype_in))
5370     {
5371       if (dump_enabled_p ())
5372         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5373                          "use not simple.\n");
5374       return false;
5375     }
5376   if (!vectype_in)
5377     vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5378 
5379   /* We can handle NOP_EXPR conversions that do not change the number
5380      of elements or the vector size.  */
5381   if ((CONVERT_EXPR_CODE_P (code)
5382        || code == VIEW_CONVERT_EXPR)
5383       && (!vectype_in
5384 	  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5385 	  || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5386 		       GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5387     return false;
5388 
5389   if (VECTOR_BOOLEAN_TYPE_P (vectype)
5390       && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5391     {
5392       if (dump_enabled_p ())
5393 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5394 			 "can't convert between boolean and non "
5395 			 "boolean vectors %T\n", TREE_TYPE (op));
5396 
5397       return false;
5398     }
5399 
5400   /* We do not handle bit-precision changes.  */
5401   if ((CONVERT_EXPR_CODE_P (code)
5402        || code == VIEW_CONVERT_EXPR)
5403       && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5404       && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5405 	  || !type_has_mode_precision_p (TREE_TYPE (op)))
5406       /* But a conversion that does not change the bit-pattern is ok.  */
5407       && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5408 	    > TYPE_PRECISION (TREE_TYPE (op)))
5409 	   && TYPE_UNSIGNED (TREE_TYPE (op))))
5410     {
5411       if (dump_enabled_p ())
5412         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5413                          "type conversion to/from bit-precision "
5414                          "unsupported.\n");
5415       return false;
5416     }
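
  /* For example (hypothetical types): truncating an int to a 3-bit
     bit-field type is rejected here because the destination lacks mode
     precision, whereas widening an unsigned 1-bit value to unsigned char
     is accepted since a widening unsigned conversion cannot change the
     bit pattern.  */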
5417 
5418   if (!vec_stmt) /* transformation not required.  */
5419     {
5420       if (slp_node
5421 	  && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5422 	{
5423 	  if (dump_enabled_p ())
5424 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5425 			     "incompatible vector types for invariants\n");
5426 	  return false;
5427 	}
5428       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5429       DUMP_VECT_SCOPE ("vectorizable_assignment");
5430       if (!vect_nop_conversion_p (stmt_info))
5431 	vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5432 				cost_vec);
5433       return true;
5434     }
5435 
5436   /* Transform.  */
5437   if (dump_enabled_p ())
5438     dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5439 
5440   /* Handle def.  */
5441   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5442 
5443   /* Handle use.  */
5444   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5445 
5446   /* Arguments are ready.  Create the new vector stmt.  */
5447   FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5448     {
5449       if (CONVERT_EXPR_CODE_P (code)
5450 	  || code == VIEW_CONVERT_EXPR)
5451 	vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5452       gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5453       new_temp = make_ssa_name (vec_dest, new_stmt);
5454       gimple_assign_set_lhs (new_stmt, new_temp);
5455       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5456       if (slp_node)
5457 	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5458       else
5459 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5460     }
5461   if (!slp_node)
5462     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5463 
5464   vec_oprnds.release ();
5465   return true;
5466 }
5467 
5468 
5469 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5470    either as shift by a scalar or by a vector.  */
5471 
5472 bool
5473 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5474 {
5475 
5476   machine_mode vec_mode;
5477   optab optab;
5478   int icode;
5479   tree vectype;
5480 
5481   vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5482   if (!vectype)
5483     return false;
5484 
5485   optab = optab_for_tree_code (code, vectype, optab_scalar);
5486   if (!optab
5487       || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5488     {
5489       optab = optab_for_tree_code (code, vectype, optab_vector);
5490       if (!optab
5491           || (optab_handler (optab, TYPE_MODE (vectype))
5492                       == CODE_FOR_nothing))
5493         return false;
5494     }
5495 
5496   vec_mode = TYPE_MODE (vectype);
5497   icode = (int) optab_handler (optab, vec_mode);
5498   if (icode == CODE_FOR_nothing)
5499     return false;
5500 
5501   return true;
5502 }
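
/* For illustration only (hypothetical caller): pattern recognizers can
   use the function above to test whether a shift on a given scalar type
   is vectorizable at all before committing to a rewrite, e.g.

       if (!vect_supportable_shift (vinfo, LSHIFT_EXPR, itype))
	 return NULL;  */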
5503 
5504 
5505 /* Function vectorizable_shift.
5506 
5507    Check if STMT_INFO performs a shift operation that can be vectorized.
5508    If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5509    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5510    Return true if STMT_INFO is vectorizable in this way.  */
5511 
5512 static bool
5513 vectorizable_shift (vec_info *vinfo,
5514 		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5515 		    gimple **vec_stmt, slp_tree slp_node,
5516 		    stmt_vector_for_cost *cost_vec)
5517 {
5518   tree vec_dest;
5519   tree scalar_dest;
5520   tree op0, op1 = NULL;
5521   tree vec_oprnd1 = NULL_TREE;
5522   tree vectype;
5523   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5524   enum tree_code code;
5525   machine_mode vec_mode;
5526   tree new_temp;
5527   optab optab;
5528   int icode;
5529   machine_mode optab_op2_mode;
5530   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5531   int ndts = 2;
5532   poly_uint64 nunits_in;
5533   poly_uint64 nunits_out;
5534   tree vectype_out;
5535   tree op1_vectype;
5536   int ncopies;
5537   int i;
5538   vec<tree> vec_oprnds0 = vNULL;
5539   vec<tree> vec_oprnds1 = vNULL;
5540   tree vop0, vop1;
5541   unsigned int k;
5542   bool scalar_shift_arg = true;
5543   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5544   bool incompatible_op1_vectype_p = false;
5545 
5546   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5547     return false;
5548 
5549   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5550       && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5551       && ! vec_stmt)
5552     return false;
5553 
5554   /* Is STMT a vectorizable binary/unary operation?   */
5555   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5556   if (!stmt)
5557     return false;
5558 
5559   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5560     return false;
5561 
5562   code = gimple_assign_rhs_code (stmt);
5563 
5564   if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5565       || code == RROTATE_EXPR))
5566     return false;
5567 
5568   scalar_dest = gimple_assign_lhs (stmt);
5569   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5570   if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5571     {
5572       if (dump_enabled_p ())
5573         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5574                          "bit-precision shifts not supported.\n");
5575       return false;
5576     }
5577 
5578   slp_tree slp_op0;
5579   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5580 			   0, &op0, &slp_op0, &dt[0], &vectype))
5581     {
5582       if (dump_enabled_p ())
5583         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5584                          "use not simple.\n");
5585       return false;
5586     }
5587   /* If op0 is an external or constant def, infer the vector type
5588      from the scalar type.  */
5589   if (!vectype)
5590     vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5591   if (vec_stmt)
5592     gcc_assert (vectype);
5593   if (!vectype)
5594     {
5595       if (dump_enabled_p ())
5596         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5597                          "no vectype for scalar type\n");
5598       return false;
5599     }
5600 
5601   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5602   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5603   if (maybe_ne (nunits_out, nunits_in))
5604     return false;
5605 
5606   stmt_vec_info op1_def_stmt_info;
5607   slp_tree slp_op1;
5608   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5609 			   &dt[1], &op1_vectype, &op1_def_stmt_info))
5610     {
5611       if (dump_enabled_p ())
5612         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5613                          "use not simple.\n");
5614       return false;
5615     }
5616 
5617   /* Multiple types in SLP are handled by creating the appropriate number of
5618      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5619      case of SLP.  */
5620   if (slp_node)
5621     ncopies = 1;
5622   else
5623     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5624 
5625   gcc_assert (ncopies >= 1);
5626 
5627   /* Determine whether the shift amount is a vector or a scalar.  If the
5628      shift/rotate amount is a vector, use the vector/vector shift optabs.  */
5629 
5630   if ((dt[1] == vect_internal_def
5631        || dt[1] == vect_induction_def
5632        || dt[1] == vect_nested_cycle)
5633       && !slp_node)
5634     scalar_shift_arg = false;
5635   else if (dt[1] == vect_constant_def
5636 	   || dt[1] == vect_external_def
5637 	   || dt[1] == vect_internal_def)
5638     {
5639       /* In SLP we need to check whether the shift count is the same
5640 	 in all scalar stmts; in loops, a constant or invariant shift
5641 	 amount is always a scalar shift.  */
5642       if (slp_node)
5643 	{
5644 	  vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5645 	  stmt_vec_info slpstmt_info;
5646 
5647 	  FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5648 	    {
5649 	      gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5650 	      if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5651 		scalar_shift_arg = false;
5652 	    }
5653 
5654 	  /* For internal SLP defs we have to make sure we see scalar stmts
5655 	     for all vector elements.
5656 	     ???  For different vectors we could resort to a different
5657 	     scalar shift operand but code-generation below simply always
5658 	     takes the first.  */
5659 	  if (dt[1] == vect_internal_def
5660 	      && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5661 			   stmts.length ()))
5662 	    scalar_shift_arg = false;
5663 	}
5664 
5665       /* If the shift amount is computed by a pattern stmt we cannot
5666 	 use the scalar amount directly, so give up and use a vector
5667 	 shift.  */
5668       if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5669 	scalar_shift_arg = false;
5670     }
5671   else
5672     {
5673       if (dump_enabled_p ())
5674         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5675                          "operand mode requires invariant argument.\n");
5676       return false;
5677     }
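
  /* For example (hypothetical loops): in

	 a[i] = b[i] << s;

     the shift amount is loop invariant, so scalar_shift_arg stays true
     and the vector-by-scalar optab can be used, whereas in

	 a[i] = b[i] << c[i];

     the amount is an internal def and the vector-by-vector optab is
     required.  */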
5678 
5679   /* Vector shifted by vector.  */
5680   bool was_scalar_shift_arg = scalar_shift_arg;
5681   if (!scalar_shift_arg)
5682     {
5683       optab = optab_for_tree_code (code, vectype, optab_vector);
5684       if (dump_enabled_p ())
5685         dump_printf_loc (MSG_NOTE, vect_location,
5686                          "vector/vector shift/rotate found.\n");
5687 
5688       if (!op1_vectype)
5689 	op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5690 						   slp_op1);
5691       incompatible_op1_vectype_p
5692 	= (op1_vectype == NULL_TREE
5693 	   || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5694 			TYPE_VECTOR_SUBPARTS (vectype))
5695 	   || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5696       if (incompatible_op1_vectype_p
5697 	  && (!slp_node
5698 	      || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5699 	      || slp_op1->refcnt != 1))
5700 	{
5701 	  if (dump_enabled_p ())
5702 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5703                              "unusable type for last operand in"
5704                              " vector/vector shift/rotate.\n");
5705 	  return false;
5706 	}
5707     }
5708   /* See if the machine has a vector-shifted-by-scalar insn, and if not,
5709      see if it has a vector-shifted-by-vector insn.  */
5710   else
5711     {
5712       optab = optab_for_tree_code (code, vectype, optab_scalar);
5713       if (optab
5714           && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5715         {
5716           if (dump_enabled_p ())
5717             dump_printf_loc (MSG_NOTE, vect_location,
5718                              "vector/scalar shift/rotate found.\n");
5719         }
5720       else
5721         {
5722           optab = optab_for_tree_code (code, vectype, optab_vector);
5723           if (optab
5724                && (optab_handler (optab, TYPE_MODE (vectype))
5725                       != CODE_FOR_nothing))
5726             {
5727 	      scalar_shift_arg = false;
5728 
5729               if (dump_enabled_p ())
5730                 dump_printf_loc (MSG_NOTE, vect_location,
5731                                  "vector/vector shift/rotate found.\n");
5732 
5733 	      if (!op1_vectype)
5734 		op1_vectype = get_vectype_for_scalar_type (vinfo,
5735 							   TREE_TYPE (op1),
5736 							   slp_op1);
5737 
5738               /* Unlike the other binary operators, shifts/rotates have
5739                  the rhs being int, instead of the same type as the lhs,
5740                  so make sure the scalar is the right type if we are
5741 		 dealing with vectors of long long/long/short/char.  */
5742 	      incompatible_op1_vectype_p
5743 		= (!op1_vectype
5744 		   || !tree_nop_conversion_p (TREE_TYPE (vectype),
5745 					      TREE_TYPE (op1)));
5746 	      if (incompatible_op1_vectype_p
5747 		  && dt[1] == vect_internal_def)
5748 		{
5749 		  if (dump_enabled_p ())
5750 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5751 				     "unusable type for last operand in"
5752 				     " vector/vector shift/rotate.\n");
5753 		  return false;
5754 		}
5755             }
5756         }
5757     }
5758 
5759   /* Supportable by target?  */
5760   if (!optab)
5761     {
5762       if (dump_enabled_p ())
5763         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5764                          "no optab.\n");
5765       return false;
5766     }
5767   vec_mode = TYPE_MODE (vectype);
5768   icode = (int) optab_handler (optab, vec_mode);
5769   if (icode == CODE_FOR_nothing)
5770     {
5771       if (dump_enabled_p ())
5772         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5773                          "op not supported by target.\n");
5774       return false;
5775     }
5776   /* Vector lowering cannot optimize vector shifts using word arithmetic.  */
5777   if (vect_emulated_vector_p (vectype))
5778     return false;
5779 
5780   if (!vec_stmt) /* transformation not required.  */
5781     {
5782       if (slp_node
5783 	  && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5784 	      || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5785 		  && (!incompatible_op1_vectype_p
5786 		      || dt[1] == vect_constant_def)
5787 		  && !vect_maybe_update_slp_op_vectype
5788 			(slp_op1,
5789 			 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5790 	{
5791 	  if (dump_enabled_p ())
5792 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5793 			     "incompatible vector types for invariants\n");
5794 	  return false;
5795 	}
5796       /* Now adjust the constant shift amount in place.  */
5797       if (slp_node
5798 	  && incompatible_op1_vectype_p
5799 	  && dt[1] == vect_constant_def)
5800 	{
5801 	  for (unsigned i = 0;
5802 	       i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5803 	    {
5804 	      SLP_TREE_SCALAR_OPS (slp_op1)[i]
5805 		= fold_convert (TREE_TYPE (vectype),
5806 				SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5807 	      gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5808 			   == INTEGER_CST));
5809 	    }
5810 	}
5811       STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5812       DUMP_VECT_SCOPE ("vectorizable_shift");
5813       vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5814 			      scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5815       return true;
5816     }
5817 
5818   /* Transform.  */
5819 
5820   if (dump_enabled_p ())
5821     dump_printf_loc (MSG_NOTE, vect_location,
5822                      "transform binary/unary operation.\n");
5823 
5824   if (incompatible_op1_vectype_p && !slp_node)
5825     {
5826       gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5827       op1 = fold_convert (TREE_TYPE (vectype), op1);
5828       if (dt[1] != vect_constant_def)
5829 	op1 = vect_init_vector (vinfo, stmt_info, op1,
5830 				TREE_TYPE (vectype), NULL);
5831     }
5832 
5833   /* Handle def.  */
5834   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5835 
5836   if (scalar_shift_arg && dt[1] != vect_internal_def)
5837     {
5838       /* Vector shl and shr insn patterns can be defined with scalar
5839 	 operand 2 (shift operand).  In this case, use constant or loop
5840 	 invariant op1 directly, without extending it to vector mode
5841 	 first.  */
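      /* A minimal illustration (the insn shape is target-specific, so this
	 only sketches such a pattern): for a loop like

	     for (i = 0; i < n; i++)
	       a[i] = b[i] << s;

	 a target whose vector shift pattern takes a scalar shift count can
	 use the invariant 's' as operand 2 directly, instead of first
	 broadcasting it into a vector.  */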
5842       optab_op2_mode = insn_data[icode].operand[2].mode;
5843       if (!VECTOR_MODE_P (optab_op2_mode))
5844 	{
5845 	  if (dump_enabled_p ())
5846 	    dump_printf_loc (MSG_NOTE, vect_location,
5847 			     "operand 1 using scalar mode.\n");
5848 	  vec_oprnd1 = op1;
5849 	  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5850 	  vec_oprnds1.quick_push (vec_oprnd1);
5851 	      /* Store vec_oprnd1 for every vector stmt to be created.
5852 		 We check during the analysis that all the shift arguments
5853 		 are the same.
5854 		 TODO: Allow different constants for different vector
5855 		 stmts generated for an SLP instance.  */
5856 	  for (k = 0;
5857 	       k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5858 	    vec_oprnds1.quick_push (vec_oprnd1);
5859 	}
5860     }
5861   else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5862     {
5863       if (was_scalar_shift_arg)
5864 	{
5865 	  /* If the argument was the same in all lanes create
5866 	     the correctly typed vector shift amount directly.  */
5867 	  op1 = fold_convert (TREE_TYPE (vectype), op1);
5868 	  op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5869 				  !loop_vinfo ? gsi : NULL);
5870 	  vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5871 					 !loop_vinfo ? gsi : NULL);
5872 	  vec_oprnds1.create (slp_node->vec_stmts_size);
5873 	  for (k = 0; k < slp_node->vec_stmts_size; k++)
5874 	    vec_oprnds1.quick_push (vec_oprnd1);
5875 	}
5876       else if (dt[1] == vect_constant_def)
5877 	/* The constant shift amount has been adjusted in place.  */
5878 	;
5879       else
5880 	gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5881     }
5882 
5883   /* vec_oprnd1 is available if operand 1 should be of a scalar type
5884      (a special case for certain kinds of vector shifts); otherwise,
5885      operand 1 should be of a vector type (the usual case).  */
5886   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5887 		     op0, &vec_oprnds0,
5888 		     vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5889 
5890   /* Arguments are ready.  Create the new vector stmt.  */
5891   FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5892     {
5893       /* For internal defs where we need to use a scalar shift arg
5894 	 extract the first lane.  */
5895       if (scalar_shift_arg && dt[1] == vect_internal_def)
5896 	{
5897 	  vop1 = vec_oprnds1[0];
5898 	  new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5899 	  gassign *new_stmt
5900 	    = gimple_build_assign (new_temp,
5901 				   build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5902 					   vop1,
5903 					   TYPE_SIZE (TREE_TYPE (new_temp)),
5904 					   bitsize_zero_node));
5905 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5906 	  vop1 = new_temp;
5907 	}
5908       else
5909 	vop1 = vec_oprnds1[i];
5910       gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5911       new_temp = make_ssa_name (vec_dest, new_stmt);
5912       gimple_assign_set_lhs (new_stmt, new_temp);
5913       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5914       if (slp_node)
5915 	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5916       else
5917 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5918     }
5919 
5920   if (!slp_node)
5921     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5922 
5923   vec_oprnds0.release ();
5924   vec_oprnds1.release ();
5925 
5926   return true;
5927 }
5928 
5929 
5930 /* Function vectorizable_operation.
5931 
5932    Check if STMT_INFO performs a binary, unary or ternary operation that can
5933    be vectorized.
5934    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5935    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5936    Return true if STMT_INFO is vectorizable in this way.  */
5937 
5938 static bool
5939 vectorizable_operation (vec_info *vinfo,
5940 			stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5941 			gimple **vec_stmt, slp_tree slp_node,
5942 			stmt_vector_for_cost *cost_vec)
5943 {
5944   tree vec_dest;
5945   tree scalar_dest;
5946   tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5947   tree vectype;
5948   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5949   enum tree_code code, orig_code;
5950   machine_mode vec_mode;
5951   tree new_temp;
5952   int op_type;
5953   optab optab;
5954   bool target_support_p;
5955   enum vect_def_type dt[3]
5956     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5957   int ndts = 3;
5958   poly_uint64 nunits_in;
5959   poly_uint64 nunits_out;
5960   tree vectype_out;
5961   int ncopies, vec_num;
5962   int i;
5963   vec<tree> vec_oprnds0 = vNULL;
5964   vec<tree> vec_oprnds1 = vNULL;
5965   vec<tree> vec_oprnds2 = vNULL;
5966   tree vop0, vop1, vop2;
5967   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5968 
5969   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5970     return false;
5971 
5972   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5973       && ! vec_stmt)
5974     return false;
5975 
5976   /* Is STMT a vectorizable binary/unary operation?   */
5977   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5978   if (!stmt)
5979     return false;
5980 
5981   /* Loads and stores are handled in vectorizable_{load,store}.  */
5982   if (STMT_VINFO_DATA_REF (stmt_info))
5983     return false;
5984 
5985   orig_code = code = gimple_assign_rhs_code (stmt);
5986 
5987   /* Shifts are handled in vectorizable_shift.  */
5988   if (code == LSHIFT_EXPR
5989       || code == RSHIFT_EXPR
5990       || code == LROTATE_EXPR
5991       || code == RROTATE_EXPR)
5992     return false;
5993 
5994   /* Comparisons are handled in vectorizable_comparison.  */
5995   if (TREE_CODE_CLASS (code) == tcc_comparison)
5996     return false;
5997 
5998   /* Conditions are handled in vectorizable_condition.  */
5999   if (code == COND_EXPR)
6000     return false;
6001 
6002   /* For pointer addition and subtraction, we should use the normal
6003      plus and minus for the vector operation.  */
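  /* E.g. (with made-up SSA names) q_2 = p_1 + 16 (POINTER_PLUS_EXPR) or
     d_3 = p_1 - q_2 (POINTER_DIFF_EXPR) are vectorized using an ordinary
     PLUS_EXPR or MINUS_EXPR on the vectorized operands.  */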
6004   if (code == POINTER_PLUS_EXPR)
6005     code = PLUS_EXPR;
6006   if (code == POINTER_DIFF_EXPR)
6007     code = MINUS_EXPR;
6008 
6009   /* Support only unary or binary operations.  */
6010   op_type = TREE_CODE_LENGTH (code);
6011   if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6012     {
6013       if (dump_enabled_p ())
6014         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6015                          "num. args = %d (not unary/binary/ternary op).\n",
6016                          op_type);
6017       return false;
6018     }
6019 
6020   scalar_dest = gimple_assign_lhs (stmt);
6021   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6022 
6023   /* Most operations cannot handle bit-precision types without extra
6024      truncations.  */
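  /* For example (illustrative only), arithmetic on a 3-bit bit-field value
     would require truncating the result back to 3 bits after every vector
     operation, whereas BIT_AND/BIT_IOR/BIT_XOR never exceed the precision
     of their operands.  */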
6025   bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6026   if (!mask_op_p
6027       && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6028       /* Exceptions are bitwise binary operations.  */
6029       && code != BIT_IOR_EXPR
6030       && code != BIT_XOR_EXPR
6031       && code != BIT_AND_EXPR)
6032     {
6033       if (dump_enabled_p ())
6034         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6035                          "bit-precision arithmetic not supported.\n");
6036       return false;
6037     }
6038 
6039   slp_tree slp_op0;
6040   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6041 			   0, &op0, &slp_op0, &dt[0], &vectype))
6042     {
6043       if (dump_enabled_p ())
6044         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6045                          "use not simple.\n");
6046       return false;
6047     }
6048   /* If op0 is an external or constant def, infer the vector type
6049      from the scalar type.  */
6050   if (!vectype)
6051     {
6052       /* For a boolean type we cannot determine the vectype from an
6053 	 invariant value (we don't know whether it is a vector
6054 	 of booleans or a vector of integers).  Use the output
6055 	 vectype because operations on booleans don't change
6056 	 the type.  */
6057       if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6058 	{
6059 	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6060 	    {
6061 	      if (dump_enabled_p ())
6062 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6063 				 "not supported operation on bool value.\n");
6064 	      return false;
6065 	    }
6066 	  vectype = vectype_out;
6067 	}
6068       else
6069 	vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6070 					       slp_node);
6071     }
6072   if (vec_stmt)
6073     gcc_assert (vectype);
6074   if (!vectype)
6075     {
6076       if (dump_enabled_p ())
6077 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6078 			 "no vectype for scalar type %T\n",
6079 			 TREE_TYPE (op0));
6080 
6081       return false;
6082     }
6083 
6084   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6085   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6086   if (maybe_ne (nunits_out, nunits_in))
6087     return false;
6088 
6089   tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6090   slp_tree slp_op1 = NULL, slp_op2 = NULL;
6091   if (op_type == binary_op || op_type == ternary_op)
6092     {
6093       if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6094 			       1, &op1, &slp_op1, &dt[1], &vectype2))
6095 	{
6096 	  if (dump_enabled_p ())
6097 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6098                              "use not simple.\n");
6099 	  return false;
6100 	}
6101     }
6102   if (op_type == ternary_op)
6103     {
6104       if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6105 			       2, &op2, &slp_op2, &dt[2], &vectype3))
6106 	{
6107 	  if (dump_enabled_p ())
6108 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6109                              "use not simple.\n");
6110 	  return false;
6111 	}
6112     }
6113 
6114   /* Multiple types in SLP are handled by creating the appropriate number of
6115      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
6116      case of SLP.  */
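  /* E.g. (numbers illustrative): with a vectorization factor of 8 and a
     4-element vectype, a non-SLP statement needs ncopies = 2, whereas an
     SLP node instead records the count in SLP_TREE_NUMBER_OF_VEC_STMTS
     and ncopies stays 1.  */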
6117   if (slp_node)
6118     {
6119       ncopies = 1;
6120       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6121     }
6122   else
6123     {
6124       ncopies = vect_get_num_copies (loop_vinfo, vectype);
6125       vec_num = 1;
6126     }
6127 
6128   gcc_assert (ncopies >= 1);
6129 
6130   /* Reject attempts to combine mask types with nonmask types, e.g. if
6131      we have an AND between a (nonmask) boolean loaded from memory and
6132      a (mask) boolean result of a comparison.
6133 
6134      TODO: We could easily fix these cases up using pattern statements.  */
6135   if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6136       || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6137       || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6138     {
6139       if (dump_enabled_p ())
6140 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6141 			 "mixed mask and nonmask vector types\n");
6142       return false;
6143     }
6144 
6145   /* Supportable by target?  */
6146 
6147   vec_mode = TYPE_MODE (vectype);
6148   if (code == MULT_HIGHPART_EXPR)
6149     target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6150   else
6151     {
6152       optab = optab_for_tree_code (code, vectype, optab_default);
6153       if (!optab)
6154 	{
6155           if (dump_enabled_p ())
6156             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6157                              "no optab.\n");
6158 	  return false;
6159 	}
6160       target_support_p = (optab_handler (optab, vec_mode)
6161 			  != CODE_FOR_nothing);
6162     }
6163 
6164   bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6165   if (!target_support_p)
6166     {
6167       if (dump_enabled_p ())
6168 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6169                          "op not supported by target.\n");
6170       /* Check only during analysis.  */
6171       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6172 	  || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6173         return false;
6174       if (dump_enabled_p ())
6175 	dump_printf_loc (MSG_NOTE, vect_location,
6176                          "proceeding using word mode.\n");
6177       using_emulated_vectors_p = true;
6178     }
6179 
6180   if (using_emulated_vectors_p
6181       && !vect_can_vectorize_without_simd_p (code))
6182     {
6183       if (dump_enabled_p ())
6184 	dump_printf (MSG_NOTE, "using word mode not possible.\n");
6185       return false;
6186     }
6187 
6188   int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6189   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6190   internal_fn cond_fn = get_conditional_internal_fn (code);
6191 
6192   if (!vec_stmt) /* transformation not required.  */
6193     {
6194       /* If this operation is part of a reduction, a fully-masked loop
6195 	 should only change the active lanes of the reduction chain,
6196 	 keeping the inactive lanes as-is.  */
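      /* For instance, a PLUS_EXPR in the reduction chain would then be
	 emitted as .COND_ADD (loop_mask, vop0, vop1, reduction_input),
	 so inactive lanes just pass the reduction input through (the
	 names here are illustrative; the call itself is built in the
	 transform code below).  */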
6197       if (loop_vinfo
6198 	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6199 	  && reduc_idx >= 0)
6200 	{
6201 	  if (cond_fn == IFN_LAST
6202 	      || !direct_internal_fn_supported_p (cond_fn, vectype,
6203 						  OPTIMIZE_FOR_SPEED))
6204 	    {
6205 	      if (dump_enabled_p ())
6206 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6207 				 "can't use a fully-masked loop because no"
6208 				 " conditional operation is available.\n");
6209 	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6210 	    }
6211 	  else
6212 	    vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6213 				   vectype, NULL);
6214 	}
6215 
6216       /* Put types on constant and invariant SLP children.  */
6217       if (slp_node
6218 	  && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6219 	      || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6220 	      || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6221 	{
6222 	  if (dump_enabled_p ())
6223 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6224 			     "incompatible vector types for invariants\n");
6225 	  return false;
6226 	}
6227 
6228       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6229       DUMP_VECT_SCOPE ("vectorizable_operation");
6230       vect_model_simple_cost (vinfo, stmt_info,
6231 			      ncopies, dt, ndts, slp_node, cost_vec);
6232       if (using_emulated_vectors_p)
6233 	{
6234 	  /* The above vect_model_simple_cost call handles constants
6235 	     in the prologue and (mis-)costs one of the stmts as a
6236 	     vector stmt.  See tree-vect-generic.c:do_plus_minus/do_negate
6237 	     for the actual lowering that will be applied.  */
6238 	  unsigned n
6239 	    = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6240 	  switch (code)
6241 	    {
6242 	    case PLUS_EXPR:
6243 	      n *= 5;
6244 	      break;
6245 	    case MINUS_EXPR:
6246 	      n *= 6;
6247 	      break;
6248 	    case NEGATE_EXPR:
6249 	      n *= 4;
6250 	      break;
6251 	    default:;
6252 	    }
6253 	  record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body);
6254 	}
6255       return true;
6256     }
6257 
6258   /* Transform.  */
6259 
6260   if (dump_enabled_p ())
6261     dump_printf_loc (MSG_NOTE, vect_location,
6262                      "transform binary/unary operation.\n");
6263 
6264   bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6265 
6266   /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6267      vectors with unsigned elements, but the result is signed.  So, we
6268      need to compute the MINUS_EXPR into vectype temporary and
6269      VIEW_CONVERT_EXPR it into the final vectype_out result.  */
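  /* Illustrative GIMPLE (made-up SSA names):
       vect_d.1 = vect_p.1 - vect_q.1;	<-- MINUS_EXPR in the unsigned VECTYPE
       vect_d.2 = VIEW_CONVERT_EXPR <VECTYPE_OUT> (vect_d.1);  */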
6270   tree vec_cvt_dest = NULL_TREE;
6271   if (orig_code == POINTER_DIFF_EXPR)
6272     {
6273       vec_dest = vect_create_destination_var (scalar_dest, vectype);
6274       vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6275     }
6276   /* Handle def.  */
6277   else
6278     vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6279 
6280   /* In case the vectorization factor (VF) is bigger than the number
6281      of elements that we can fit in a vectype (nunits), we have to generate
6282      more than one vector stmt - i.e - we need to "unroll" the
6283      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
6284      from one copy of the vector stmt to the next, in the field
6285      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
6286      stages to find the correct vector defs to be used when vectorizing
6287      stmts that use the defs of the current stmt.  The example below
6288      illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6289      we need to create 4 vectorized stmts):
6290 
6291      before vectorization:
6292                                 RELATED_STMT    VEC_STMT
6293         S1:     x = memref      -               -
6294         S2:     z = x + 1       -               -
6295 
6296      step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6297              there):
6298                                 RELATED_STMT    VEC_STMT
6299         VS1_0:  vx0 = memref0   VS1_1           -
6300         VS1_1:  vx1 = memref1   VS1_2           -
6301         VS1_2:  vx2 = memref2   VS1_3           -
6302         VS1_3:  vx3 = memref3   -               -
6303         S1:     x = load        -               VS1_0
6304         S2:     z = x + 1       -               -
6305 
6306      step2: vectorize stmt S2 (done here):
6307         To vectorize stmt S2 we first need to find the relevant vector
6308         def for the first operand 'x'.  This is, as usual, obtained from
6309         the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6310         that defines 'x' (S1).  This way we find the stmt VS1_0, and the
6311         relevant vector def 'vx0'.  Having found 'vx0' we can generate
6312         the vector stmt VS2_0, and as usual, record it in the
6313         STMT_VINFO_VEC_STMT of stmt S2.
6314         When creating the second copy (VS2_1), we obtain the relevant vector
6315         def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6316         stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
6317         vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
6318         pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6319         Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
6320         chain of stmts and pointers:
6321                                 RELATED_STMT    VEC_STMT
6322         VS1_0:  vx0 = memref0   VS1_1           -
6323         VS1_1:  vx1 = memref1   VS1_2           -
6324         VS1_2:  vx2 = memref2   VS1_3           -
6325         VS1_3:  vx3 = memref3   -               -
6326         S1:     x = load        -               VS1_0
6327         VS2_0:  vz0 = vx0 + v1  VS2_1           -
6328         VS2_1:  vz1 = vx1 + v1  VS2_2           -
6329         VS2_2:  vz2 = vx2 + v1  VS2_3           -
6330         VS2_3:  vz3 = vx3 + v1  -               -
6331         S2:     z = x + 1       -               VS2_0  */
6332 
6333   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6334 		     op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6335   /* Arguments are ready.  Create the new vector stmt.  */
6336   FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6337     {
6338       gimple *new_stmt = NULL;
6339       vop1 = ((op_type == binary_op || op_type == ternary_op)
6340 	      ? vec_oprnds1[i] : NULL_TREE);
6341       vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6342       if (masked_loop_p && reduc_idx >= 0)
6343 	{
6344 	  /* Perform the operation on active elements only and take
6345 	     inactive elements from the reduction chain input.  */
6346 	  gcc_assert (!vop2);
6347 	  vop2 = reduc_idx == 1 ? vop1 : vop0;
6348 	  tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6349 					  vectype, i);
6350 	  gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6351 						    vop0, vop1, vop2);
6352 	  new_temp = make_ssa_name (vec_dest, call);
6353 	  gimple_call_set_lhs (call, new_temp);
6354 	  gimple_call_set_nothrow (call, true);
6355 	  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6356 	  new_stmt = call;
6357 	}
6358       else
6359 	{
6360 	  tree mask = NULL_TREE;
6361 	  /* When combining two masks check if either of them is elsewhere
6362 	     combined with a loop mask, if that's the case we can mark that the
6363 	     new combined mask doesn't need to be combined with a loop mask.  */
6364 	  if (masked_loop_p && code == BIT_AND_EXPR)
6365 	    {
6366 	      if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
6367 								 ncopies }))
6368 		{
6369 		  mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6370 					     vectype, i);
6371 
6372 		  vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6373 					   vop0, gsi);
6374 		}
6375 
6376 	      if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
6377 								 ncopies }))
6378 		{
6379 		  mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6380 					     vectype, i);
6381 
6382 		  vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6383 					   vop1, gsi);
6384 		}
6385 	    }
6386 
6387 	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6388 	  new_temp = make_ssa_name (vec_dest, new_stmt);
6389 	  gimple_assign_set_lhs (new_stmt, new_temp);
6390 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6391 
6392 	  /* Enter the combined value into the vector cond hash so we don't
6393 	     AND it with a loop mask again.  */
6394 	  if (mask)
6395 	    loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
6396 
6397 	  if (vec_cvt_dest)
6398 	    {
6399 	      new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6400 	      new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6401 					      new_temp);
6402 	      new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6403 	      gimple_assign_set_lhs (new_stmt, new_temp);
6404 	      vect_finish_stmt_generation (vinfo, stmt_info,
6405 					   new_stmt, gsi);
6406 	    }
6407 	}
6408       if (slp_node)
6409 	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6410       else
6411 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6412     }
6413 
6414   if (!slp_node)
6415     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6416 
6417   vec_oprnds0.release ();
6418   vec_oprnds1.release ();
6419   vec_oprnds2.release ();
6420 
6421   return true;
6422 }
6423 
6424 /* A helper function to ensure data reference DR_INFO's base alignment.  */
6425 
6426 static void
6427 ensure_base_align (dr_vec_info *dr_info)
6428 {
6429   /* Alignment is only analyzed for the first element of a DR group;
6430      use that to determine the base alignment we need to enforce.  */
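  /* For example (sizes illustrative): if the target wants vector accesses
     aligned to 16 bytes but the base decl only guarantees 8, the code
     below raises DECL_ALIGN to 16 (asking the symbol table to do it for
     decls it knows about) and marks the decl DECL_USER_ALIGN.  */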
6431   if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
6432     dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
6433 
6434   gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
6435 
6436   if (dr_info->base_misaligned)
6437     {
6438       tree base_decl = dr_info->base_decl;
6439 
6440       // We should only be able to increase the alignment of a base object if
6441       // we know what its new alignment should be at compile time.
6442       unsigned HOST_WIDE_INT align_base_to =
6443 	DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6444 
6445       if (decl_in_symtab_p (base_decl))
6446 	symtab_node::get (base_decl)->increase_alignment (align_base_to);
6447       else if (DECL_ALIGN (base_decl) < align_base_to)
6448 	{
6449 	  SET_DECL_ALIGN (base_decl, align_base_to);
6450           DECL_USER_ALIGN (base_decl) = 1;
6451 	}
6452       dr_info->base_misaligned = false;
6453     }
6454 }
6455 
6456 
6457 /* Function get_group_alias_ptr_type.
6458 
6459    Return the alias type for the group starting at FIRST_STMT_INFO.  */
6460 
6461 static tree
6462 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6463 {
6464   struct data_reference *first_dr, *next_dr;
6465 
6466   first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6467   stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6468   while (next_stmt_info)
6469     {
6470       next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6471       if (get_alias_set (DR_REF (first_dr))
6472 	  != get_alias_set (DR_REF (next_dr)))
6473 	{
6474 	  if (dump_enabled_p ())
6475 	    dump_printf_loc (MSG_NOTE, vect_location,
6476 			     "conflicting alias set types.\n");
6477 	  return ptr_type_node;
6478 	}
6479       next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6480     }
6481   return reference_alias_ptr_type (DR_REF (first_dr));
6482 }
6483 
6484 
6485 /* Function scan_operand_equal_p.
6486 
6487    Helper function for check_scan_store.  Compare two references
6488    with .GOMP_SIMD_LANE bases.  */
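/* For example (using the names from the IL sketch in check_scan_store
   below), this is what verifies that the store D.2043[_25] = _28 refers
   to the same element as the load _26 = D.2043[_25], even when the lane
   index has been cast or scaled.  */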
6489 
6490 static bool
6491 scan_operand_equal_p (tree ref1, tree ref2)
6492 {
6493   tree ref[2] = { ref1, ref2 };
6494   poly_int64 bitsize[2], bitpos[2];
6495   tree offset[2], base[2];
6496   for (int i = 0; i < 2; ++i)
6497     {
6498       machine_mode mode;
6499       int unsignedp, reversep, volatilep = 0;
6500       base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6501       				     &offset[i], &mode, &unsignedp,
6502       				     &reversep, &volatilep);
6503       if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6504 	return false;
6505       if (TREE_CODE (base[i]) == MEM_REF
6506 	  && offset[i] == NULL_TREE
6507 	  && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6508 	{
6509 	  gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6510 	  if (is_gimple_assign (def_stmt)
6511 	      && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6512 	      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6513 	      && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6514 	    {
6515 	      if (maybe_ne (mem_ref_offset (base[i]), 0))
6516 		return false;
6517 	      base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6518 	      offset[i] = gimple_assign_rhs2 (def_stmt);
6519 	    }
6520 	}
6521     }
6522 
6523   if (!operand_equal_p (base[0], base[1], 0))
6524     return false;
6525   if (maybe_ne (bitsize[0], bitsize[1]))
6526     return false;
6527   if (offset[0] != offset[1])
6528     {
6529       if (!offset[0] || !offset[1])
6530 	return false;
6531       if (!operand_equal_p (offset[0], offset[1], 0))
6532 	{
6533 	  tree step[2];
6534 	  for (int i = 0; i < 2; ++i)
6535 	    {
6536 	      step[i] = integer_one_node;
6537 	      if (TREE_CODE (offset[i]) == SSA_NAME)
6538 		{
6539 		  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6540 		  if (is_gimple_assign (def_stmt)
6541 		      && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6542 		      && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6543 			  == INTEGER_CST))
6544 		    {
6545 		      step[i] = gimple_assign_rhs2 (def_stmt);
6546 		      offset[i] = gimple_assign_rhs1 (def_stmt);
6547 		    }
6548 		}
6549 	      else if (TREE_CODE (offset[i]) == MULT_EXPR)
6550 		{
6551 		  step[i] = TREE_OPERAND (offset[i], 1);
6552 		  offset[i] = TREE_OPERAND (offset[i], 0);
6553 		}
6554 	      tree rhs1 = NULL_TREE;
6555 	      if (TREE_CODE (offset[i]) == SSA_NAME)
6556 		{
6557 		  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6558 		  if (gimple_assign_cast_p (def_stmt))
6559 		    rhs1 = gimple_assign_rhs1 (def_stmt);
6560 		}
6561 	      else if (CONVERT_EXPR_P (offset[i]))
6562 		rhs1 = TREE_OPERAND (offset[i], 0);
6563 	      if (rhs1
6564 		  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6565 		  && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6566 		  && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6567 		      >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6568 		offset[i] = rhs1;
6569 	    }
6570 	  if (!operand_equal_p (offset[0], offset[1], 0)
6571 	      || !operand_equal_p (step[0], step[1], 0))
6572 	    return false;
6573 	}
6574     }
6575   return true;
6576 }
6577 
6578 
6579 enum scan_store_kind {
6580   /* Normal permutation.  */
6581   scan_store_kind_perm,
6582 
6583   /* Whole vector left shift permutation with zero init.  */
6584   scan_store_kind_lshift_zero,
6585 
6586   /* Whole vector left shift permutation and VEC_COND_EXPR.  */
6587   scan_store_kind_lshift_cond
6588 };
6589 
6590 /* Function scan_store_can_perm_p.
6591 
6592    Verify if we can perform the needed permutations or whole vector shifts.
6593    Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6594    If USE_WHOLE_VECTOR is nonnull, it is filled with the enum
6595    scan_store_kind operation to perform at each step.  */
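/* As an illustration, for nunits == 8 the permutation selectors tried are

     step 0:  { 0, 8, 9, 10, 11, 12, 13, 14 }
     step 1:  { 0, 1, 8, 9, 10, 11, 12, 13 }
     step 2:  { 0, 1, 2, 3, 8, 9, 10, 11 }
     final:   { 7, 7, 7, 7, 7, 7, 7, 7 }

   matching the VEC_PERM_EXPRs in the scan examples in check_scan_store.  */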
6596 
6597 static int
6598 scan_store_can_perm_p (tree vectype, tree init,
6599 		       vec<enum scan_store_kind> *use_whole_vector = NULL)
6600 {
6601   enum machine_mode vec_mode = TYPE_MODE (vectype);
6602   unsigned HOST_WIDE_INT nunits;
6603   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6604     return -1;
6605   int units_log2 = exact_log2 (nunits);
6606   if (units_log2 <= 0)
6607     return -1;
6608 
6609   int i;
6610   enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6611   for (i = 0; i <= units_log2; ++i)
6612     {
6613       unsigned HOST_WIDE_INT j, k;
6614       enum scan_store_kind kind = scan_store_kind_perm;
6615       vec_perm_builder sel (nunits, nunits, 1);
6616       sel.quick_grow (nunits);
6617       if (i == units_log2)
6618 	{
6619 	  for (j = 0; j < nunits; ++j)
6620 	    sel[j] = nunits - 1;
6621 	}
6622       else
6623 	{
6624 	  for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6625 	    sel[j] = j;
6626 	  for (k = 0; j < nunits; ++j, ++k)
6627 	    sel[j] = nunits + k;
6628 	}
6629       vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6630       if (!can_vec_perm_const_p (vec_mode, indices))
6631 	{
6632 	  if (i == units_log2)
6633 	    return -1;
6634 
6635 	  if (whole_vector_shift_kind == scan_store_kind_perm)
6636 	    {
6637 	      if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6638 		return -1;
6639 	      whole_vector_shift_kind = scan_store_kind_lshift_zero;
6640 	      /* Whole vector shifts shift in zeros, so if init is an all-zero
6641 		 constant, there is no need to do anything further.  */
6642 	      if ((TREE_CODE (init) != INTEGER_CST
6643 		   && TREE_CODE (init) != REAL_CST)
6644 		  || !initializer_zerop (init))
6645 		{
6646 		  tree masktype = truth_type_for (vectype);
6647 		  if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6648 		    return -1;
6649 		  whole_vector_shift_kind = scan_store_kind_lshift_cond;
6650 		}
6651 	    }
6652 	  kind = whole_vector_shift_kind;
6653 	}
6654       if (use_whole_vector)
6655 	{
6656 	  if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6657 	    use_whole_vector->safe_grow_cleared (i, true);
6658 	  if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6659 	    use_whole_vector->safe_push (kind);
6660 	}
6661     }
6662 
6663   return units_log2;
6664 }
6665 
6666 
6667 /* Function check_scan_store.
6668 
6669    Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */
6670 
6671 static bool
6672 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6673 		  enum vect_def_type rhs_dt, bool slp, tree mask,
6674 		  vect_memory_access_type memory_access_type)
6675 {
6676   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6677   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6678   tree ref_type;
6679 
6680   gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6681   if (slp
6682       || mask
6683       || memory_access_type != VMAT_CONTIGUOUS
6684       || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6685       || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6686       || loop_vinfo == NULL
6687       || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6688       || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6689       || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6690       || !integer_zerop (DR_INIT (dr_info->dr))
6691       || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6692       || !alias_sets_conflict_p (get_alias_set (vectype),
6693 				 get_alias_set (TREE_TYPE (ref_type))))
6694     {
6695       if (dump_enabled_p ())
6696 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6697 			 "unsupported OpenMP scan store.\n");
6698       return false;
6699     }
6700 
6701   /* We need to pattern match code built by OpenMP lowering and simplified
6702      by following optimizations into something we can handle.
6703      #pragma omp simd reduction(inscan,+:r)
6704      for (...)
6705        {
6706 	 r += something ();
6707 	 #pragma omp scan inclusive (r)
6708 	 use (r);
6709        }
6710      shall have body with:
6711        // Initialization for input phase, store the reduction initializer:
6712        _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6713        _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6714        D.2042[_21] = 0;
6715        // Actual input phase:
6716        ...
6717        r.0_5 = D.2042[_20];
6718        _6 = _4 + r.0_5;
6719        D.2042[_20] = _6;
6720        // Initialization for scan phase:
6721        _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6722        _26 = D.2043[_25];
6723        _27 = D.2042[_25];
6724        _28 = _26 + _27;
6725        D.2043[_25] = _28;
6726        D.2042[_25] = _28;
6727        // Actual scan phase:
6728        ...
6729        r.1_8 = D.2042[_20];
6730        ...
6731      The "omp simd array" variable D.2042 holds the privatized copy used
6732      inside of the loop and D.2043 is another one that holds copies of
6733      the current original list item.  The separate GOMP_SIMD_LANE ifn
6734      kinds are there in order to allow optimizing the initializer store
6735      and combiner sequence, e.g. if it is originally some C++ish user
6736      defined reduction, but allow the vectorizer to pattern recognize it
6737      and turn into the appropriate vectorized scan.
6738 
6739      For exclusive scan, this is slightly different:
6740      #pragma omp simd reduction(inscan,+:r)
6741      for (...)
6742        {
6743 	 use (r);
6744 	 #pragma omp scan exclusive (r)
6745 	 r += something ();
6746        }
6747      shall have body with:
6748        // Initialization for input phase, store the reduction initializer:
6749        _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6750        _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6751        D.2042[_21] = 0;
6752        // Actual input phase:
6753        ...
6754        r.0_5 = D.2042[_20];
6755        _6 = _4 + r.0_5;
6756        D.2042[_20] = _6;
6757        // Initialization for scan phase:
6758        _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6759        _26 = D.2043[_25];
6760        D.2044[_25] = _26;
6761        _27 = D.2042[_25];
6762        _28 = _26 + _27;
6763        D.2043[_25] = _28;
6764        // Actual scan phase:
6765        ...
6766        r.1_8 = D.2044[_20];
6767        ...  */
6768 
6769   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6770     {
6771       /* Match the D.2042[_21] = 0; store above.  Just require that
6772 	 it is a constant or external definition store.  */
6773       if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6774 	{
6775 	 fail_init:
6776 	  if (dump_enabled_p ())
6777 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6778 			     "unsupported OpenMP scan initializer store.\n");
6779 	  return false;
6780 	}
6781 
6782       if (! loop_vinfo->scan_map)
6783 	loop_vinfo->scan_map = new hash_map<tree, tree>;
6784       tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6785       tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6786       if (cached)
6787 	goto fail_init;
6788       cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6789 
6790       /* These stores can be vectorized normally.  */
6791       return true;
6792     }
6793 
6794   if (rhs_dt != vect_internal_def)
6795     {
6796      fail:
6797       if (dump_enabled_p ())
6798 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6799 			 "unsupported OpenMP scan combiner pattern.\n");
6800       return false;
6801     }
6802 
6803   gimple *stmt = STMT_VINFO_STMT (stmt_info);
6804   tree rhs = gimple_assign_rhs1 (stmt);
6805   if (TREE_CODE (rhs) != SSA_NAME)
6806     goto fail;
6807 
6808   gimple *other_store_stmt = NULL;
6809   tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6810   bool inscan_var_store
6811     = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6812 
6813   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6814     {
6815       if (!inscan_var_store)
6816 	{
6817 	  use_operand_p use_p;
6818 	  imm_use_iterator iter;
6819 	  FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6820 	    {
6821 	      gimple *use_stmt = USE_STMT (use_p);
6822 	      if (use_stmt == stmt || is_gimple_debug (use_stmt))
6823 		continue;
6824 	      if (gimple_bb (use_stmt) != gimple_bb (stmt)
6825 		  || !is_gimple_assign (use_stmt)
6826 		  || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6827 		  || other_store_stmt
6828 		  || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6829 		goto fail;
6830 	      other_store_stmt = use_stmt;
6831 	    }
6832 	  if (other_store_stmt == NULL)
6833 	    goto fail;
6834 	  rhs = gimple_assign_lhs (other_store_stmt);
6835 	  if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6836 	    goto fail;
6837 	}
6838     }
6839   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6840     {
6841       use_operand_p use_p;
6842       imm_use_iterator iter;
6843       FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6844 	{
6845 	  gimple *use_stmt = USE_STMT (use_p);
6846 	  if (use_stmt == stmt || is_gimple_debug (use_stmt))
6847 	    continue;
6848 	  if (other_store_stmt)
6849 	    goto fail;
6850 	  other_store_stmt = use_stmt;
6851 	}
6852     }
6853   else
6854     goto fail;
6855 
6856   gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6857   if (gimple_bb (def_stmt) != gimple_bb (stmt)
6858       || !is_gimple_assign (def_stmt)
6859       || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6860     goto fail;
6861 
6862   enum tree_code code = gimple_assign_rhs_code (def_stmt);
6863   /* For pointer addition, we should use the normal plus for the vector
6864      operation.  */
6865   switch (code)
6866     {
6867     case POINTER_PLUS_EXPR:
6868       code = PLUS_EXPR;
6869       break;
6870     case MULT_HIGHPART_EXPR:
6871       goto fail;
6872     default:
6873       break;
6874     }
6875   if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6876     goto fail;
6877 
6878   tree rhs1 = gimple_assign_rhs1 (def_stmt);
6879   tree rhs2 = gimple_assign_rhs2 (def_stmt);
6880   if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6881     goto fail;
6882 
6883   gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6884   gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6885   if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6886       || !gimple_assign_load_p (load1_stmt)
6887       || gimple_bb (load2_stmt) != gimple_bb (stmt)
6888       || !gimple_assign_load_p (load2_stmt))
6889     goto fail;
6890 
6891   stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6892   stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6893   if (load1_stmt_info == NULL
6894       || load2_stmt_info == NULL
6895       || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6896 	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6897       || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6898 	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6899     goto fail;
6900 
6901   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6902     {
6903       dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6904       if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6905 	  || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6906 	goto fail;
6907       tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6908       tree lrhs;
6909       if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6910 	lrhs = rhs1;
6911       else
6912 	lrhs = rhs2;
6913       use_operand_p use_p;
6914       imm_use_iterator iter;
6915       FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6916 	{
6917 	  gimple *use_stmt = USE_STMT (use_p);
6918 	  if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6919 	    continue;
6920 	  if (other_store_stmt)
6921 	    goto fail;
6922 	  other_store_stmt = use_stmt;
6923 	}
6924     }
6925 
6926   if (other_store_stmt == NULL)
6927     goto fail;
6928   if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6929       || !gimple_store_p (other_store_stmt))
6930     goto fail;
6931 
6932   stmt_vec_info other_store_stmt_info
6933     = loop_vinfo->lookup_stmt (other_store_stmt);
6934   if (other_store_stmt_info == NULL
6935       || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6936 	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6937     goto fail;
6938 
6939   gimple *stmt1 = stmt;
6940   gimple *stmt2 = other_store_stmt;
6941   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6942     std::swap (stmt1, stmt2);
6943   if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6944 			    gimple_assign_rhs1 (load2_stmt)))
6945     {
6946       std::swap (rhs1, rhs2);
6947       std::swap (load1_stmt, load2_stmt);
6948       std::swap (load1_stmt_info, load2_stmt_info);
6949     }
6950   if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6951 			     gimple_assign_rhs1 (load1_stmt)))
6952     goto fail;
6953 
6954   tree var3 = NULL_TREE;
6955   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6956       && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6957 				gimple_assign_rhs1 (load2_stmt)))
6958     goto fail;
6959   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6960     {
6961       dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6962       if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6963 	  || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6964 	goto fail;
6965       var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6966       if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6967 	  || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6968 	  || lookup_attribute ("omp simd inscan exclusive",
6969 			       DECL_ATTRIBUTES (var3)))
6970 	goto fail;
6971     }
6972 
6973   dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6974   if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6975       || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6976     goto fail;
6977 
6978   tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6979   tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6980   if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6981       || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6982       || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6983 	 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6984     goto fail;
6985 
6986   if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6987     std::swap (var1, var2);
6988 
6989   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6990     {
6991       if (!lookup_attribute ("omp simd inscan exclusive",
6992 			     DECL_ATTRIBUTES (var1)))
6993 	goto fail;
6994       var1 = var3;
6995     }
6996 
6997   if (loop_vinfo->scan_map == NULL)
6998     goto fail;
6999   tree *init = loop_vinfo->scan_map->get (var1);
7000   if (init == NULL)
7001     goto fail;
7002 
7003   /* The IL is as expected, now check if we can actually vectorize it.
7004      Inclusive scan:
7005        _26 = D.2043[_25];
7006        _27 = D.2042[_25];
7007        _28 = _26 + _27;
7008        D.2043[_25] = _28;
7009        D.2042[_25] = _28;
7010      should be vectorized as (where _40 is the vectorized rhs
7011      from the D.2042[_21] = 0; store):
7012        _30 = MEM <vector(8) int> [(int *)&D.2043];
7013        _31 = MEM <vector(8) int> [(int *)&D.2042];
7014        _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7015        _33 = _31 + _32;
7016        // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7017        _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7018        _35 = _33 + _34;
7019        // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7020        //         _31[1]+.._31[4], ... _31[4]+.._31[7] };
7021        _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7022        _37 = _35 + _36;
7023        // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7024        //         _31[0]+.._31[4], ... _31[0]+.._31[7] };
7025        _38 = _30 + _37;
7026        _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7027        MEM <vector(8) int> [(int *)&D.2043] = _39;
7028        MEM <vector(8) int> [(int *)&D.2042] = _38;
7029      Exclusive scan:
7030        _26 = D.2043[_25];
7031        D.2044[_25] = _26;
7032        _27 = D.2042[_25];
7033        _28 = _26 + _27;
7034        D.2043[_25] = _28;
7035      should be vectorized as (where _40 is the vectorized rhs
7036      from the D.2042[_21] = 0; store):
7037        _30 = MEM <vector(8) int> [(int *)&D.2043];
7038        _31 = MEM <vector(8) int> [(int *)&D.2042];
7039        _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7040        _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7041        _34 = _32 + _33;
7042        // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7043        //         _31[3]+_31[4], ... _31[5]+.._31[6] };
7044        _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7045        _36 = _34 + _35;
7046        // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7047        //         _31[1]+.._31[4], ... _31[3]+.._31[6] };
7048        _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7049        _38 = _36 + _37;
7050        // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7051        //         _31[0]+.._31[4], ... _31[0]+.._31[6] };
7052        _39 = _30 + _38;
7053        _50 = _31 + _39;
7054        _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7055        MEM <vector(8) int> [(int *)&D.2044] = _39;
7056        MEM <vector(8) int> [(int *)&D.2042] = _51;  */
7057   enum machine_mode vec_mode = TYPE_MODE (vectype);
7058   optab optab = optab_for_tree_code (code, vectype, optab_default);
7059   if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7060     goto fail;
7061 
7062   int units_log2 = scan_store_can_perm_p (vectype, *init);
7063   if (units_log2 == -1)
7064     goto fail;
7065 
7066   return true;
7067 }
7068 
7069 
7070 /* Function vectorizable_scan_store.
7071 
7072    Helper of vectorizable_store; arguments are like those of vectorizable_store.
7073    Handle only the transformation, checking is done in check_scan_store.  */
7074 
7075 static bool
7076 vectorizable_scan_store (vec_info *vinfo,
7077 			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7078 			 gimple **vec_stmt, int ncopies)
7079 {
7080   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7081   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7082   tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7083   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7084 
7085   if (dump_enabled_p ())
7086     dump_printf_loc (MSG_NOTE, vect_location,
7087 		     "transform scan store. ncopies = %d\n", ncopies);
7088 
7089   gimple *stmt = STMT_VINFO_STMT (stmt_info);
7090   tree rhs = gimple_assign_rhs1 (stmt);
7091   gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7092 
7093   tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7094   bool inscan_var_store
7095     = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7096 
7097   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7098     {
7099       use_operand_p use_p;
7100       imm_use_iterator iter;
7101       FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7102 	{
7103 	  gimple *use_stmt = USE_STMT (use_p);
7104 	  if (use_stmt == stmt || is_gimple_debug (use_stmt))
7105 	    continue;
7106 	  rhs = gimple_assign_lhs (use_stmt);
7107 	  break;
7108 	}
7109     }
7110 
7111   gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7112   enum tree_code code = gimple_assign_rhs_code (def_stmt);
7113   if (code == POINTER_PLUS_EXPR)
7114     code = PLUS_EXPR;
7115   gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7116 	      && commutative_tree_code (code));
7117   tree rhs1 = gimple_assign_rhs1 (def_stmt);
7118   tree rhs2 = gimple_assign_rhs2 (def_stmt);
7119   gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7120   gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7121   gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7122   stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7123   stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7124   dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7125   dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7126   tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7127   tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7128 
7129   if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7130     {
7131       std::swap (rhs1, rhs2);
7132       std::swap (var1, var2);
7133       std::swap (load1_dr_info, load2_dr_info);
7134     }
7135 
7136   tree *init = loop_vinfo->scan_map->get (var1);
7137   gcc_assert (init);
7138 
7139   unsigned HOST_WIDE_INT nunits;
7140   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7141     gcc_unreachable ();
7142   auto_vec<enum scan_store_kind, 16> use_whole_vector;
7143   int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7144   gcc_assert (units_log2 > 0);
7145   auto_vec<tree, 16> perms;
7146   perms.quick_grow (units_log2 + 1);
7147   tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7148   for (int i = 0; i <= units_log2; ++i)
7149     {
7150       unsigned HOST_WIDE_INT j, k;
7151       vec_perm_builder sel (nunits, nunits, 1);
7152       sel.quick_grow (nunits);
7153       if (i == units_log2)
7154 	for (j = 0; j < nunits; ++j)
7155 	  sel[j] = nunits - 1;
7156       else
7157 	{
7158 	  for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7159 	    sel[j] = j;
7160 	  for (k = 0; j < nunits; ++j, ++k)
7161 	    sel[j] = nunits + k;
7162 	}
7163       vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7164       if (!use_whole_vector.is_empty ()
7165 	  && use_whole_vector[i] != scan_store_kind_perm)
7166 	{
7167 	  if (zero_vec == NULL_TREE)
7168 	    zero_vec = build_zero_cst (vectype);
7169 	  if (masktype == NULL_TREE
7170 	      && use_whole_vector[i] == scan_store_kind_lshift_cond)
7171 	    masktype = truth_type_for (vectype);
7172 	  perms[i] = vect_gen_perm_mask_any (vectype, indices);
7173 	}
7174       else
7175 	perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7176     }
7177 
7178   tree vec_oprnd1 = NULL_TREE;
7179   tree vec_oprnd2 = NULL_TREE;
7180   tree vec_oprnd3 = NULL_TREE;
7181   tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7182   tree dataref_offset = build_int_cst (ref_type, 0);
7183   tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7184 					   vectype, VMAT_CONTIGUOUS);
7185   tree ldataref_ptr = NULL_TREE;
7186   tree orig = NULL_TREE;
7187   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7188     ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7189   auto_vec<tree> vec_oprnds1;
7190   auto_vec<tree> vec_oprnds2;
7191   auto_vec<tree> vec_oprnds3;
7192   vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7193 		     *init, &vec_oprnds1,
7194 		     ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7195 		     rhs2, &vec_oprnds3);
7196   for (int j = 0; j < ncopies; j++)
7197     {
7198       vec_oprnd1 = vec_oprnds1[j];
7199       if (ldataref_ptr == NULL)
7200 	vec_oprnd2 = vec_oprnds2[j];
7201       vec_oprnd3 = vec_oprnds3[j];
7202       if (j == 0)
7203 	orig = vec_oprnd3;
7204       else if (!inscan_var_store)
7205 	dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7206 
7207       if (ldataref_ptr)
7208 	{
7209 	  vec_oprnd2 = make_ssa_name (vectype);
7210 	  tree data_ref = fold_build2 (MEM_REF, vectype,
7211 				       unshare_expr (ldataref_ptr),
7212 				       dataref_offset);
7213 	  vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7214 	  gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7215 	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7216 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7217 	  *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7218 	}
7219 
7220       tree v = vec_oprnd2;
7221       for (int i = 0; i < units_log2; ++i)
7222 	{
7223 	  tree new_temp = make_ssa_name (vectype);
7224 	  gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7225 					   (zero_vec
7226 					    && (use_whole_vector[i]
7227 						!= scan_store_kind_perm))
7228 					   ? zero_vec : vec_oprnd1, v,
7229 					   perms[i]);
7230 	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7231 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7232 	  *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7233 
7234 	  if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7235 	    {
7236 	      /* The whole vector shift shifted in zero bits, but if *init
7237 		 is not initializer_zerop, we need to replace those elements
7238 		 with elements from vec_oprnd1.  */
7239 	      tree_vector_builder vb (masktype, nunits, 1);
7240 	      for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7241 		vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7242 			       ? boolean_false_node : boolean_true_node);
7243 
7244 	      tree new_temp2 = make_ssa_name (vectype);
7245 	      g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7246 				       new_temp, vec_oprnd1);
7247 	      vect_finish_stmt_generation (vinfo, stmt_info,
7248 					   g, gsi);
7249 	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7250 	      new_temp = new_temp2;
7251 	    }
7252 
7253 	  /* For exclusive scan, perform the perms[i] permutation once
7254 	     more.  */
7255 	  if (i == 0
7256 	      && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7257 	      && v == vec_oprnd2)
7258 	    {
7259 	      v = new_temp;
7260 	      --i;
7261 	      continue;
7262 	    }
7263 
7264 	  tree new_temp2 = make_ssa_name (vectype);
7265 	  g = gimple_build_assign (new_temp2, code, v, new_temp);
7266 	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7267 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7268 
7269 	  v = new_temp2;
7270 	}
7271 
7272       tree new_temp = make_ssa_name (vectype);
7273       gimple *g = gimple_build_assign (new_temp, code, orig, v);
7274       vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7275       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7276 
7277       tree last_perm_arg = new_temp;
7278       /* For exclusive scan, new_temp computed above is the exclusive scan
7279 	 prefix sum.  Turn it into inclusive prefix sum for the broadcast
7280 	 of the last element into orig.  */
7281       if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7282 	{
7283 	  last_perm_arg = make_ssa_name (vectype);
7284 	  g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7285 	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7286 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7287 	}
7288 
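      /* Permute LAST_PERM_ARG under PERMS[UNITS_LOG2] to form ORIG, the
	 value that is combined (via CODE above) into the next vector copy.  */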
7289       orig = make_ssa_name (vectype);
7290       g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7291 			       last_perm_arg, perms[units_log2]);
7292       vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7293       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7294 
7295       if (!inscan_var_store)
7296 	{
7297 	  tree data_ref = fold_build2 (MEM_REF, vectype,
7298 				       unshare_expr (dataref_ptr),
7299 				       dataref_offset);
7300 	  vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7301 	  g = gimple_build_assign (data_ref, new_temp);
7302 	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7303 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7304 	}
7305     }
7306 
7307   if (inscan_var_store)
7308     for (int j = 0; j < ncopies; j++)
7309       {
7310 	if (j != 0)
7311 	  dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7312 
7313 	tree data_ref = fold_build2 (MEM_REF, vectype,
7314 				     unshare_expr (dataref_ptr),
7315 				     dataref_offset);
7316 	vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7317 	gimple *g = gimple_build_assign (data_ref, orig);
7318 	vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7319 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7320       }
7321   return true;
7322 }
7323 
7324 
7325 /* Function vectorizable_store.
7326 
7327    Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7328    that can be vectorized.
7329    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7330    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7331    Return true if STMT_INFO is vectorizable in this way.  */
7332 
7333 static bool
7334 vectorizable_store (vec_info *vinfo,
7335 		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7336 		    gimple **vec_stmt, slp_tree slp_node,
7337 		    stmt_vector_for_cost *cost_vec)
7338 {
7339   tree data_ref;
7340   tree op;
7341   tree vec_oprnd = NULL_TREE;
7342   tree elem_type;
7343   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7344   class loop *loop = NULL;
7345   machine_mode vec_mode;
7346   tree dummy;
7347   enum vect_def_type rhs_dt = vect_unknown_def_type;
7348   enum vect_def_type mask_dt = vect_unknown_def_type;
7349   tree dataref_ptr = NULL_TREE;
7350   tree dataref_offset = NULL_TREE;
7351   gimple *ptr_incr = NULL;
7352   int ncopies;
7353   int j;
7354   stmt_vec_info first_stmt_info;
7355   bool grouped_store;
7356   unsigned int group_size, i;
7357   vec<tree> oprnds = vNULL;
7358   vec<tree> result_chain = vNULL;
7359   vec<tree> vec_oprnds = vNULL;
7360   bool slp = (slp_node != NULL);
7361   unsigned int vec_num;
7362   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7363   tree aggr_type;
7364   gather_scatter_info gs_info;
7365   poly_uint64 vf;
7366   vec_load_store_type vls_type;
7367   tree ref_type;
7368 
7369   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7370     return false;
7371 
7372   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7373       && ! vec_stmt)
7374     return false;
7375 
7376   /* Is vectorizable store? */
7377 
7378   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7379   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7380     {
7381       tree scalar_dest = gimple_assign_lhs (assign);
7382       if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7383 	  && is_pattern_stmt_p (stmt_info))
7384 	scalar_dest = TREE_OPERAND (scalar_dest, 0);
7385       if (TREE_CODE (scalar_dest) != ARRAY_REF
7386 	  && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7387 	  && TREE_CODE (scalar_dest) != INDIRECT_REF
7388 	  && TREE_CODE (scalar_dest) != COMPONENT_REF
7389 	  && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7390 	  && TREE_CODE (scalar_dest) != REALPART_EXPR
7391 	  && TREE_CODE (scalar_dest) != MEM_REF)
7392 	return false;
7393     }
7394   else
7395     {
7396       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7397       if (!call || !gimple_call_internal_p (call))
7398 	return false;
7399 
7400       internal_fn ifn = gimple_call_internal_fn (call);
7401       if (!internal_store_fn_p (ifn))
7402 	return false;
7403 
7404       if (slp_node != NULL)
7405 	{
7406 	  if (dump_enabled_p ())
7407 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7408 			     "SLP of masked stores not supported.\n");
7409 	  return false;
7410 	}
7411 
7412       int mask_index = internal_fn_mask_index (ifn);
7413       if (mask_index >= 0
7414 	  && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
7415 				      &mask, NULL, &mask_dt, &mask_vectype))
7416 	return false;
7417     }
7418 
7419   op = vect_get_store_rhs (stmt_info);
7420 
7421   /* Cannot have hybrid store SLP -- that would mean storing to the
7422      same location twice.  */
7423   gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7424 
7425   tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7426   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7427 
7428   if (loop_vinfo)
7429     {
7430       loop = LOOP_VINFO_LOOP (loop_vinfo);
7431       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7432     }
7433   else
7434     vf = 1;
7435 
7436   /* Multiple types in SLP are handled by creating the appropriate number of
7437      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
7438      case of SLP.  */
7439   if (slp)
7440     ncopies = 1;
7441   else
7442     ncopies = vect_get_num_copies (loop_vinfo, vectype);
7443 
7444   gcc_assert (ncopies >= 1);
7445 
7446   /* FORNOW.  This restriction should be relaxed.  */
7447   if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7448     {
7449       if (dump_enabled_p ())
7450 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7451 			 "multiple types in nested loop.\n");
7452       return false;
7453     }
7454 
7455   if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7456 			     op, &rhs_dt, &rhs_vectype, &vls_type))
7457     return false;
7458 
7459   elem_type = TREE_TYPE (vectype);
7460   vec_mode = TYPE_MODE (vectype);
7461 
7462   if (!STMT_VINFO_DATA_REF (stmt_info))
7463     return false;
7464 
7465   vect_memory_access_type memory_access_type;
7466   enum dr_alignment_support alignment_support_scheme;
7467   int misalignment;
7468   poly_int64 poffset;
7469   if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7470 			    ncopies, &memory_access_type, &poffset,
7471 			    &alignment_support_scheme, &misalignment, &gs_info))
7472     return false;
7473 
7474   if (mask)
7475     {
7476       if (memory_access_type == VMAT_CONTIGUOUS)
7477 	{
7478 	  if (!VECTOR_MODE_P (vec_mode)
7479 	      || !can_vec_mask_load_store_p (vec_mode,
7480 					     TYPE_MODE (mask_vectype), false))
7481 	    return false;
7482 	}
7483       else if (memory_access_type != VMAT_LOAD_STORE_LANES
7484 	       && (memory_access_type != VMAT_GATHER_SCATTER
7485 		   || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7486 	{
7487 	  if (dump_enabled_p ())
7488 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7489 			     "unsupported access type for masked store.\n");
7490 	  return false;
7491 	}
7492     }
7493   else
7494     {
7495       /* FORNOW.  In some cases we can vectorize even if the data type is
7496 	 not supported (e.g. array initialization with 0).  */
7497       if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7498 	return false;
7499     }
7500 
7501   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7502   grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7503 		   && memory_access_type != VMAT_GATHER_SCATTER
7504 		   && (slp || memory_access_type != VMAT_CONTIGUOUS));
7505   if (grouped_store)
7506     {
7507       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7508       first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7509       group_size = DR_GROUP_SIZE (first_stmt_info);
7510     }
7511   else
7512     {
7513       first_stmt_info = stmt_info;
7514       first_dr_info = dr_info;
7515       group_size = vec_num = 1;
7516     }
7517 
7518   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7519     {
7520       if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7521 			     memory_access_type))
7522 	return false;
7523     }
7524 
7525   if (!vec_stmt) /* transformation not required.  */
7526     {
7527       STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7528 
7529       if (loop_vinfo
7530 	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7531 	check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
7532 					      group_size, memory_access_type,
7533 					      ncopies, &gs_info, mask);
7534 
7535       if (slp_node
7536 	  && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7537 						vectype))
7538 	{
7539 	  if (dump_enabled_p ())
7540 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7541 			     "incompatible vector types for invariants\n");
7542 	  return false;
7543 	}
7544 
7545       if (dump_enabled_p ()
7546 	  && memory_access_type != VMAT_ELEMENTWISE
7547 	  && memory_access_type != VMAT_GATHER_SCATTER
7548 	  && alignment_support_scheme != dr_aligned)
7549 	dump_printf_loc (MSG_NOTE, vect_location,
7550 			 "Vectorizing an unaligned access.\n");
7551 
7552       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7553       vect_model_store_cost (vinfo, stmt_info, ncopies,
7554 			     memory_access_type, alignment_support_scheme,
7555 			     misalignment, vls_type, slp_node, cost_vec);
7556       return true;
7557     }
7558   gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7559 
7560   /* Transform.  */
7561 
7562   ensure_base_align (dr_info);
7563 
7564   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7565     {
7566       tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7567       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7568       tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7569       tree ptr, var, scale, vec_mask;
7570       tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7571       tree mask_halfvectype = mask_vectype;
7572       edge pe = loop_preheader_edge (loop);
7573       gimple_seq seq;
7574       basic_block new_bb;
7575       enum { NARROW, NONE, WIDEN } modifier;
7576       poly_uint64 scatter_off_nunits
7577 	= TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7578 
7579       if (known_eq (nunits, scatter_off_nunits))
7580 	modifier = NONE;
7581       else if (known_eq (nunits * 2, scatter_off_nunits))
7582 	{
7583 	  modifier = WIDEN;
7584 
7585 	  /* Currently gathers and scatters are only supported for
7586 	     fixed-length vectors.  */
7587 	  unsigned int count = scatter_off_nunits.to_constant ();
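	  /* SEL = { COUNT/2, ..., COUNT-1, COUNT/2, ..., COUNT-1 }, i.e. a
	     permutation that copies the high half of the offset vector into
	     both halves.  */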
7588 	  vec_perm_builder sel (count, count, 1);
7589 	  for (i = 0; i < (unsigned int) count; ++i)
7590 	    sel.quick_push (i | (count / 2));
7591 
7592 	  vec_perm_indices indices (sel, 1, count);
7593 	  perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7594 						  indices);
7595 	  gcc_assert (perm_mask != NULL_TREE);
7596 	}
7597       else if (known_eq (nunits, scatter_off_nunits * 2))
7598 	{
7599 	  modifier = NARROW;
7600 
7601 	  /* Currently gathers and scatters are only supported for
7602 	     fixed-length vectors.  */
7603 	  unsigned int count = nunits.to_constant ();
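	  /* SEL = { COUNT/2, ..., COUNT-1, COUNT/2, ..., COUNT-1 }; the odd
	     copies use it to access the high half of the stored rhs vector.  */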
7604 	  vec_perm_builder sel (count, count, 1);
7605 	  for (i = 0; i < (unsigned int) count; ++i)
7606 	    sel.quick_push (i | (count / 2));
7607 
7608 	  vec_perm_indices indices (sel, 2, count);
7609 	  perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7610 	  gcc_assert (perm_mask != NULL_TREE);
7611 	  ncopies *= 2;
7612 
7613 	  if (mask)
7614 	    mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7615 	}
7616       else
7617 	gcc_unreachable ();
7618 
7619       rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7620       ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7621       masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7622       idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7623       srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7624       scaletype = TREE_VALUE (arglist);
7625 
7626       gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7627 			   && TREE_CODE (rettype) == VOID_TYPE);
7628 
7629       ptr = fold_convert (ptrtype, gs_info.base);
7630       if (!is_gimple_min_invariant (ptr))
7631 	{
7632 	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7633 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7634 	  gcc_assert (!new_bb);
7635 	}
7636 
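      /* The builtin takes a mask operand even for an unconditional store;
	 use an all-ones mask when no scalar mask is present.  */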
7637       if (mask == NULL_TREE)
7638 	{
7639 	  mask_arg = build_int_cst (masktype, -1);
7640 	  mask_arg = vect_init_vector (vinfo, stmt_info,
7641 				       mask_arg, masktype, NULL);
7642 	}
7643 
7644       scale = build_int_cst (scaletype, gs_info.scale);
7645 
7646       auto_vec<tree> vec_oprnds0;
7647       auto_vec<tree> vec_oprnds1;
7648       auto_vec<tree> vec_masks;
7649       if (mask)
7650 	{
7651 	  tree mask_vectype = truth_type_for (vectype);
7652 	  vect_get_vec_defs_for_operand (vinfo, stmt_info,
7653 					 modifier == NARROW
7654 					 ? ncopies / 2 : ncopies,
7655 					 mask, &vec_masks, mask_vectype);
7656 	}
7657       vect_get_vec_defs_for_operand (vinfo, stmt_info,
7658 				     modifier == WIDEN
7659 				     ? ncopies / 2 : ncopies,
7660 				     gs_info.offset, &vec_oprnds0);
7661       vect_get_vec_defs_for_operand (vinfo, stmt_info,
7662 				     modifier == NARROW
7663 				     ? ncopies / 2 : ncopies,
7664 				     op, &vec_oprnds1);
7665       for (j = 0; j < ncopies; ++j)
7666 	{
7667 	  if (modifier == WIDEN)
7668 	    {
7669 	      if (j & 1)
7670 		op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7671 					   perm_mask, stmt_info, gsi);
7672 	      else
7673 		op = vec_oprnd0 = vec_oprnds0[j / 2];
7674 	      src = vec_oprnd1 = vec_oprnds1[j];
7675 	      if (mask)
7676 		mask_op = vec_mask = vec_masks[j];
7677 	    }
7678 	  else if (modifier == NARROW)
7679 	    {
7680 	      if (j & 1)
7681 		src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7682 					    perm_mask, stmt_info, gsi);
7683 	      else
7684 		src = vec_oprnd1 = vec_oprnds1[j / 2];
7685 	      op = vec_oprnd0 = vec_oprnds0[j];
7686 	      if (mask)
7687 		mask_op = vec_mask = vec_masks[j / 2];
7688 	    }
7689 	  else
7690 	    {
7691 	      op = vec_oprnd0 = vec_oprnds0[j];
7692 	      src = vec_oprnd1 = vec_oprnds1[j];
7693 	      if (mask)
7694 		mask_op = vec_mask = vec_masks[j];
7695 	    }
7696 
7697 	  if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7698 	    {
7699 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7700 				    TYPE_VECTOR_SUBPARTS (srctype)));
7701 	      var = vect_get_new_ssa_name (srctype, vect_simple_var);
7702 	      src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7703 	      gassign *new_stmt
7704 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7705 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7706 	      src = var;
7707 	    }
7708 
7709 	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7710 	    {
7711 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7712 				    TYPE_VECTOR_SUBPARTS (idxtype)));
7713 	      var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7714 	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7715 	      gassign *new_stmt
7716 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7717 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7718 	      op = var;
7719 	    }
7720 
7721 	  if (mask)
7722 	    {
7723 	      tree utype;
7724 	      mask_arg = mask_op;
7725 	      if (modifier == NARROW)
7726 		{
7727 		  var = vect_get_new_ssa_name (mask_halfvectype,
7728 					       vect_simple_var);
7729 		  gassign *new_stmt
7730 		    = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7731 							: VEC_UNPACK_LO_EXPR,
7732 					   mask_op);
7733 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7734 		  mask_arg = var;
7735 		}
7736 	      tree optype = TREE_TYPE (mask_arg);
7737 	      if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7738 		utype = masktype;
7739 	      else
7740 		utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7741 	      var = vect_get_new_ssa_name (utype, vect_scalar_var);
7742 	      mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7743 	      gassign *new_stmt
7744 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7745 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7746 	      mask_arg = var;
7747 	      if (!useless_type_conversion_p (masktype, utype))
7748 		{
7749 		  gcc_assert (TYPE_PRECISION (utype)
7750 			      <= TYPE_PRECISION (masktype));
7751 		  var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7752 		  new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7753 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7754 		  mask_arg = var;
7755 		}
7756 	    }
7757 
7758 	  gcall *new_stmt
7759 	    = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7760 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7761 
7762 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7763 	}
7764       *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7765       return true;
7766     }
7767   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7768     return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7769 
7770   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7771     DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7772 
7773   if (grouped_store)
7774     {
7775       /* FORNOW */
7776       gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7777 
7778       /* We vectorize all the stmts of the interleaving group when we
7779 	 reach the last stmt in the group.  */
7780       if (DR_GROUP_STORE_COUNT (first_stmt_info)
7781 	  < DR_GROUP_SIZE (first_stmt_info)
7782 	  && !slp)
7783 	{
7784 	  *vec_stmt = NULL;
7785 	  return true;
7786 	}
7787 
7788       if (slp)
7789         {
7790           grouped_store = false;
7791           /* VEC_NUM is the number of vect stmts to be created for this
7792              group.  */
7793           vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7794 	  first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7795 	  gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7796 		      == first_stmt_info);
7797 	  first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7798 	  op = vect_get_store_rhs (first_stmt_info);
7799         }
7800       else
7801         /* VEC_NUM is the number of vect stmts to be created for this
7802            group.  */
7803 	vec_num = group_size;
7804 
7805       ref_type = get_group_alias_ptr_type (first_stmt_info);
7806     }
7807   else
7808     ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7809 
7810   if (dump_enabled_p ())
7811     dump_printf_loc (MSG_NOTE, vect_location,
7812                      "transform store. ncopies = %d\n", ncopies);
7813 
7814   if (memory_access_type == VMAT_ELEMENTWISE
7815       || memory_access_type == VMAT_STRIDED_SLP)
7816     {
7817       gimple_stmt_iterator incr_gsi;
7818       bool insert_after;
7819       gimple *incr;
7820       tree offvar;
7821       tree ivstep;
7822       tree running_off;
7823       tree stride_base, stride_step, alias_off;
7824       tree vec_oprnd;
7825       tree dr_offset;
7826       unsigned int g;
7827       /* Checked by get_load_store_type.  */
7828       unsigned int const_nunits = nunits.to_constant ();
7829 
7830       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7831       gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7832 
7833       dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7834       stride_base
7835 	= fold_build_pointer_plus
7836 	    (DR_BASE_ADDRESS (first_dr_info->dr),
7837 	     size_binop (PLUS_EXPR,
7838 			 convert_to_ptrofftype (dr_offset),
7839 			 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7840       stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7841 
7842       /* For a store with a loop-invariant stride that is not a power of 2
7843          (i.e. not a grouped access), like so:
7844 
7845 	   for (i = 0; i < n; i += stride)
7846 	     array[i] = ...;
7847 
7848 	 we generate a new induction variable and new stores from
7849 	 the components of the (vectorized) rhs:
7850 
7851 	   for (j = 0; ; j += VF*stride)
7852 	     vectemp = ...;
7853 	     tmp1 = vectemp[0];
7854 	     array[j] = tmp1;
7855 	     tmp2 = vectemp[1];
7856 	     array[j + stride] = tmp2;
7857 	     ...
7858          */
7859 
7860       unsigned nstores = const_nunits;
7861       unsigned lnel = 1;
7862       tree ltype = elem_type;
7863       tree lvectype = vectype;
7864       if (slp)
7865 	{
7866 	  if (group_size < const_nunits
7867 	      && const_nunits % group_size == 0)
7868 	    {
7869 	      nstores = const_nunits / group_size;
7870 	      lnel = group_size;
7871 	      ltype = build_vector_type (elem_type, group_size);
7872 	      lvectype = vectype;
7873 
7874 	      /* First check whether the vec_extract optab supports extracting
7875 		 these GROUP_SIZE-element sub-vectors directly; if not, try
7876 		 the fallbacks below.  */
7876 	      scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7877 	      machine_mode vmode;
7878 	      if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7879 		  || !related_vector_mode (TYPE_MODE (vectype), elmode,
7880 					   group_size).exists (&vmode)
7881 		  || (convert_optab_handler (vec_extract_optab,
7882 					     TYPE_MODE (vectype), vmode)
7883 		      == CODE_FOR_nothing))
7884 		{
7885 		  /* Try to avoid emitting an extract of vector elements
7886 		     by performing the extracts using an integer type of the
7887 		     same size, extracting from a vector of those and then
7888 		     re-interpreting it as the original vector type if
7889 		     supported.  */
7890 		  unsigned lsize
7891 		    = group_size * GET_MODE_BITSIZE (elmode);
7892 		  unsigned int lnunits = const_nunits / group_size;
7893 		  /* If we can't construct such a vector fall back to
7894 		     element extracts from the original vector type and
7895 		     element size stores.  */
7896 		  if (int_mode_for_size (lsize, 0).exists (&elmode)
7897 		      && VECTOR_MODE_P (TYPE_MODE (vectype))
7898 		      && related_vector_mode (TYPE_MODE (vectype), elmode,
7899 					      lnunits).exists (&vmode)
7900 		      && (convert_optab_handler (vec_extract_optab,
7901 						 vmode, elmode)
7902 			  != CODE_FOR_nothing))
7903 		    {
7904 		      nstores = lnunits;
7905 		      lnel = group_size;
7906 		      ltype = build_nonstandard_integer_type (lsize, 1);
7907 		      lvectype = build_vector_type (ltype, nstores);
7908 		    }
7909 		  /* Else fall back to vector extraction anyway.
7910 		     Fewer stores are more important than avoiding spilling
7911 		     of the vector we extract from.  Compared to the
7912 		     construction case in vectorizable_load no store-forwarding
7913 		     issue exists here for reasonable archs.  */
7914 		}
7915 	    }
7916 	  else if (group_size >= const_nunits
7917 		   && group_size % const_nunits == 0)
7918 	    {
7919 	      nstores = 1;
7920 	      lnel = const_nunits;
7921 	      ltype = vectype;
7922 	      lvectype = vectype;
7923 	    }
7924 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7925 	  ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7926 	}
7927 
7928       ivstep = stride_step;
7929       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7930 			    build_int_cst (TREE_TYPE (ivstep), vf));
7931 
7932       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7933 
7934       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7935       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7936       create_iv (stride_base, ivstep, NULL,
7937 		 loop, &incr_gsi, insert_after,
7938 		 &offvar, NULL);
7939       incr = gsi_stmt (incr_gsi);
7940 
7941       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7942 
7943       alias_off = build_int_cst (ref_type, 0);
7944       stmt_vec_info next_stmt_info = first_stmt_info;
7945       for (g = 0; g < group_size; g++)
7946 	{
7947 	  running_off = offvar;
7948 	  if (g)
7949 	    {
7950 	      tree size = TYPE_SIZE_UNIT (ltype);
7951 	      tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7952 				      size);
7953 	      tree newoff = copy_ssa_name (running_off, NULL);
7954 	      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7955 					  running_off, pos);
7956 	      vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7957 	      running_off = newoff;
7958 	    }
7959 	  if (!slp)
7960 	    op = vect_get_store_rhs (next_stmt_info);
7961 	  vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
7962 			     op, &vec_oprnds);
7963 	  unsigned int group_el = 0;
7964 	  unsigned HOST_WIDE_INT
7965 	    elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7966 	  for (j = 0; j < ncopies; j++)
7967 	    {
7968 	      vec_oprnd = vec_oprnds[j];
7969 	      /* Pun the vector to extract from if necessary.  */
7970 	      if (lvectype != vectype)
7971 		{
7972 		  tree tem = make_ssa_name (lvectype);
7973 		  gimple *pun
7974 		    = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7975 							lvectype, vec_oprnd));
7976 		  vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
7977 		  vec_oprnd = tem;
7978 		}
7979 	      for (i = 0; i < nstores; i++)
7980 		{
7981 		  tree newref, newoff;
7982 		  gimple *incr, *assign;
7983 		  tree size = TYPE_SIZE (ltype);
7984 		  /* Extract the i'th component.  */
7985 		  tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7986 					  bitsize_int (i), size);
7987 		  tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7988 					   size, pos);
7989 
7990 		  elem = force_gimple_operand_gsi (gsi, elem, true,
7991 						   NULL_TREE, true,
7992 						   GSI_SAME_STMT);
7993 
7994 		  tree this_off = build_int_cst (TREE_TYPE (alias_off),
7995 						 group_el * elsz);
7996 		  newref = build2 (MEM_REF, ltype,
7997 				   running_off, this_off);
7998 		  vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7999 
8000 		  /* And store it to *running_off.  */
8001 		  assign = gimple_build_assign (newref, elem);
8002 		  vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8003 
8004 		  group_el += lnel;
8005 		  if (! slp
8006 		      || group_el == group_size)
8007 		    {
8008 		      newoff = copy_ssa_name (running_off, NULL);
8009 		      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8010 						  running_off, stride_step);
8011 		      vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8012 
8013 		      running_off = newoff;
8014 		      group_el = 0;
8015 		    }
8016 		  if (g == group_size - 1
8017 		      && !slp)
8018 		    {
8019 		      if (j == 0 && i == 0)
8020 			*vec_stmt = assign;
8021 		      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8022 		    }
8023 		}
8024 	    }
8025 	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8026 	  vec_oprnds.release ();
8027 	  if (slp)
8028 	    break;
8029 	}
8030 
8031       return true;
8032     }
8033 
8034   auto_vec<tree> dr_chain (group_size);
8035   oprnds.create (group_size);
8036 
8037   gcc_assert (alignment_support_scheme);
8038   vec_loop_masks *loop_masks
8039     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8040        ? &LOOP_VINFO_MASKS (loop_vinfo)
8041        : NULL);
8042   vec_loop_lens *loop_lens
8043     = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8044        ? &LOOP_VINFO_LENS (loop_vinfo)
8045        : NULL);
8046 
8047   /* Shouldn't go with length-based approach if fully masked.  */
8048   gcc_assert (!loop_lens || !loop_masks);
8049 
8050   /* Targets with store-lane instructions must not require explicit
8051      realignment.  vect_supportable_dr_alignment always returns either
8052      dr_aligned or dr_unaligned_supported for masked operations.  */
8053   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8054 	       && !mask
8055 	       && !loop_masks)
8056 	      || alignment_support_scheme == dr_aligned
8057 	      || alignment_support_scheme == dr_unaligned_supported);
8058 
8059   tree offset = NULL_TREE;
8060   if (!known_eq (poffset, 0))
8061     offset = size_int (poffset);
8062 
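  /* Decide what each copy accesses (AGGR_TYPE) and how far the data pointer
     advances between copies (BUMP); gather/scatter and store-lanes accesses
     need different choices here.  */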
8063   tree bump;
8064   tree vec_offset = NULL_TREE;
8065   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8066     {
8067       aggr_type = NULL_TREE;
8068       bump = NULL_TREE;
8069     }
8070   else if (memory_access_type == VMAT_GATHER_SCATTER)
8071     {
8072       aggr_type = elem_type;
8073       vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8074 				       &bump, &vec_offset);
8075     }
8076   else
8077     {
8078       if (memory_access_type == VMAT_LOAD_STORE_LANES)
8079 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8080       else
8081 	aggr_type = vectype;
8082       bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
8083 					  memory_access_type);
8084     }
8085 
8086   if (mask)
8087     LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8088 
8089   /* In case the vectorization factor (VF) is bigger than the number
8090      of elements that we can fit in a vectype (nunits), we have to generate
8091      more than one vector stmt - i.e - we need to "unroll" the
8092      vector stmt by a factor VF/nunits.  */
8093 
8094   /* In case of interleaving (non-unit grouped access):
8095 
8096         S1:  &base + 2 = x2
8097         S2:  &base = x0
8098         S3:  &base + 1 = x1
8099         S4:  &base + 3 = x3
8100 
8101      We create vectorized stores starting from base address (the access of the
8102      first stmt in the chain (S2 in the above example), when the last store stmt
8103      of the chain (S4) is reached:
8104 
8105         VS1: &base = vx2
8106 	VS2: &base + vec_size*1 = vx0
8107 	VS3: &base + vec_size*2 = vx1
8108 	VS4: &base + vec_size*3 = vx3
8109 
8110      Then permutation statements are generated:
8111 
8112 	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8113 	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8114 	...
8115 
8116      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8117      (the order of the data-refs in the output of vect_permute_store_chain
8118      corresponds to the order of scalar stmts in the interleaving chain - see
8119      the documentation of vect_permute_store_chain()).
8120 
8121      In case of both multiple types and interleaving, above vector stores and
8122      permutation stmts are created for every copy.  The result vector stmts are
8123      put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8124      STMT_VINFO_RELATED_STMT for the next copies.
8125   */
8126 
8127   auto_vec<tree> vec_masks;
8128   tree vec_mask = NULL;
8129   auto_vec<tree> vec_offsets;
8130   auto_vec<vec<tree> > gvec_oprnds;
8131   gvec_oprnds.safe_grow_cleared (group_size, true);
8132   for (j = 0; j < ncopies; j++)
8133     {
8134       gimple *new_stmt;
8135       if (j == 0)
8136 	{
8137           if (slp)
8138             {
8139 	      /* Get vectorized arguments for SLP_NODE.  */
8140 	      vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8141 				 op, &vec_oprnds);
8142               vec_oprnd = vec_oprnds[0];
8143             }
8144           else
8145             {
8146 	      /* For interleaved stores we collect vectorized defs for all the
8147 		 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8148 		 used as an input to vect_permute_store_chain().
8149 
8150 		 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8151 		 and OPRNDS are of size 1.  */
8152 	      stmt_vec_info next_stmt_info = first_stmt_info;
8153 	      for (i = 0; i < group_size; i++)
8154 		{
8155 		  /* Since gaps are not supported for interleaved stores,
8156 		     DR_GROUP_SIZE is the exact number of stmts in the chain.
8157 		     Therefore, NEXT_STMT_INFO can't be NULL.  In case
8158 		     that there is no interleaving, DR_GROUP_SIZE is 1,
8159 		     and only one iteration of the loop will be executed.  */
8160 		  op = vect_get_store_rhs (next_stmt_info);
8161 		  vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8162 						 ncopies, op, &gvec_oprnds[i]);
8163 		  vec_oprnd = gvec_oprnds[i][0];
8164 		  dr_chain.quick_push (gvec_oprnds[i][0]);
8165 		  oprnds.quick_push (gvec_oprnds[i][0]);
8166 		  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8167 		}
8168 	      if (mask)
8169 		{
8170 		  vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8171 						 mask, &vec_masks, mask_vectype);
8172 		  vec_mask = vec_masks[0];
8173 		}
8174 	    }
8175 
8176 	  /* We should have caught mismatched types earlier.  */
8177 	  gcc_assert (useless_type_conversion_p (vectype,
8178 						 TREE_TYPE (vec_oprnd)));
8179 	  bool simd_lane_access_p
8180 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8181 	  if (simd_lane_access_p
8182 	      && !loop_masks
8183 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8184 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8185 	      && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8186 	      && integer_zerop (DR_INIT (first_dr_info->dr))
8187 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
8188 					get_alias_set (TREE_TYPE (ref_type))))
8189 	    {
8190 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8191 	      dataref_offset = build_int_cst (ref_type, 0);
8192 	    }
8193 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8194 	    {
8195 	      vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8196 					   slp_node, &gs_info, &dataref_ptr,
8197 					   &vec_offsets);
8198 	      vec_offset = vec_offsets[0];
8199 	    }
8200 	  else
8201 	    dataref_ptr
8202 	      = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8203 					  simd_lane_access_p ? loop : NULL,
8204 					  offset, &dummy, gsi, &ptr_incr,
8205 					  simd_lane_access_p, bump);
8206 	}
8207       else
8208 	{
8209 	  /* For interleaved stores we created vectorized defs for all the
8210 	     defs stored in OPRNDS in the previous iteration (previous copy).
8211 	     DR_CHAIN is then used as an input to vect_permute_store_chain().
8212 	     If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8213 	     OPRNDS are of size 1.  */
8214 	  for (i = 0; i < group_size; i++)
8215 	    {
8216 	      vec_oprnd = gvec_oprnds[i][j];
8217 	      dr_chain[i] = gvec_oprnds[i][j];
8218 	      oprnds[i] = gvec_oprnds[i][j];
8219 	    }
8220 	  if (mask)
8221 	    vec_mask = vec_masks[j];
8222 	  if (dataref_offset)
8223 	    dataref_offset
8224 	      = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8225 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8226 	    vec_offset = vec_offsets[j];
8227 	  else
8228 	    dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8229 					   stmt_info, bump);
8230 	}
8231 
8232       if (memory_access_type == VMAT_LOAD_STORE_LANES)
8233 	{
8234 	  tree vec_array;
8235 
8236 	  /* Get an array into which we can store the individual vectors.  */
8237 	  vec_array = create_vector_array (vectype, vec_num);
8238 
8239 	  /* Invalidate the current contents of VEC_ARRAY.  This should
8240 	     become an RTL clobber too, which prevents the vector registers
8241 	     from being upward-exposed.  */
8242 	  vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8243 
8244 	  /* Store the individual vectors into the array.  */
8245 	  for (i = 0; i < vec_num; i++)
8246 	    {
8247 	      vec_oprnd = dr_chain[i];
8248 	      write_vector_array (vinfo, stmt_info,
8249 				  gsi, vec_oprnd, vec_array, i);
8250 	    }
8251 
8252 	  tree final_mask = NULL;
8253 	  if (loop_masks)
8254 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8255 					     vectype, j);
8256 	  if (vec_mask)
8257 	    final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8258 					   final_mask, vec_mask, gsi);
8259 
8260 	  gcall *call;
8261 	  if (final_mask)
8262 	    {
8263 	      /* Emit:
8264 		   MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8265 				     VEC_ARRAY).  */
8266 	      unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8267 	      tree alias_ptr = build_int_cst (ref_type, align);
8268 	      call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8269 						 dataref_ptr, alias_ptr,
8270 						 final_mask, vec_array);
8271 	    }
8272 	  else
8273 	    {
8274 	      /* Emit:
8275 		   MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
8276 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8277 	      call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8278 						 vec_array);
8279 	      gimple_call_set_lhs (call, data_ref);
8280 	    }
8281 	  gimple_call_set_nothrow (call, true);
8282 	  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8283 	  new_stmt = call;
8284 
8285 	  /* Record that VEC_ARRAY is now dead.  */
8286 	  vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8287 	}
8288       else
8289 	{
8290 	  new_stmt = NULL;
8291 	  if (grouped_store)
8292 	    {
8293 	      if (j == 0)
8294 		result_chain.create (group_size);
8295 	      /* Permute.  */
8296 	      vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8297 					gsi, &result_chain);
8298 	    }
8299 
8300 	  stmt_vec_info next_stmt_info = first_stmt_info;
8301 	  for (i = 0; i < vec_num; i++)
8302 	    {
8303 	      unsigned misalign;
8304 	      unsigned HOST_WIDE_INT align;
8305 
8306 	      tree final_mask = NULL_TREE;
8307 	      if (loop_masks)
8308 		final_mask = vect_get_loop_mask (gsi, loop_masks,
8309 						 vec_num * ncopies,
8310 						 vectype, vec_num * j + i);
8311 	      if (vec_mask)
8312 		final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8313 					       final_mask, vec_mask, gsi);
8314 
8315 	      if (memory_access_type == VMAT_GATHER_SCATTER)
8316 		{
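		  /* Emit:
		       MASK_SCATTER_STORE (DATAREF_PTR, VEC_OFFSET, SCALE,
					   VEC_OPRND, FINAL_MASK)
		     or SCATTER_STORE (DATAREF_PTR, VEC_OFFSET, SCALE,
				       VEC_OPRND).  */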
8317 		  tree scale = size_int (gs_info.scale);
8318 		  gcall *call;
8319 		  if (final_mask)
8320 		    call = gimple_build_call_internal
8321 		      (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8322 		       scale, vec_oprnd, final_mask);
8323 		  else
8324 		    call = gimple_build_call_internal
8325 		      (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8326 		       scale, vec_oprnd);
8327 		  gimple_call_set_nothrow (call, true);
8328 		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8329 		  new_stmt = call;
8330 		  break;
8331 		}
8332 
8333 	      if (i > 0)
8334 		/* Bump the vector pointer.  */
8335 		dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8336 					       gsi, stmt_info, bump);
8337 
8338 	      if (slp)
8339 		vec_oprnd = vec_oprnds[i];
8340 	      else if (grouped_store)
8341 		/* For grouped stores vectorized defs are interleaved in
8342 		   vect_permute_store_chain().  */
8343 		vec_oprnd = result_chain[i];
8344 
8345 	      align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8346 	      if (alignment_support_scheme == dr_aligned)
8347 		misalign = 0;
8348 	      else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
8349 		{
8350 		  align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8351 		  misalign = 0;
8352 		}
8353 	      else
8354 		misalign = misalignment;
8355 	      if (dataref_offset == NULL_TREE
8356 		  && TREE_CODE (dataref_ptr) == SSA_NAME)
8357 		set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8358 					misalign);
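	      /* The alignment we can rely on is the lowest set bit of
		 MISALIGN | ALIGN, i.e. the largest power of two dividing
		 both the target alignment and the known misalignment.  */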
8359 	      align = least_bit_hwi (misalign | align);
8360 
8361 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8362 		{
8363 		  tree perm_mask = perm_mask_for_reverse (vectype);
8364 		  tree perm_dest = vect_create_destination_var
8365 		    (vect_get_store_rhs (stmt_info), vectype);
8366 		  tree new_temp = make_ssa_name (perm_dest);
8367 
8368 		  /* Generate the permute statement.  */
8369 		  gimple *perm_stmt
8370 		    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8371 					   vec_oprnd, perm_mask);
8372 		  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8373 
8374 		  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8375 		  vec_oprnd = new_temp;
8376 		}
8377 
8378 	      /* Arguments are ready.  Create the new vector stmt.  */
8379 	      if (final_mask)
8380 		{
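		  /* Emit:
		       MASK_STORE (DATAREF_PTR, ALIAS_PTR, FINAL_MASK,
				   VEC_OPRND).  */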
8381 		  tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8382 		  gcall *call
8383 		    = gimple_build_call_internal (IFN_MASK_STORE, 4,
8384 						  dataref_ptr, ptr,
8385 						  final_mask, vec_oprnd);
8386 		  gimple_call_set_nothrow (call, true);
8387 		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8388 		  new_stmt = call;
8389 		}
8390 	      else if (loop_lens)
8391 		{
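		  /* Emit:
		       LEN_STORE (DATAREF_PTR, ALIAS_PTR, FINAL_LEN,
				  VEC_OPRND).  */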
8392 		  tree final_len
8393 		    = vect_get_loop_len (loop_vinfo, loop_lens,
8394 					 vec_num * ncopies, vec_num * j + i);
8395 		  tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8396 		  machine_mode vmode = TYPE_MODE (vectype);
8397 		  opt_machine_mode new_ovmode
8398 		    = get_len_load_store_mode (vmode, false);
8399 		  machine_mode new_vmode = new_ovmode.require ();
8400 		  /* Need conversion if it's wrapped with VnQI.  */
8401 		  if (vmode != new_vmode)
8402 		    {
8403 		      tree new_vtype
8404 			= build_vector_type_for_mode (unsigned_intQI_type_node,
8405 						      new_vmode);
8406 		      tree var
8407 			= vect_get_new_ssa_name (new_vtype, vect_simple_var);
8408 		      vec_oprnd
8409 			= build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8410 		      gassign *new_stmt
8411 			= gimple_build_assign (var, VIEW_CONVERT_EXPR,
8412 					       vec_oprnd);
8413 		      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8414 						   gsi);
8415 		      vec_oprnd = var;
8416 		    }
8417 		  gcall *call
8418 		    = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
8419 						  ptr, final_len, vec_oprnd);
8420 		  gimple_call_set_nothrow (call, true);
8421 		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8422 		  new_stmt = call;
8423 		}
8424 	      else
8425 		{
8426 		  data_ref = fold_build2 (MEM_REF, vectype,
8427 					  dataref_ptr,
8428 					  dataref_offset
8429 					  ? dataref_offset
8430 					  : build_int_cst (ref_type, 0));
8431 		  if (alignment_support_scheme == dr_aligned)
8432 		    ;
8433 		  else
8434 		    TREE_TYPE (data_ref)
8435 		      = build_aligned_type (TREE_TYPE (data_ref),
8436 					    align * BITS_PER_UNIT);
8437 		  vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8438 		  new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8439 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8440 		}
8441 
8442 	      if (slp)
8443 		continue;
8444 
8445 	      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8446 	      if (!next_stmt_info)
8447 		break;
8448 	    }
8449 	}
8450       if (!slp)
8451 	{
8452 	  if (j == 0)
8453 	    *vec_stmt = new_stmt;
8454 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8455 	}
8456     }
8457 
8458   for (i = 0; i < group_size; ++i)
8459     {
8460       vec<tree> oprndsi = gvec_oprnds[i];
8461       oprndsi.release ();
8462     }
8463   oprnds.release ();
8464   result_chain.release ();
8465   vec_oprnds.release ();
8466 
8467   return true;
8468 }
8469 
8470 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8471    VECTOR_CST mask.  No checks are made that the target platform supports the
8472    mask, so callers may wish to test can_vec_perm_const_p separately, or use
8473    vect_gen_perm_mask_checked.  */
8474 
8475 tree
8476 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8477 {
8478   tree mask_type;
8479 
8480   poly_uint64 nunits = sel.length ();
8481   gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8482 
8483   mask_type = build_vector_type (ssizetype, nunits);
8484   return vec_perm_indices_to_tree (mask_type, sel);
8485 }
8486 
8487 /* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
8488    i.e. that the target supports the pattern _for arbitrary input vectors_.  */
8489 
8490 tree
8491 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8492 {
8493   gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8494   return vect_gen_perm_mask_any (vectype, sel);
8495 }
8496 
8497 /* Given vector variables X and Y that were generated for the scalar
8498    STMT_INFO, generate instructions to permute the vector elements of X and Y
8499    using permutation mask MASK_VEC, insert them at *GSI and return the
8500    permuted vector variable.  */
8501 
8502 static tree
8503 permute_vec_elements (vec_info *vinfo,
8504 		      tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8505 		      gimple_stmt_iterator *gsi)
8506 {
8507   tree vectype = TREE_TYPE (x);
8508   tree perm_dest, data_ref;
8509   gimple *perm_stmt;
8510 
8511   tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8512   if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8513     perm_dest = vect_create_destination_var (scalar_dest, vectype);
8514   else
8515     perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8516   data_ref = make_ssa_name (perm_dest);
8517 
8518   /* Generate the permute statement.  */
8519   perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8520   vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8521 
8522   return data_ref;
8523 }
8524 
8525 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8526    inserting them on the loop's preheader edge.  Returns true if we
8527    were successful in doing so (and thus STMT_INFO itself can then be moved),
8528    otherwise returns false.  */
8529 
8530 static bool
8531 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8532 {
8533   ssa_op_iter i;
8534   tree op;
8535   bool any = false;
8536 
8537   FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8538     {
8539       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8540       if (!gimple_nop_p (def_stmt)
8541 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8542 	{
8543 	  /* Make sure we don't need to recurse.  While we could do so
8544 	     in simple cases, for more complex use webs we don't have an
8545 	     easy way to preserve stmt order so as to fulfil dependencies
8546 	     within them.  */
8547 	  tree op2;
8548 	  ssa_op_iter i2;
8549 	  if (gimple_code (def_stmt) == GIMPLE_PHI)
8550 	    return false;
8551 	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8552 	    {
8553 	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8554 	      if (!gimple_nop_p (def_stmt2)
8555 		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8556 		return false;
8557 	    }
8558 	  any = true;
8559 	}
8560     }
8561 
8562   if (!any)
8563     return true;
8564 
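  /* All uses check out; move each in-loop definition to the preheader.  */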
8565   FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8566     {
8567       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8568       if (!gimple_nop_p (def_stmt)
8569 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8570 	{
8571 	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8572 	  gsi_remove (&gsi, false);
8573 	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8574 	}
8575     }
8576 
8577   return true;
8578 }
8579 
8580 /* vectorizable_load.
8581 
8582    Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8583    that can be vectorized.
8584    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8585    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8586    Return true if STMT_INFO is vectorizable in this way.  */
8587 
8588 static bool
8589 vectorizable_load (vec_info *vinfo,
8590 		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8591 		   gimple **vec_stmt, slp_tree slp_node,
8592 		   stmt_vector_for_cost *cost_vec)
8593 {
8594   tree scalar_dest;
8595   tree vec_dest = NULL;
8596   tree data_ref = NULL;
8597   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8598   class loop *loop = NULL;
8599   class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8600   bool nested_in_vect_loop = false;
8601   tree elem_type;
8602   tree new_temp;
8603   machine_mode mode;
8604   tree dummy;
8605   tree dataref_ptr = NULL_TREE;
8606   tree dataref_offset = NULL_TREE;
8607   gimple *ptr_incr = NULL;
8608   int ncopies;
8609   int i, j;
8610   unsigned int group_size;
8611   poly_uint64 group_gap_adj;
8612   tree msq = NULL_TREE, lsq;
8613   tree realignment_token = NULL_TREE;
8614   gphi *phi = NULL;
8615   vec<tree> dr_chain = vNULL;
8616   bool grouped_load = false;
8617   stmt_vec_info first_stmt_info;
8618   stmt_vec_info first_stmt_info_for_drptr = NULL;
8619   bool compute_in_loop = false;
8620   class loop *at_loop;
8621   int vec_num;
8622   bool slp = (slp_node != NULL);
8623   bool slp_perm = false;
8624   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8625   poly_uint64 vf;
8626   tree aggr_type;
8627   gather_scatter_info gs_info;
8628   tree ref_type;
8629   enum vect_def_type mask_dt = vect_unknown_def_type;
8630 
8631   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8632     return false;
8633 
8634   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8635       && ! vec_stmt)
8636     return false;
8637 
8638   if (!STMT_VINFO_DATA_REF (stmt_info))
8639     return false;
8640 
8641   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8642   int mask_index = -1;
8643   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8644     {
8645       scalar_dest = gimple_assign_lhs (assign);
8646       if (TREE_CODE (scalar_dest) != SSA_NAME)
8647 	return false;
8648 
8649       tree_code code = gimple_assign_rhs_code (assign);
8650       if (code != ARRAY_REF
8651 	  && code != BIT_FIELD_REF
8652 	  && code != INDIRECT_REF
8653 	  && code != COMPONENT_REF
8654 	  && code != IMAGPART_EXPR
8655 	  && code != REALPART_EXPR
8656 	  && code != MEM_REF
8657 	  && TREE_CODE_CLASS (code) != tcc_declaration)
8658 	return false;
8659     }
8660   else
8661     {
8662       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8663       if (!call || !gimple_call_internal_p (call))
8664 	return false;
8665 
8666       internal_fn ifn = gimple_call_internal_fn (call);
8667       if (!internal_load_fn_p (ifn))
8668 	return false;
8669 
8670       scalar_dest = gimple_call_lhs (call);
8671       if (!scalar_dest)
8672 	return false;
8673 
8674       mask_index = internal_fn_mask_index (ifn);
8675       /* ??? For SLP the mask operand is always last.  */
8676       if (mask_index >= 0 && slp_node)
8677 	mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
8678       if (mask_index >= 0
8679 	  && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8680 				      &mask, NULL, &mask_dt, &mask_vectype))
8681 	return false;
8682     }
8683 
8684   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8685   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8686 
8687   if (loop_vinfo)
8688     {
8689       loop = LOOP_VINFO_LOOP (loop_vinfo);
8690       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8691       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8692     }
8693   else
8694     vf = 1;
8695 
8696   /* Multiple types in SLP are handled by creating the appropriate number of
8697      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
8698      case of SLP.  */
8699   if (slp)
8700     ncopies = 1;
8701   else
8702     ncopies = vect_get_num_copies (loop_vinfo, vectype);
8703 
8704   gcc_assert (ncopies >= 1);
8705 
8706   /* FORNOW. This restriction should be relaxed.  */
8707   if (nested_in_vect_loop && ncopies > 1)
8708     {
8709       if (dump_enabled_p ())
8710         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8711                          "multiple types in nested loop.\n");
8712       return false;
8713     }
8714 
8715   /* Invalidate assumptions made by dependence analysis when vectorization
8716      on the unrolled body effectively re-orders stmts.  */
8717   if (ncopies > 1
8718       && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8719       && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8720 		   STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8721     {
8722       if (dump_enabled_p ())
8723 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8724 			 "cannot perform implicit CSE when unrolling "
8725 			 "with negative dependence distance\n");
8726       return false;
8727     }
8728 
8729   elem_type = TREE_TYPE (vectype);
8730   mode = TYPE_MODE (vectype);
8731 
8732   /* FORNOW.  In some cases we can vectorize even if the data type is
8733      not supported (e.g. data copies).  */
8734   if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8735     {
8736       if (dump_enabled_p ())
8737         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8738                          "Aligned load, but unsupported type.\n");
8739       return false;
8740     }
8741 
8742   /* Check if the load is a part of an interleaving chain.  */
8743   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8744     {
8745       grouped_load = true;
8746       /* FORNOW */
8747       gcc_assert (!nested_in_vect_loop);
8748       gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8749 
8750       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8751       group_size = DR_GROUP_SIZE (first_stmt_info);
8752 
8753       /* Refuse non-SLP vectorization of SLP-only groups.  */
8754       if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8755 	{
8756 	  if (dump_enabled_p ())
8757 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8758 			     "cannot vectorize load in non-SLP mode.\n");
8759 	  return false;
8760 	}
8761 
8762       if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8763 	{
8764 	  slp_perm = true;
8765 
8766 	  if (!loop_vinfo)
8767 	    {
8768 	      /* In BB vectorization we may not actually use a loaded vector
8769 		 accessing elements in excess of DR_GROUP_SIZE.  */
8770 	      stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8771 	      group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8772 	      unsigned HOST_WIDE_INT nunits;
8773 	      unsigned j, k, maxk = 0;
8774 	      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8775 		if (k > maxk)
8776 		  maxk = k;
8777 	      tree vectype = SLP_TREE_VECTYPE (slp_node);
8778 	      if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8779 		  || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8780 		{
8781 		  if (dump_enabled_p ())
8782 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8783 				     "BB vectorization with gaps at the end of "
8784 				     "a load is not supported\n");
8785 		  return false;
8786 		}
8787 	    }
8788 
8789 	  auto_vec<tree> tem;
8790 	  unsigned n_perms;
8791 	  if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8792 					     true, &n_perms))
8793 	    {
8794 	      if (dump_enabled_p ())
8795 		dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8796 				 vect_location,
8797 				 "unsupported load permutation\n");
8798 	      return false;
8799 	    }
8800 	}
8801 
8802       /* Invalidate assumptions made by dependence analysis when vectorization
8803 	 of the unrolled body effectively re-orders stmts.  */
8804       if (!PURE_SLP_STMT (stmt_info)
8805 	  && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8806 	  && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8807 		       STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8808 	{
8809 	  if (dump_enabled_p ())
8810 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8811 			     "cannot perform implicit CSE when performing "
8812 			     "group loads with negative dependence distance\n");
8813 	  return false;
8814 	}
8815     }
8816   else
8817     group_size = 1;
8818 
8819   vect_memory_access_type memory_access_type;
8820   enum dr_alignment_support alignment_support_scheme;
8821   int misalignment;
8822   poly_int64 poffset;
8823   if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8824 			    ncopies, &memory_access_type, &poffset,
8825 			    &alignment_support_scheme, &misalignment, &gs_info))
8826     return false;
8827 
8828   if (mask)
8829     {
8830       if (memory_access_type == VMAT_CONTIGUOUS)
8831 	{
8832 	  machine_mode vec_mode = TYPE_MODE (vectype);
8833 	  if (!VECTOR_MODE_P (vec_mode)
8834 	      || !can_vec_mask_load_store_p (vec_mode,
8835 					     TYPE_MODE (mask_vectype), true))
8836 	    return false;
8837 	}
8838       else if (memory_access_type != VMAT_LOAD_STORE_LANES
8839 	       && memory_access_type != VMAT_GATHER_SCATTER)
8840 	{
8841 	  if (dump_enabled_p ())
8842 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8843 			     "unsupported access type for masked load.\n");
8844 	  return false;
8845 	}
8846       else if (memory_access_type == VMAT_GATHER_SCATTER
8847 	       && gs_info.ifn == IFN_LAST
8848 	       && !gs_info.decl)
8849 	{
8850 	  if (dump_enabled_p ())
8851 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8852 			     "unsupported masked emulated gather.\n");
8853 	  return false;
8854 	}
8855     }
8856 
8857   if (!vec_stmt) /* transformation not required.  */
8858     {
8859       if (slp_node
8860 	  && mask
8861 	  && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8862 						mask_vectype))
8863 	{
8864 	  if (dump_enabled_p ())
8865 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8866 			     "incompatible vector types for invariants\n");
8867 	  return false;
8868 	}
8869 
8870       if (!slp)
8871 	STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8872 
8873       if (loop_vinfo
8874 	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8875 	check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
8876 					      group_size, memory_access_type,
8877 					      ncopies, &gs_info, mask);
8878 
8879       if (dump_enabled_p ()
8880 	  && memory_access_type != VMAT_ELEMENTWISE
8881 	  && memory_access_type != VMAT_GATHER_SCATTER
8882 	  && alignment_support_scheme != dr_aligned)
8883 	dump_printf_loc (MSG_NOTE, vect_location,
8884 			 "Vectorizing an unaligned access.\n");
8885 
8886       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
8887       vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8888 			    alignment_support_scheme, misalignment,
8889 			    &gs_info, slp_node, cost_vec);
8890       return true;
8891     }
8892 
8893   if (!slp)
8894     gcc_assert (memory_access_type
8895 		== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8896 
8897   if (dump_enabled_p ())
8898     dump_printf_loc (MSG_NOTE, vect_location,
8899                      "transform load. ncopies = %d\n", ncopies);
8900 
8901   /* Transform.  */
8902 
8903   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8904   ensure_base_align (dr_info);
8905 
8906   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8907     {
8908       vect_build_gather_load_calls (vinfo,
8909 				    stmt_info, gsi, vec_stmt, &gs_info, mask);
8910       return true;
8911     }
8912 
8913   if (memory_access_type == VMAT_INVARIANT)
8914     {
8915       gcc_assert (!grouped_load && !mask && !bb_vinfo);
8916       /* If we have versioned for aliasing or the loop doesn't
8917 	 have any data dependencies that would preclude this,
8918 	 then we are sure this is a loop invariant load and
8919 	 thus we can insert it on the preheader edge.  */
8920       bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8921 		      && !nested_in_vect_loop
8922 		      && hoist_defs_of_uses (stmt_info, loop));
8923       if (hoist_p)
8924 	{
8925 	  gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8926 	  if (dump_enabled_p ())
8927 	    dump_printf_loc (MSG_NOTE, vect_location,
8928 			     "hoisting out of the vectorized loop: %G", stmt);
8929 	  scalar_dest = copy_ssa_name (scalar_dest);
8930 	  tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8931 	  gsi_insert_on_edge_immediate
8932 	    (loop_preheader_edge (loop),
8933 	     gimple_build_assign (scalar_dest, rhs));
8934 	}
8935       /* These copies are all equivalent, but currently the representation
8936 	 requires a separate STMT_VINFO_VEC_STMT for each one.  */
8937       gimple_stmt_iterator gsi2 = *gsi;
8938       gsi_next (&gsi2);
8939       for (j = 0; j < ncopies; j++)
8940 	{
8941 	  if (hoist_p)
8942 	    new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8943 					 vectype, NULL);
8944 	  else
8945 	    new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8946 					 vectype, &gsi2);
8947 	  gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8948 	  if (slp)
8949 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8950 	  else
8951 	    {
8952 	      if (j == 0)
8953 		*vec_stmt = new_stmt;
8954 	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8955 	    }
8956 	}
8957       return true;
8958     }
8959 
8960   if (memory_access_type == VMAT_ELEMENTWISE
8961       || memory_access_type == VMAT_STRIDED_SLP)
8962     {
8963       gimple_stmt_iterator incr_gsi;
8964       bool insert_after;
8965       tree offvar;
8966       tree ivstep;
8967       tree running_off;
8968       vec<constructor_elt, va_gc> *v = NULL;
8969       tree stride_base, stride_step, alias_off;
8970       /* Checked by get_load_store_type.  */
8971       unsigned int const_nunits = nunits.to_constant ();
8972       unsigned HOST_WIDE_INT cst_offset = 0;
8973       tree dr_offset;
8974 
8975       gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
8976       gcc_assert (!nested_in_vect_loop);
8977 
8978       if (grouped_load)
8979 	{
8980 	  first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8981 	  first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8982 	}
8983       else
8984 	{
8985 	  first_stmt_info = stmt_info;
8986 	  first_dr_info = dr_info;
8987 	}
8988       if (slp && grouped_load)
8989 	{
8990 	  group_size = DR_GROUP_SIZE (first_stmt_info);
8991 	  ref_type = get_group_alias_ptr_type (first_stmt_info);
8992 	}
8993       else
8994 	{
8995 	  if (grouped_load)
8996 	    cst_offset
8997 	      = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8998 		 * vect_get_place_in_interleaving_chain (stmt_info,
8999 							 first_stmt_info));
9000 	  group_size = 1;
9001 	  ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
9002 	}
9003 
9004       dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
9005       stride_base
9006 	= fold_build_pointer_plus
9007 	    (DR_BASE_ADDRESS (first_dr_info->dr),
9008 	     size_binop (PLUS_EXPR,
9009 			 convert_to_ptrofftype (dr_offset),
9010 			 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
9011       stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
9012 
9013       /* For a load with loop-invariant (but other than power-of-2)
9014          stride (i.e. not a grouped access) like so:
9015 
9016 	   for (i = 0; i < n; i += stride)
9017 	     ... = array[i];
9018 
9019 	 we generate a new induction variable and new accesses to
9020 	 form a new vector (or vectors, depending on ncopies):
9021 
9022 	   for (j = 0; ; j += VF*stride)
9023 	     tmp1 = array[j];
9024 	     tmp2 = array[j + stride];
9025 	     ...
9026 	     vectemp = {tmp1, tmp2, ...}
9027          */
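      /* Illustrative example only (values chosen for exposition): with a
	 V4SI vectype and VF 4, each vector iteration performs the four
	 scalar loads array[j], array[j + stride], array[j + 2*stride] and
	 array[j + 3*stride], combines them into one V4SI vector, and
	 advances the IV j by 4*stride.  */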
9028 
9029       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
9030 			    build_int_cst (TREE_TYPE (stride_step), vf));
9031 
9032       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
9033 
9034       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
9035       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
9036       create_iv (stride_base, ivstep, NULL,
9037 		 loop, &incr_gsi, insert_after,
9038 		 &offvar, NULL);
9039 
9040       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9041 
9042       running_off = offvar;
9043       alias_off = build_int_cst (ref_type, 0);
9044       int nloads = const_nunits;
9045       int lnel = 1;
9046       tree ltype = TREE_TYPE (vectype);
9047       tree lvectype = vectype;
9048       auto_vec<tree> dr_chain;
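      /* NLOADS is the number of loads emitted per vector, LNEL the number
	 of group elements each such load covers, LTYPE the type used for
	 the individual loads and LVECTYPE the vector type composed from
	 them (possibly different from VECTYPE, in which case the result is
	 view-converted back to VECTYPE below).  */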
9049       if (memory_access_type == VMAT_STRIDED_SLP)
9050 	{
9051 	  if (group_size < const_nunits)
9052 	    {
9053 	      /* First check if vec_init optab supports construction from vector
9054 		 elts directly.  Otherwise avoid emitting a constructor of
9055 		 vector elements by performing the loads using an integer type
9056 		 of the same size, constructing a vector of those and then
9057 		 re-interpreting it as the original vector type.  This avoids a
9058 		 huge runtime penalty due to the general inability to perform
9059 		 store forwarding from smaller stores to a larger load.  */
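	      /* Purely illustrative: for a V8HI vectype covering a group of
		 size 2, if the target cannot build the vector directly from
		 V2HI pieces, the loads are done as four SImode values that
		 are combined into a V4SI vector and view-converted back to
		 V8HI.  */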
9060 	      tree ptype;
9061 	      tree vtype
9062 		= vector_vector_composition_type (vectype,
9063 						  const_nunits / group_size,
9064 						  &ptype);
9065 	      if (vtype != NULL_TREE)
9066 		{
9067 		  nloads = const_nunits / group_size;
9068 		  lnel = group_size;
9069 		  lvectype = vtype;
9070 		  ltype = ptype;
9071 		}
9072 	    }
9073 	  else
9074 	    {
9075 	      nloads = 1;
9076 	      lnel = const_nunits;
9077 	      ltype = vectype;
9078 	    }
9079 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9080 	}
9081       /* For a single-element vectype, load the whole vector(1) scalar_type at once.  */
9082       else if (nloads == 1)
9083 	ltype = vectype;
9084 
9085       if (slp)
9086 	{
9087 	  /* For SLP permutation support we need to load the whole group,
9088 	     not only the number of vector stmts the permutation result
9089 	     fits in.  */
9090 	  if (slp_perm)
9091 	    {
9092 	      /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9093 		 variable VF.  */
9094 	      unsigned int const_vf = vf.to_constant ();
9095 	      ncopies = CEIL (group_size * const_vf, const_nunits);
9096 	      dr_chain.create (ncopies);
9097 	    }
9098 	  else
9099 	    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9100 	}
9101       unsigned int group_el = 0;
9102       unsigned HOST_WIDE_INT
9103 	elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
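      /* Emit NLOADS scalar (or partial-vector) loads for each copy,
	 advancing RUNNING_OFF by STRIDE_STEP once a whole group has been
	 consumed (after every load in the non-SLP case), and gather the
	 loaded values into a constructor when more than one load makes up
	 a vector.  */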
9104       for (j = 0; j < ncopies; j++)
9105 	{
9106 	  if (nloads > 1)
9107 	    vec_alloc (v, nloads);
9108 	  gimple *new_stmt = NULL;
9109 	  for (i = 0; i < nloads; i++)
9110 	    {
9111 	      tree this_off = build_int_cst (TREE_TYPE (alias_off),
9112 					     group_el * elsz + cst_offset);
9113 	      tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9114 	      vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9115 	      new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
9116 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9117 	      if (nloads > 1)
9118 		CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9119 					gimple_assign_lhs (new_stmt));
9120 
9121 	      group_el += lnel;
9122 	      if (! slp
9123 		  || group_el == group_size)
9124 		{
9125 		  tree newoff = copy_ssa_name (running_off);
9126 		  gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9127 						      running_off, stride_step);
9128 		  vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9129 
9130 		  running_off = newoff;
9131 		  group_el = 0;
9132 		}
9133 	    }
9134 	  if (nloads > 1)
9135 	    {
9136 	      tree vec_inv = build_constructor (lvectype, v);
9137 	      new_temp = vect_init_vector (vinfo, stmt_info,
9138 					   vec_inv, lvectype, gsi);
9139 	      new_stmt = SSA_NAME_DEF_STMT (new_temp);
9140 	      if (lvectype != vectype)
9141 		{
9142 		  new_stmt = gimple_build_assign (make_ssa_name (vectype),
9143 						  VIEW_CONVERT_EXPR,
9144 						  build1 (VIEW_CONVERT_EXPR,
9145 							  vectype, new_temp));
9146 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9147 		}
9148 	    }
9149 
9150 	  if (slp)
9151 	    {
9152 	      if (slp_perm)
9153 		dr_chain.quick_push (gimple_assign_lhs (new_stmt));
9154 	      else
9155 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9156 	    }
9157 	  else
9158 	    {
9159 	      if (j == 0)
9160 		*vec_stmt = new_stmt;
9161 	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9162 	    }
9163 	}
9164       if (slp_perm)
9165 	{
9166 	  unsigned n_perms;
9167 	  vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9168 					false, &n_perms);
9169 	}
9170       return true;
9171     }
9172 
9173   if (memory_access_type == VMAT_GATHER_SCATTER
9174       || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9175     grouped_load = false;
9176 
9177   if (grouped_load)
9178     {
9179       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9180       group_size = DR_GROUP_SIZE (first_stmt_info);
9181       /* For SLP vectorization we directly vectorize a subchain
9182          without permutation.  */
9183       if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9184 	first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9185       /* For BB vectorization always use the first stmt to base
9186 	 the data ref pointer on.  */
9187       if (bb_vinfo)
9188 	first_stmt_info_for_drptr
9189 	  = vect_find_first_scalar_stmt_in_slp (slp_node);
9190 
9191       /* Check if the chain of loads is already vectorized.  */
9192       if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9193 	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9194 	     ???  But we can only do so if there is exactly one
9195 	     as we have no way to get at the rest.  Leave the CSE
9196 	     opportunity alone.
9197 	     ???  With the group load eventually participating
9198 	     in multiple different permutations (having multiple
9199 	     slp nodes which refer to the same group) the CSE
9200 	     is even wrong code.  See PR56270.  */
9201 	  && !slp)
9202 	{
9203 	  *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9204 	  return true;
9205 	}
9206       first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9207       group_gap_adj = 0;
9208 
9209       /* VEC_NUM is the number of vect stmts to be created for this group.  */
9210       if (slp)
9211 	{
9212 	  grouped_load = false;
9213 	  /* If an SLP permutation is from N elements to N elements,
9214 	     and if one vector holds a whole number of N, we can load
9215 	     the inputs to the permutation in the same way as an
9216 	     unpermuted sequence.  In other cases we need to load the
9217 	     whole group, not only the number of vector stmts the
9218 	     permutation result fits in.  */
9219 	  unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9220 	  if (slp_perm
9221 	      && (group_size != scalar_lanes
9222 		  || !multiple_p (nunits, group_size)))
9223 	    {
9224 	      /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9225 		 variable VF; see vect_transform_slp_perm_load.  */
9226 	      unsigned int const_vf = vf.to_constant ();
9227 	      unsigned int const_nunits = nunits.to_constant ();
9228 	      vec_num = CEIL (group_size * const_vf, const_nunits);
9229 	      group_gap_adj = vf * group_size - nunits * vec_num;
9230 	    }
9231 	  else
9232 	    {
9233 	      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9234 	      group_gap_adj
9235 		= group_size - scalar_lanes;
9236 	    }
9237     	}
9238       else
9239 	vec_num = group_size;
9240 
9241       ref_type = get_group_alias_ptr_type (first_stmt_info);
9242     }
9243   else
9244     {
9245       first_stmt_info = stmt_info;
9246       first_dr_info = dr_info;
9247       group_size = vec_num = 1;
9248       group_gap_adj = 0;
9249       ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9250     }
9251 
9252   gcc_assert (alignment_support_scheme);
9253   vec_loop_masks *loop_masks
9254     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9255        ? &LOOP_VINFO_MASKS (loop_vinfo)
9256        : NULL);
9257   vec_loop_lens *loop_lens
9258     = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
9259        ? &LOOP_VINFO_LENS (loop_vinfo)
9260        : NULL);
9261 
9262   /* Shouldn't go with length-based approach if fully masked.  */
9263   gcc_assert (!loop_lens || !loop_masks);
9264 
9265   /* Targets with store-lane instructions must not require explicit
9266      realignment.  vect_supportable_dr_alignment always returns either
9267      dr_aligned or dr_unaligned_supported for masked operations.  */
9268   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9269 	       && !mask
9270 	       && !loop_masks)
9271 	      || alignment_support_scheme == dr_aligned
9272 	      || alignment_support_scheme == dr_unaligned_supported);
9273 
9274   /* In case the vectorization factor (VF) is bigger than the number
9275      of elements that we can fit in a vectype (nunits), we have to generate
9276      more than one vector stmt - i.e - we need to "unroll" the
9277      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
9278      from one copy of the vector stmt to the next, in the field
9279      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
9280      stages to find the correct vector defs to be used when vectorizing
9281      stmts that use the defs of the current stmt.  The example below
9282      illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9283      need to create 4 vectorized stmts):
9284 
9285      before vectorization:
9286                                 RELATED_STMT    VEC_STMT
9287         S1:     x = memref      -               -
9288         S2:     z = x + 1       -               -
9289 
9290      step 1: vectorize stmt S1:
9291         We first create the vector stmt VS1_0, and, as usual, record a
9292         pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9293         Next, we create the vector stmt VS1_1, and record a pointer to
9294         it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9295         Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
9296         stmts and pointers:
9297                                 RELATED_STMT    VEC_STMT
9298         VS1_0:  vx0 = memref0   VS1_1           -
9299         VS1_1:  vx1 = memref1   VS1_2           -
9300         VS1_2:  vx2 = memref2   VS1_3           -
9301         VS1_3:  vx3 = memref3   -               -
9302         S1:     x = load        -               VS1_0
9303         S2:     z = x + 1       -               -
9304   */
9305 
9306   /* In case of interleaving (non-unit grouped access):
9307 
9308      S1:  x2 = &base + 2
9309      S2:  x0 = &base
9310      S3:  x1 = &base + 1
9311      S4:  x3 = &base + 3
9312 
9313      Vectorized loads are created in the order of memory accesses
9314      starting from the access of the first stmt of the chain:
9315 
9316      VS1: vx0 = &base
9317      VS2: vx1 = &base + vec_size*1
9318      VS3: vx3 = &base + vec_size*2
9319      VS4: vx4 = &base + vec_size*3
9320 
9321      Then permutation statements are generated:
9322 
9323      VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9324      VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9325        ...
9326 
9327      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9328      (the order of the data-refs in the output of vect_permute_load_chain
9329      corresponds to the order of scalar stmts in the interleaving chain - see
9330      the documentation of vect_permute_load_chain()).
9331      The generation of permutation stmts and recording them in
9332      STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9333 
9334      In case of both multiple types and interleaving, the vector loads and
9335      permutation stmts above are created for every copy.  The result vector
9336      stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9337      corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
9338 
9339   /* If the data reference is aligned (dr_aligned) or potentially unaligned
9340      on a target that supports unaligned accesses (dr_unaligned_supported)
9341      we generate the following code:
9342          p = initial_addr;
9343          indx = 0;
9344          loop {
9345 	   p = p + indx * vectype_size;
9346            vec_dest = *(p);
9347            indx = indx + 1;
9348          }
9349 
9350      Otherwise, the data reference is potentially unaligned on a target that
9351      does not support unaligned accesses (dr_explicit_realign_optimized) -
9352      then generate the following code, in which the data in each iteration is
9353      obtained by two vector loads, one from the previous iteration, and one
9354      from the current iteration:
9355          p1 = initial_addr;
9356          msq_init = *(floor(p1))
9357          p2 = initial_addr + VS - 1;
9358          realignment_token = call target_builtin;
9359          indx = 0;
9360          loop {
9361            p2 = p2 + indx * vectype_size
9362            lsq = *(floor(p2))
9363            vec_dest = realign_load (msq, lsq, realignment_token)
9364            indx = indx + 1;
9365            msq = lsq;
9366          }   */
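  /* The dr_explicit_realign case below emits both aligned loads inside the
     loop, while dr_explicit_realign_optimized keeps the load from the
     previous iteration (MSQ) in a PHI and only loads LSQ in each
     iteration.  */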
9367 
9368   /* If the misalignment remains the same throughout the execution of the
9369      loop, we can create the init_addr and permutation mask at the loop
9370      preheader.  Otherwise, it needs to be created inside the loop.
9371      This can only occur when vectorizing memory accesses in the inner-loop
9372      nested within an outer-loop that is being vectorized.  */
9373 
9374   if (nested_in_vect_loop
9375       && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9376 		      GET_MODE_SIZE (TYPE_MODE (vectype))))
9377     {
9378       gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9379       compute_in_loop = true;
9380     }
9381 
9382   bool diff_first_stmt_info
9383     = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9384 
9385   tree offset = NULL_TREE;
9386   if ((alignment_support_scheme == dr_explicit_realign_optimized
9387        || alignment_support_scheme == dr_explicit_realign)
9388       && !compute_in_loop)
9389     {
9390       /* If we have a different first_stmt_info, we can't set up the
9391 	 realignment here, since we can't guarantee that the first_stmt_info
9392 	 DR has been initialized yet; instead use the first_stmt_info_for_drptr
9393 	 DR and bump the pointer by the distance to the first_stmt_info DR,
	 as done below.  */
9394       if (!diff_first_stmt_info)
9395 	msq = vect_setup_realignment (vinfo,
9396 				      first_stmt_info, gsi, &realignment_token,
9397 				      alignment_support_scheme, NULL_TREE,
9398 				      &at_loop);
9399       if (alignment_support_scheme == dr_explicit_realign_optimized)
9400 	{
9401 	  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9402 	  offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9403 			       size_one_node);
9404 	  gcc_assert (!first_stmt_info_for_drptr);
9405 	}
9406     }
9407   else
9408     at_loop = loop;
9409 
9410   if (!known_eq (poffset, 0))
9411     offset = (offset
9412 	      ? size_binop (PLUS_EXPR, offset, size_int (poffset))
9413 	      : size_int (poffset));
9414 
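  /* AGGR_TYPE is the type of one full access: an array of VEC_NUM * NUNITS
     elements for load-lanes, the element type for strided gathers, and the
     vector type otherwise (it stays NULL for real gathers, which do not
     advance a data pointer).  BUMP is the amount by which DATAREF_PTR is
     advanced between two consecutive accesses.  */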
9415   tree bump;
9416   tree vec_offset = NULL_TREE;
9417   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9418     {
9419       aggr_type = NULL_TREE;
9420       bump = NULL_TREE;
9421     }
9422   else if (memory_access_type == VMAT_GATHER_SCATTER)
9423     {
9424       aggr_type = elem_type;
9425       vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9426 				       &bump, &vec_offset);
9427     }
9428   else
9429     {
9430       if (memory_access_type == VMAT_LOAD_STORE_LANES)
9431 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9432       else
9433 	aggr_type = vectype;
9434       bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9435 					  memory_access_type);
9436     }
9437 
9438   vec<tree> vec_offsets = vNULL;
9439   auto_vec<tree> vec_masks;
9440   if (mask)
9441     {
9442       if (slp_node)
9443 	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
9444 			   &vec_masks);
9445       else
9446 	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
9447 				       &vec_masks, mask_vectype);
9448     }
9449   tree vec_mask = NULL_TREE;
9450   poly_uint64 group_elt = 0;
9451   for (j = 0; j < ncopies; j++)
9452     {
9453       /* 1. Create the vector or array pointer update chain.  */
9454       if (j == 0)
9455 	{
9456 	  bool simd_lane_access_p
9457 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9458 	  if (simd_lane_access_p
9459 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9460 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9461 	      && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9462 	      && integer_zerop (DR_INIT (first_dr_info->dr))
9463 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
9464 					get_alias_set (TREE_TYPE (ref_type)))
9465 	      && (alignment_support_scheme == dr_aligned
9466 		  || alignment_support_scheme == dr_unaligned_supported))
9467 	    {
9468 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9469 	      dataref_offset = build_int_cst (ref_type, 0);
9470 	    }
9471 	  else if (diff_first_stmt_info)
9472 	    {
9473 	      dataref_ptr
9474 		= vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9475 					    aggr_type, at_loop, offset, &dummy,
9476 					    gsi, &ptr_incr, simd_lane_access_p,
9477 					    bump);
9478 	      /* Adjust the pointer by the difference to first_stmt.  */
9479 	      data_reference_p ptrdr
9480 		= STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9481 	      tree diff
9482 		= fold_convert (sizetype,
9483 				size_binop (MINUS_EXPR,
9484 					    DR_INIT (first_dr_info->dr),
9485 					    DR_INIT (ptrdr)));
9486 	      dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9487 					     stmt_info, diff);
9488 	      if (alignment_support_scheme == dr_explicit_realign)
9489 		{
9490 		  msq = vect_setup_realignment (vinfo,
9491 						first_stmt_info_for_drptr, gsi,
9492 						&realignment_token,
9493 						alignment_support_scheme,
9494 						dataref_ptr, &at_loop);
9495 		  gcc_assert (!compute_in_loop);
9496 		}
9497 	    }
9498 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9499 	    {
9500 	      vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9501 					   slp_node, &gs_info, &dataref_ptr,
9502 					   &vec_offsets);
9503 	    }
9504 	  else
9505 	    dataref_ptr
9506 	      = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9507 					  at_loop,
9508 					  offset, &dummy, gsi, &ptr_incr,
9509 					  simd_lane_access_p, bump);
9510 	  if (mask)
9511 	    vec_mask = vec_masks[0];
9512 	}
9513       else
9514 	{
9515 	  if (dataref_offset)
9516 	    dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9517 					      bump);
9518 	  else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9519 	    dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9520 					   stmt_info, bump);
9521 	  if (mask)
9522 	    vec_mask = vec_masks[j];
9523 	}
9524 
9525       if (grouped_load || slp_perm)
9526 	dr_chain.create (vec_num);
9527 
9528       gimple *new_stmt = NULL;
9529       if (memory_access_type == VMAT_LOAD_STORE_LANES)
9530 	{
9531 	  tree vec_array;
9532 
9533 	  vec_array = create_vector_array (vectype, vec_num);
9534 
9535 	  tree final_mask = NULL_TREE;
9536 	  if (loop_masks)
9537 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9538 					     vectype, j);
9539 	  if (vec_mask)
9540 	    final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9541 					   final_mask, vec_mask, gsi);
9542 
9543 	  gcall *call;
9544 	  if (final_mask)
9545 	    {
9546 	      /* Emit:
9547 		   VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9548 		                                VEC_MASK).  */
9549 	      unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9550 	      tree alias_ptr = build_int_cst (ref_type, align);
9551 	      call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9552 						 dataref_ptr, alias_ptr,
9553 						 final_mask);
9554 	    }
9555 	  else
9556 	    {
9557 	      /* Emit:
9558 		   VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
9559 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9560 	      call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9561 	    }
9562 	  gimple_call_set_lhs (call, vec_array);
9563 	  gimple_call_set_nothrow (call, true);
9564 	  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9565 	  new_stmt = call;
9566 
9567 	  /* Extract each vector into an SSA_NAME.  */
9568 	  for (i = 0; i < vec_num; i++)
9569 	    {
9570 	      new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9571 					    vec_array, i);
9572 	      dr_chain.quick_push (new_temp);
9573 	    }
9574 
9575 	  /* Record the mapping between SSA_NAMEs and statements.  */
9576 	  vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9577 
9578 	  /* Record that VEC_ARRAY is now dead.  */
9579 	  vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9580 	}
9581       else
9582 	{
9583 	  for (i = 0; i < vec_num; i++)
9584 	    {
9585 	      tree final_mask = NULL_TREE;
9586 	      if (loop_masks
9587 		  && memory_access_type != VMAT_INVARIANT)
9588 		final_mask = vect_get_loop_mask (gsi, loop_masks,
9589 						 vec_num * ncopies,
9590 						 vectype, vec_num * j + i);
9591 	      if (vec_mask)
9592 		final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9593 					       final_mask, vec_mask, gsi);
9594 
9595 	      if (i > 0)
9596 		dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9597 					       gsi, stmt_info, bump);
9598 
9599 	      /* 2. Create the vector-load in the loop.  */
9600 	      switch (alignment_support_scheme)
9601 		{
9602 		case dr_aligned:
9603 		case dr_unaligned_supported:
9604 		  {
9605 		    unsigned int misalign;
9606 		    unsigned HOST_WIDE_INT align;
9607 
9608 		    if (memory_access_type == VMAT_GATHER_SCATTER
9609 			&& gs_info.ifn != IFN_LAST)
9610 		      {
9611 			if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9612 			  vec_offset = vec_offsets[j];
9613 			tree zero = build_zero_cst (vectype);
9614 			tree scale = size_int (gs_info.scale);
9615 			gcall *call;
9616 			if (final_mask)
9617 			  call = gimple_build_call_internal
9618 			    (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9619 			     vec_offset, scale, zero, final_mask);
9620 			else
9621 			  call = gimple_build_call_internal
9622 			    (IFN_GATHER_LOAD, 4, dataref_ptr,
9623 			     vec_offset, scale, zero);
9624 			gimple_call_set_nothrow (call, true);
9625 			new_stmt = call;
9626 			data_ref = NULL_TREE;
9627 			break;
9628 		      }
9629 		    else if (memory_access_type == VMAT_GATHER_SCATTER)
9630 		      {
9631 			/* Emulated gather-scatter.  */
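			/* The gather is open-coded: each lane of the offset
			   vector is extracted, scaled, added to DATAREF_PTR
			   and used for a scalar load, and the loaded
			   elements are collected into a CONSTRUCTOR of
			   VECTYPE.  */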
9632 			gcc_assert (!final_mask);
9633 			unsigned HOST_WIDE_INT const_nunits
9634 			  = nunits.to_constant ();
9635 			unsigned HOST_WIDE_INT const_offset_nunits
9636 			  = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
9637 			      .to_constant ();
9638 			vec<constructor_elt, va_gc> *ctor_elts;
9639 			vec_alloc (ctor_elts, const_nunits);
9640 			gimple_seq stmts = NULL;
9641 			/* We support offset vectors with more elements
9642 			   than the data vector for now.  */
9643 			unsigned HOST_WIDE_INT factor
9644 			  = const_offset_nunits / const_nunits;
9645 			vec_offset = vec_offsets[j / factor];
9646 			unsigned elt_offset = (j % factor) * const_nunits;
9647 			tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9648 			tree scale = size_int (gs_info.scale);
9649 			align
9650 			  = get_object_alignment (DR_REF (first_dr_info->dr));
9651 			tree ltype = build_aligned_type (TREE_TYPE (vectype),
9652 							 align);
9653 			for (unsigned k = 0; k < const_nunits; ++k)
9654 			  {
9655 			    tree boff = size_binop (MULT_EXPR,
9656 						    TYPE_SIZE (idx_type),
9657 						    bitsize_int
9658 						      (k + elt_offset));
9659 			    tree idx = gimple_build (&stmts, BIT_FIELD_REF,
9660 						     idx_type, vec_offset,
9661 						     TYPE_SIZE (idx_type),
9662 						     boff);
9663 			    idx = gimple_convert (&stmts, sizetype, idx);
9664 			    idx = gimple_build (&stmts, MULT_EXPR,
9665 						sizetype, idx, scale);
9666 			    tree ptr = gimple_build (&stmts, PLUS_EXPR,
9667 						     TREE_TYPE (dataref_ptr),
9668 						     dataref_ptr, idx);
9669 			    ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9670 			    tree elt = make_ssa_name (TREE_TYPE (vectype));
9671 			    tree ref = build2 (MEM_REF, ltype, ptr,
9672 					       build_int_cst (ref_type, 0));
9673 			    new_stmt = gimple_build_assign (elt, ref);
9674 			    gimple_seq_add_stmt (&stmts, new_stmt);
9675 			    CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
9676 			  }
9677 			gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9678 			new_stmt = gimple_build_assign (NULL_TREE,
9679 							build_constructor
9680 							  (vectype, ctor_elts));
9681 			data_ref = NULL_TREE;
9682 			break;
9683 		      }
9684 
9685 		    align =
9686 		      known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9687 		    if (alignment_support_scheme == dr_aligned)
9688 		      misalign = 0;
9689 		    else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9690 		      {
9691 			align = dr_alignment
9692 			  (vect_dr_behavior (vinfo, first_dr_info));
9693 			misalign = 0;
9694 		      }
9695 		    else
9696 		      misalign = misalignment;
9697 		    if (dataref_offset == NULL_TREE
9698 			&& TREE_CODE (dataref_ptr) == SSA_NAME)
9699 		      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9700 					      align, misalign);
9701 		    align = least_bit_hwi (misalign | align);
9702 
9703 		    if (final_mask)
9704 		      {
9705 			tree ptr = build_int_cst (ref_type,
9706 						  align * BITS_PER_UNIT);
9707 			gcall *call
9708 			  = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9709 							dataref_ptr, ptr,
9710 							final_mask);
9711 			gimple_call_set_nothrow (call, true);
9712 			new_stmt = call;
9713 			data_ref = NULL_TREE;
9714 		      }
9715 		    else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9716 		      {
9717 			tree final_len
9718 			  = vect_get_loop_len (loop_vinfo, loop_lens,
9719 					       vec_num * ncopies,
9720 					       vec_num * j + i);
9721 			tree ptr = build_int_cst (ref_type,
9722 						  align * BITS_PER_UNIT);
9723 			gcall *call
9724 			  = gimple_build_call_internal (IFN_LEN_LOAD, 3,
9725 							dataref_ptr, ptr,
9726 							final_len);
9727 			gimple_call_set_nothrow (call, true);
9728 			new_stmt = call;
9729 			data_ref = NULL_TREE;
9730 
9731 			/* If the target's LEN_LOAD reads the vector in VnQI mode, give the call a VnQI result and view-convert it back to VECTYPE.  */
9732 			machine_mode vmode = TYPE_MODE (vectype);
9733 			opt_machine_mode new_ovmode
9734 			  = get_len_load_store_mode (vmode, true);
9735 			machine_mode new_vmode = new_ovmode.require ();
9736 			if (vmode != new_vmode)
9737 			  {
9738 			    tree qi_type = unsigned_intQI_type_node;
9739 			    tree new_vtype
9740 			      = build_vector_type_for_mode (qi_type, new_vmode);
9741 			    tree var = vect_get_new_ssa_name (new_vtype,
9742 							      vect_simple_var);
9743 			    gimple_set_lhs (call, var);
9744 			    vect_finish_stmt_generation (vinfo, stmt_info, call,
9745 							 gsi);
9746 			    tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9747 			    new_stmt
9748 			      = gimple_build_assign (vec_dest,
9749 						     VIEW_CONVERT_EXPR, op);
9750 			  }
9751 		      }
9752 		    else
9753 		      {
9754 			tree ltype = vectype;
9755 			tree new_vtype = NULL_TREE;
9756 			unsigned HOST_WIDE_INT gap
9757 			  = DR_GROUP_GAP (first_stmt_info);
9758 			unsigned int vect_align
9759 			  = vect_known_alignment_in_bytes (first_dr_info,
9760 							   vectype);
9761 			unsigned int scalar_dr_size
9762 			  = vect_get_scalar_dr_size (first_dr_info);
9763 			/* If there's no peeling for gaps but we have a gap
9764 			   with SLP loads then load only the lower half of
9765 			   the vector.  See get_group_load_store_type for
9766 			   when we apply this optimization.  */
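			/* Purely illustrative (and subject to the alignment
			   test below): with a V4SI vectype, a group of size 4
			   and a gap of 2, only half a vector's worth of data
			   is loaded and the remaining lanes are filled with
			   zeros by the constructor built below.  */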
9767 			if (slp
9768 			    && loop_vinfo
9769 			    && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9770 			    && gap != 0
9771 			    && known_eq (nunits, (group_size - gap) * 2)
9772 			    && known_eq (nunits, group_size)
9773 			    && gap >= (vect_align / scalar_dr_size))
9774 			  {
9775 			    tree half_vtype;
9776 			    new_vtype
9777 			      = vector_vector_composition_type (vectype, 2,
9778 								&half_vtype);
9779 			    if (new_vtype != NULL_TREE)
9780 			      ltype = half_vtype;
9781 			  }
9782 			tree offset
9783 			  = (dataref_offset ? dataref_offset
9784 					    : build_int_cst (ref_type, 0));
9785 			if (ltype != vectype
9786 			    && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9787 			  {
9788 			    unsigned HOST_WIDE_INT gap_offset
9789 			      = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9790 			    tree gapcst = build_int_cst (ref_type, gap_offset);
9791 			    offset = size_binop (PLUS_EXPR, offset, gapcst);
9792 			  }
9793 			data_ref
9794 			  = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9795 			if (alignment_support_scheme == dr_aligned)
9796 			  ;
9797 			else
9798 			  TREE_TYPE (data_ref)
9799 			    = build_aligned_type (TREE_TYPE (data_ref),
9800 						  align * BITS_PER_UNIT);
9801 			if (ltype != vectype)
9802 			  {
9803 			    vect_copy_ref_info (data_ref,
9804 						DR_REF (first_dr_info->dr));
9805 			    tree tem = make_ssa_name (ltype);
9806 			    new_stmt = gimple_build_assign (tem, data_ref);
9807 			    vect_finish_stmt_generation (vinfo, stmt_info,
9808 							 new_stmt, gsi);
9809 			    data_ref = NULL;
9810 			    vec<constructor_elt, va_gc> *v;
9811 			    vec_alloc (v, 2);
9812 			    if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9813 			      {
9814 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9815 							build_zero_cst (ltype));
9816 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9817 			      }
9818 			    else
9819 			      {
9820 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9821 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9822 							build_zero_cst (ltype));
9823 			      }
9824 			    gcc_assert (new_vtype != NULL_TREE);
9825 			    if (new_vtype == vectype)
9826 			      new_stmt = gimple_build_assign (
9827 				vec_dest, build_constructor (vectype, v));
9828 			    else
9829 			      {
9830 				tree new_vname = make_ssa_name (new_vtype);
9831 				new_stmt = gimple_build_assign (
9832 				  new_vname, build_constructor (new_vtype, v));
9833 				vect_finish_stmt_generation (vinfo, stmt_info,
9834 							     new_stmt, gsi);
9835 				new_stmt = gimple_build_assign (
9836 				  vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9837 						    new_vname));
9838 			      }
9839 			  }
9840 		      }
9841 		    break;
9842 		  }
9843 		case dr_explicit_realign:
9844 		  {
9845 		    tree ptr, bump;
9846 
9847 		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9848 
9849 		    if (compute_in_loop)
9850 		      msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9851 						    &realignment_token,
9852 						    dr_explicit_realign,
9853 						    dataref_ptr, NULL);
9854 
9855 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
9856 		      ptr = copy_ssa_name (dataref_ptr);
9857 		    else
9858 		      ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9859 		    // For explicit realign the target alignment should be
9860 		    // known at compile time.
9861 		    unsigned HOST_WIDE_INT align =
9862 		      DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9863 		    new_stmt = gimple_build_assign
9864 				 (ptr, BIT_AND_EXPR, dataref_ptr,
9865 				  build_int_cst
9866 				  (TREE_TYPE (dataref_ptr),
9867 				   -(HOST_WIDE_INT) align));
9868 		    vect_finish_stmt_generation (vinfo, stmt_info,
9869 						 new_stmt, gsi);
9870 		    data_ref
9871 		      = build2 (MEM_REF, vectype, ptr,
9872 				build_int_cst (ref_type, 0));
9873 		    vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9874 		    vec_dest = vect_create_destination_var (scalar_dest,
9875 							    vectype);
9876 		    new_stmt = gimple_build_assign (vec_dest, data_ref);
9877 		    new_temp = make_ssa_name (vec_dest, new_stmt);
9878 		    gimple_assign_set_lhs (new_stmt, new_temp);
9879 		    gimple_move_vops (new_stmt, stmt_info->stmt);
9880 		    vect_finish_stmt_generation (vinfo, stmt_info,
9881 						 new_stmt, gsi);
9882 		    msq = new_temp;
9883 
9884 		    bump = size_binop (MULT_EXPR, vs,
9885 				       TYPE_SIZE_UNIT (elem_type));
9886 		    bump = size_binop (MINUS_EXPR, bump, size_one_node);
9887 		    ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9888 					   stmt_info, bump);
9889 		    new_stmt = gimple_build_assign
9890 				 (NULL_TREE, BIT_AND_EXPR, ptr,
9891 				  build_int_cst
9892 				  (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9893 		    ptr = copy_ssa_name (ptr, new_stmt);
9894 		    gimple_assign_set_lhs (new_stmt, ptr);
9895 		    vect_finish_stmt_generation (vinfo, stmt_info,
9896 						 new_stmt, gsi);
9897 		    data_ref
9898 		      = build2 (MEM_REF, vectype, ptr,
9899 				build_int_cst (ref_type, 0));
9900 		    break;
9901 		  }
9902 		case dr_explicit_realign_optimized:
9903 		  {
9904 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
9905 		      new_temp = copy_ssa_name (dataref_ptr);
9906 		    else
9907 		      new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9908 		    // We should only be doing this if we know the target
9909 		    // alignment at compile time.
9910 		    unsigned HOST_WIDE_INT align =
9911 		      DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9912 		    new_stmt = gimple_build_assign
9913 		      (new_temp, BIT_AND_EXPR, dataref_ptr,
9914 		       build_int_cst (TREE_TYPE (dataref_ptr),
9915 				     -(HOST_WIDE_INT) align));
9916 		    vect_finish_stmt_generation (vinfo, stmt_info,
9917 						 new_stmt, gsi);
9918 		    data_ref
9919 		      = build2 (MEM_REF, vectype, new_temp,
9920 				build_int_cst (ref_type, 0));
9921 		    break;
9922 		  }
9923 		default:
9924 		  gcc_unreachable ();
9925 		}
9926 	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
9927 	      /* DATA_REF is null if we've already built the statement.  */
9928 	      if (data_ref)
9929 		{
9930 		  vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9931 		  new_stmt = gimple_build_assign (vec_dest, data_ref);
9932 		}
9933 	      new_temp = make_ssa_name (vec_dest, new_stmt);
9934 	      gimple_set_lhs (new_stmt, new_temp);
9935 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9936 
9937 	      /* 3. Handle explicit realignment if necessary/supported.
9938 		 Create in loop:
9939 		   vec_dest = realign_load (msq, lsq, realignment_token)  */
9940 	      if (alignment_support_scheme == dr_explicit_realign_optimized
9941 		  || alignment_support_scheme == dr_explicit_realign)
9942 		{
9943 		  lsq = gimple_assign_lhs (new_stmt);
9944 		  if (!realignment_token)
9945 		    realignment_token = dataref_ptr;
9946 		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
9947 		  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9948 						  msq, lsq, realignment_token);
9949 		  new_temp = make_ssa_name (vec_dest, new_stmt);
9950 		  gimple_assign_set_lhs (new_stmt, new_temp);
9951 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9952 
9953 		  if (alignment_support_scheme == dr_explicit_realign_optimized)
9954 		    {
9955 		      gcc_assert (phi);
9956 		      if (i == vec_num - 1 && j == ncopies - 1)
9957 			add_phi_arg (phi, lsq,
9958 				     loop_latch_edge (containing_loop),
9959 				     UNKNOWN_LOCATION);
9960 		      msq = lsq;
9961 		    }
9962 		}
9963 
9964 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9965 		{
9966 		  tree perm_mask = perm_mask_for_reverse (vectype);
9967 		  new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9968 						   perm_mask, stmt_info, gsi);
9969 		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
9970 		}
9971 
9972 	      /* Collect vector loads and later create their permutation in
9973 		 vect_transform_grouped_load ().  */
9974 	      if (grouped_load || slp_perm)
9975 		dr_chain.quick_push (new_temp);
9976 
9977 	      /* Store vector loads in the corresponding SLP_NODE.  */
9978 	      if (slp && !slp_perm)
9979 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9980 
9981 	      /* With an SLP permutation we load the gaps as well; without
9982 	         one we need to skip the gaps once we have fully loaded
9983 		 all the elements.  group_gap_adj is DR_GROUP_SIZE here.  */
9984 	      group_elt += nunits;
9985 	      if (maybe_ne (group_gap_adj, 0U)
9986 		  && !slp_perm
9987 		  && known_eq (group_elt, group_size - group_gap_adj))
9988 		{
9989 		  poly_wide_int bump_val
9990 		    = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9991 		       * group_gap_adj);
9992 		  if (tree_int_cst_sgn
9993 			(vect_dr_behavior (vinfo, dr_info)->step) == -1)
9994 		    bump_val = -bump_val;
9995 		  tree bump = wide_int_to_tree (sizetype, bump_val);
9996 		  dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9997 						 gsi, stmt_info, bump);
9998 		  group_elt = 0;
9999 		}
10000 	    }
10001 	  /* Bump the vector pointer to account for a gap or for excess
10002 	     elements loaded for a permuted SLP load.  */
10003 	  if (maybe_ne (group_gap_adj, 0U) && slp_perm)
10004 	    {
10005 	      poly_wide_int bump_val
10006 		= (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10007 		   * group_gap_adj);
10008 	      if (tree_int_cst_sgn
10009 		    (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10010 		bump_val = -bump_val;
10011 	      tree bump = wide_int_to_tree (sizetype, bump_val);
10012 	      dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10013 					     stmt_info, bump);
10014 	    }
10015 	}
10016 
10017       if (slp && !slp_perm)
10018 	continue;
10019 
10020       if (slp_perm)
10021         {
10022 	  unsigned n_perms;
10023 	  /* For SLP we know we've seen all possible uses of dr_chain so
10024 	     direct vect_transform_slp_perm_load to DCE the unused parts.
10025 	     ???  This is a hack to prevent compile-time issues as seen
10026 	     in PR101120 and friends.  */
10027 	  bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
10028 						  gsi, vf, false, &n_perms,
10029 						  nullptr, true);
10030 	  gcc_assert (ok);
10031         }
10032       else
10033         {
10034           if (grouped_load)
10035   	    {
10036 	      if (memory_access_type != VMAT_LOAD_STORE_LANES)
10037 		vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
10038 					     group_size, gsi);
10039 	      *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10040 	    }
10041           else
10042 	    {
10043 	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10044 	    }
10045         }
10046       dr_chain.release ();
10047     }
10048   if (!slp)
10049     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10050 
10051   return true;
10052 }
10053 
10054 /* Function vect_is_simple_cond.
10055 
10056    Input:
10057    LOOP - the loop that is being vectorized.
10058    COND - Condition that is checked for simple use.
10059 
10060    Output:
10061    *COMP_VECTYPE - the vector type for the comparison.
10062    *DTS - The def types for the arguments of the comparison
10063 
10064    Returns whether a COND can be vectorized.  Checks whether
10065    condition operands are supportable using vect_is_simple_use.  */
10066 
10067 static bool
10068 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
10069 		     slp_tree slp_node, tree *comp_vectype,
10070 		     enum vect_def_type *dts, tree vectype)
10071 {
10072   tree lhs, rhs;
10073   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10074   slp_tree slp_op;
10075 
10076   /* Mask case.  */
10077   if (TREE_CODE (cond) == SSA_NAME
10078       && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
10079     {
10080       if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
10081 			       &slp_op, &dts[0], comp_vectype)
10082 	  || !*comp_vectype
10083 	  || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
10084 	return false;
10085       return true;
10086     }
10087 
10088   if (!COMPARISON_CLASS_P (cond))
10089     return false;
10090 
10091   lhs = TREE_OPERAND (cond, 0);
10092   rhs = TREE_OPERAND (cond, 1);
10093 
10094   if (TREE_CODE (lhs) == SSA_NAME)
10095     {
10096       if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
10097 			       &lhs, &slp_op, &dts[0], &vectype1))
10098 	return false;
10099     }
10100   else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
10101 	   || TREE_CODE (lhs) == FIXED_CST)
10102     dts[0] = vect_constant_def;
10103   else
10104     return false;
10105 
10106   if (TREE_CODE (rhs) == SSA_NAME)
10107     {
10108       if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
10109 			       &rhs, &slp_op, &dts[1], &vectype2))
10110 	return false;
10111     }
10112   else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
10113 	   || TREE_CODE (rhs) == FIXED_CST)
10114     dts[1] = vect_constant_def;
10115   else
10116     return false;
10117 
10118   if (vectype1 && vectype2
10119       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10120 		   TYPE_VECTOR_SUBPARTS (vectype2)))
10121     return false;
10122 
10123   *comp_vectype = vectype1 ? vectype1 : vectype2;
10124   /* Invariant comparison.  */
10125   if (! *comp_vectype)
10126     {
10127       tree scalar_type = TREE_TYPE (lhs);
10128       if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10129 	*comp_vectype = truth_type_for (vectype);
10130       else
10131 	{
10132 	  /* If we can widen the comparison to match vectype do so.  */
10133 	  if (INTEGRAL_TYPE_P (scalar_type)
10134 	      && !slp_node
10135 	      && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10136 				  TYPE_SIZE (TREE_TYPE (vectype))))
10137 	    scalar_type = build_nonstandard_integer_type
10138 	      (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10139 	  *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10140 						       slp_node);
10141 	}
10142     }
10143 
10144   return true;
10145 }
10146 
10147 /* vectorizable_condition.
10148 
10149    Check if STMT_INFO is a conditional modify expression that can be
10150    vectorized.  If VEC_STMT is also passed, vectorize STMT_INFO: create a
10151    vectorized stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT,
10152    and insert it at GSI.
10153 
10154    When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10155 
10156    Return true if STMT_INFO is vectorizable in this way.  */
10157 
10158 static bool
10159 vectorizable_condition (vec_info *vinfo,
10160 			stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10161 			gimple **vec_stmt,
10162 			slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10163 {
10164   tree scalar_dest = NULL_TREE;
10165   tree vec_dest = NULL_TREE;
10166   tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10167   tree then_clause, else_clause;
10168   tree comp_vectype = NULL_TREE;
10169   tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10170   tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10171   tree vec_compare;
10172   tree new_temp;
10173   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10174   enum vect_def_type dts[4]
10175     = {vect_unknown_def_type, vect_unknown_def_type,
10176        vect_unknown_def_type, vect_unknown_def_type};
10177   int ndts = 4;
10178   int ncopies;
10179   int vec_num;
10180   enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10181   int i;
10182   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10183   vec<tree> vec_oprnds0 = vNULL;
10184   vec<tree> vec_oprnds1 = vNULL;
10185   vec<tree> vec_oprnds2 = vNULL;
10186   vec<tree> vec_oprnds3 = vNULL;
10187   tree vec_cmp_type;
10188   bool masked = false;
10189 
10190   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10191     return false;
10192 
10193   /* Is vectorizable conditional operation?  */
10194   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10195   if (!stmt)
10196     return false;
10197 
10198   code = gimple_assign_rhs_code (stmt);
10199   if (code != COND_EXPR)
10200     return false;
10201 
10202   stmt_vec_info reduc_info = NULL;
10203   int reduc_index = -1;
10204   vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10205   bool for_reduction
10206     = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10207   if (for_reduction)
10208     {
10209       if (STMT_SLP_TYPE (stmt_info))
10210 	return false;
10211       reduc_info = info_for_reduction (vinfo, stmt_info);
10212       reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10213       reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10214       gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10215 		  || reduc_index != -1);
10216     }
10217   else
10218     {
10219       if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10220 	return false;
10221     }
10222 
10223   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10224   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10225 
10226   if (slp_node)
10227     {
10228       ncopies = 1;
10229       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10230     }
10231   else
10232     {
10233       ncopies = vect_get_num_copies (loop_vinfo, vectype);
10234       vec_num = 1;
10235     }
10236 
10237   gcc_assert (ncopies >= 1);
10238   if (for_reduction && ncopies > 1)
10239     return false; /* FORNOW */
10240 
10241   cond_expr = gimple_assign_rhs1 (stmt);
10242 
10243   if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
10244 			    &comp_vectype, &dts[0], vectype)
10245       || !comp_vectype)
10246     return false;
10247 
10248   unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
10249   slp_tree then_slp_node, else_slp_node;
10250   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
10251 			   &then_clause, &then_slp_node, &dts[2], &vectype1))
10252     return false;
10253   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
10254 			   &else_clause, &else_slp_node, &dts[3], &vectype2))
10255     return false;
10256 
10257   if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10258     return false;
10259 
10260   if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10261     return false;
10262 
10263   masked = !COMPARISON_CLASS_P (cond_expr);
10264   vec_cmp_type = truth_type_for (comp_vectype);
10265 
10266   if (vec_cmp_type == NULL_TREE)
10267     return false;
10268 
10269   cond_code = TREE_CODE (cond_expr);
10270   if (!masked)
10271     {
10272       cond_expr0 = TREE_OPERAND (cond_expr, 0);
10273       cond_expr1 = TREE_OPERAND (cond_expr, 1);
10274     }
10275 
10276   /* For conditional reductions, the "then" value needs to be the candidate
10277      value calculated by this iteration while the "else" value needs to be
10278      the result carried over from previous iterations.  If the COND_EXPR
10279      is the other way around, we need to swap it.  */
10280   bool must_invert_cmp_result = false;
10281   if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10282     {
10283       if (masked)
10284 	must_invert_cmp_result = true;
10285       else
10286 	{
10287 	  bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10288 	  tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10289 	  if (new_code == ERROR_MARK)
10290 	    must_invert_cmp_result = true;
10291 	  else
10292 	    {
10293 	      cond_code = new_code;
10294 	      /* Make sure we don't accidentally use the old condition.  */
10295 	      cond_expr = NULL_TREE;
10296 	    }
10297 	}
10298       std::swap (then_clause, else_clause);
10299     }
10300 
10301   if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10302     {
10303       /* Boolean values may have another representation in vectors
10304 	 and therefore we prefer bit operations over comparison for
10305 	 them (which also works for scalar masks).  We store opcodes
10306 	 to use in bitop1 and bitop2.  Statement is vectorized as
10307 	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10308 	 depending on bitop1 and bitop2 arity.  */
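      /* For example, with boolean operands A and B this lowers
	   A > B   to  A & ~B    (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR)
	   A <= B  to  ~A | B    (same bitops, with the operands swapped)
	   A == B  to  ~(A ^ B)  (bitop1 = BIT_XOR_EXPR, bitop2 = BIT_NOT_EXPR).  */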
10309       switch (cond_code)
10310 	{
10311 	case GT_EXPR:
10312 	  bitop1 = BIT_NOT_EXPR;
10313 	  bitop2 = BIT_AND_EXPR;
10314 	  break;
10315 	case GE_EXPR:
10316 	  bitop1 = BIT_NOT_EXPR;
10317 	  bitop2 = BIT_IOR_EXPR;
10318 	  break;
10319 	case LT_EXPR:
10320 	  bitop1 = BIT_NOT_EXPR;
10321 	  bitop2 = BIT_AND_EXPR;
10322 	  std::swap (cond_expr0, cond_expr1);
10323 	  break;
10324 	case LE_EXPR:
10325 	  bitop1 = BIT_NOT_EXPR;
10326 	  bitop2 = BIT_IOR_EXPR;
10327 	  std::swap (cond_expr0, cond_expr1);
10328 	  break;
10329 	case NE_EXPR:
10330 	  bitop1 = BIT_XOR_EXPR;
10331 	  break;
10332 	case EQ_EXPR:
10333 	  bitop1 = BIT_XOR_EXPR;
10334 	  bitop2 = BIT_NOT_EXPR;
10335 	  break;
10336 	default:
10337 	  return false;
10338 	}
10339       cond_code = SSA_NAME;
10340     }
10341 
10342   if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10343       && reduction_type == EXTRACT_LAST_REDUCTION
10344       && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10345     {
10346       if (dump_enabled_p ())
10347 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10348 			 "reduction comparison operation not supported.\n");
10349       return false;
10350     }
10351 
10352   if (!vec_stmt)
10353     {
10354       if (bitop1 != NOP_EXPR)
10355 	{
10356 	  machine_mode mode = TYPE_MODE (comp_vectype);
10357 	  optab optab;
10358 
10359 	  optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10360 	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10361 	    return false;
10362 
10363 	  if (bitop2 != NOP_EXPR)
10364 	    {
10365 	      optab = optab_for_tree_code (bitop2, comp_vectype,
10366 					   optab_default);
10367 	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10368 		return false;
10369 	    }
10370 	}
10371 
10372       vect_cost_for_stmt kind = vector_stmt;
10373       if (reduction_type == EXTRACT_LAST_REDUCTION)
10374 	/* Count one reduction-like operation per vector.  */
10375 	kind = vec_to_scalar;
10376       else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10377 	return false;
10378 
10379       if (slp_node
10380 	  && (!vect_maybe_update_slp_op_vectype
10381 		 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10382 	      || (op_adjust == 1
10383 		  && !vect_maybe_update_slp_op_vectype
10384 			(SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10385 	      || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10386 	      || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10387 	{
10388 	  if (dump_enabled_p ())
10389 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10390 			     "incompatible vector types for invariants\n");
10391 	  return false;
10392 	}
10393 
10394       if (loop_vinfo && for_reduction
10395 	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10396 	{
10397 	  if (reduction_type == EXTRACT_LAST_REDUCTION)
10398 	    vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10399 				   ncopies * vec_num, vectype, NULL);
10400 	  /* Extra inactive lanes should be safe for vect_nested_cycle.  */
10401 	  else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10402 	    {
10403 	      if (dump_enabled_p ())
10404 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10405 				 "conditional reduction prevents the use"
10406 				 " of partial vectors.\n");
10407 	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10408 	    }
10409 	}
10410 
10411       STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10412       vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10413 			      cost_vec, kind);
10414       return true;
10415     }
10416 
10417   /* Transform.  */
10418 
10419   /* Handle def.  */
10420   scalar_dest = gimple_assign_lhs (stmt);
10421   if (reduction_type != EXTRACT_LAST_REDUCTION)
10422     vec_dest = vect_create_destination_var (scalar_dest, vectype);
10423 
10424   bool swap_cond_operands = false;
10425 
10426   /* See whether another part of the vectorized code applies a loop
10427      mask to the condition, or to its inverse.  */
10428 
10429   vec_loop_masks *masks = NULL;
10430   if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10431     {
10432       if (reduction_type == EXTRACT_LAST_REDUCTION)
10433 	masks = &LOOP_VINFO_MASKS (loop_vinfo);
10434       else
10435 	{
10436 	  scalar_cond_masked_key cond (cond_expr, ncopies);
10437 	  if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10438 	    masks = &LOOP_VINFO_MASKS (loop_vinfo);
10439 	  else
10440 	    {
10441 	      bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10442 	      tree_code orig_code = cond.code;
10443 	      cond.code = invert_tree_comparison (cond.code, honor_nans);
10444 	      if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10445 		{
10446 		  masks = &LOOP_VINFO_MASKS (loop_vinfo);
10447 		  cond_code = cond.code;
10448 		  swap_cond_operands = true;
10449 		}
10450 	      else
10451 		{
10452 		  /* Try the inverse of the current mask.  We check if the
10453 		     inverse mask is live and if so we generate a negate of
10454 		     the current mask such that we still honor NaNs.  */
10455 		  cond.inverted_p = true;
10456 		  cond.code = orig_code;
10457 		  if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10458 		    {
10459 		      bitop1 = orig_code;
10460 		      bitop2 = BIT_NOT_EXPR;
10461 		      masks = &LOOP_VINFO_MASKS (loop_vinfo);
10462 		      cond_code = cond.code;
10463 		      swap_cond_operands = true;
10464 		    }
10465 		}
10466 	    }
10467 	}
10468     }
10469 
10470   /* Handle cond expr.  */
10471   if (masked)
10472     vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10473 		       cond_expr, &vec_oprnds0, comp_vectype,
10474 		       then_clause, &vec_oprnds2, vectype,
10475 		       reduction_type != EXTRACT_LAST_REDUCTION
10476 		       ? else_clause : NULL, &vec_oprnds3, vectype);
10477   else
10478     vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10479 		       cond_expr0, &vec_oprnds0, comp_vectype,
10480 		       cond_expr1, &vec_oprnds1, comp_vectype,
10481 		       then_clause, &vec_oprnds2, vectype,
10482 		       reduction_type != EXTRACT_LAST_REDUCTION
10483 		       ? else_clause : NULL, &vec_oprnds3, vectype);
10484 
10485   /* Arguments are ready.  Create the new vector stmt.  */
10486   FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10487     {
10488       vec_then_clause = vec_oprnds2[i];
10489       if (reduction_type != EXTRACT_LAST_REDUCTION)
10490 	vec_else_clause = vec_oprnds3[i];
10491 
10492       if (swap_cond_operands)
10493 	std::swap (vec_then_clause, vec_else_clause);
10494 
10495       if (masked)
10496 	vec_compare = vec_cond_lhs;
10497       else
10498 	{
10499 	  vec_cond_rhs = vec_oprnds1[i];
10500 	  if (bitop1 == NOP_EXPR)
10501 	    {
10502 	      gimple_seq stmts = NULL;
10503 	      vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10504 					   vec_cond_lhs, vec_cond_rhs);
10505 	      gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10506 	    }
10507 	  else
10508 	    {
10509 	      new_temp = make_ssa_name (vec_cmp_type);
10510 	      gassign *new_stmt;
10511 	      if (bitop1 == BIT_NOT_EXPR)
10512 		new_stmt = gimple_build_assign (new_temp, bitop1,
10513 						vec_cond_rhs);
10514 	      else
10515 		new_stmt
10516 		  = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10517 					 vec_cond_rhs);
10518 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10519 	      if (bitop2 == NOP_EXPR)
10520 		vec_compare = new_temp;
10521 	      else if (bitop2 == BIT_NOT_EXPR)
10522 		{
10523 		  /* Instead of doing ~x ? y : z do x ? z : y.  */
10524 		  vec_compare = new_temp;
10525 		  std::swap (vec_then_clause, vec_else_clause);
10526 		}
10527 	      else
10528 		{
10529 		  vec_compare = make_ssa_name (vec_cmp_type);
10530 		  new_stmt
10531 		    = gimple_build_assign (vec_compare, bitop2,
10532 					   vec_cond_lhs, new_temp);
10533 		  vect_finish_stmt_generation (vinfo, stmt_info,
10534 					       new_stmt, gsi);
10535 		}
10536 	    }
10537 	}
10538 
10539       /* If we decided to apply a loop mask to the result of the vector
10540 	 comparison, AND the comparison with the mask now.  Later passes
10541 	 should then be able to reuse the AND results between multiple
10542 	 vector statements.
10543 
10544 	 For example:
10545 	 for (int i = 0; i < 100; ++i)
10546 	 x[i] = y[i] ? z[i] : 10;
10547 
10548 	 results in following optimized GIMPLE:
10549 
10550 	 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10551 	 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10552 	 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10553 	 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10554 	 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10555 	 vect_iftmp.11_47, { 10, ... }>;
10556 
10557 	 instead of using masked and unmasked forms of
10558 	 vec != { 0, ... } (masked in the MASK_LOAD,
10559 	 unmasked in the VEC_COND_EXPR).  */
10560 
10561       /* Force vec_compare to be an SSA_NAME rather than a comparison,
10562 	 in cases where that's necessary.  */
10563 
10564       if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10565 	{
10566 	  if (!is_gimple_val (vec_compare))
10567 	    {
10568 	      tree vec_compare_name = make_ssa_name (vec_cmp_type);
10569 	      gassign *new_stmt = gimple_build_assign (vec_compare_name,
10570 						       vec_compare);
10571 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10572 	      vec_compare = vec_compare_name;
10573 	    }
10574 
10575 	  if (must_invert_cmp_result)
10576 	    {
10577 	      tree vec_compare_name = make_ssa_name (vec_cmp_type);
10578 	      gassign *new_stmt = gimple_build_assign (vec_compare_name,
10579 						       BIT_NOT_EXPR,
10580 						       vec_compare);
10581 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10582 	      vec_compare = vec_compare_name;
10583 	    }
10584 
10585 	  if (masks)
10586 	    {
10587 	      tree loop_mask
10588 		= vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10589 				      vectype, i);
10590 	      tree tmp2 = make_ssa_name (vec_cmp_type);
10591 	      gassign *g
10592 		= gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10593 				       loop_mask);
10594 	      vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10595 	      vec_compare = tmp2;
10596 	    }
10597 	}
10598 
10599       gimple *new_stmt;
10600       if (reduction_type == EXTRACT_LAST_REDUCTION)
10601 	{
10602 	  gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10603 	  tree lhs = gimple_get_lhs (old_stmt);
10604 	  new_stmt = gimple_build_call_internal
10605 	      (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10606 	       vec_then_clause);
10607 	  gimple_call_set_lhs (new_stmt, lhs);
10608 	  SSA_NAME_DEF_STMT (lhs) = new_stmt;
10609 	  if (old_stmt == gsi_stmt (*gsi))
10610 	    vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10611 	  else
10612 	    {
10613 	      /* In this case we're moving the definition to later in the
10614 		 block.  That doesn't matter because the only uses of the
10615 		 lhs are in phi statements.  */
10616 	      gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10617 	      gsi_remove (&old_gsi, true);
10618 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10619 	    }
10620 	}
10621       else
10622 	{
10623 	  new_temp = make_ssa_name (vec_dest);
10624 	  new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10625 					  vec_then_clause, vec_else_clause);
10626 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10627 	}
10628       if (slp_node)
10629 	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10630       else
10631 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10632     }
10633 
10634   if (!slp_node)
10635     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10636 
10637   vec_oprnds0.release ();
10638   vec_oprnds1.release ();
10639   vec_oprnds2.release ();
10640   vec_oprnds3.release ();
10641 
10642   return true;
10643 }
10644 
10645 /* vectorizable_comparison.
10646 
10647    Check if STMT_INFO is a comparison expression that can be vectorized.
10648    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10649    comparison, put it in VEC_STMT, and insert it at GSI.
10650 
10651    Return true if STMT_INFO is vectorizable in this way.  */
10652 
10653 static bool
10654 vectorizable_comparison (vec_info *vinfo,
10655 			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10656 			 gimple **vec_stmt,
10657 			 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10658 {
10659   tree lhs, rhs1, rhs2;
10660   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10661   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10662   tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10663   tree new_temp;
10664   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10665   enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10666   int ndts = 2;
10667   poly_uint64 nunits;
10668   int ncopies;
10669   enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10670   int i;
10671   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10672   vec<tree> vec_oprnds0 = vNULL;
10673   vec<tree> vec_oprnds1 = vNULL;
10674   tree mask_type;
10675   tree mask;
10676 
10677   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10678     return false;
10679 
10680   if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10681     return false;
10682 
10683   mask_type = vectype;
10684   nunits = TYPE_VECTOR_SUBPARTS (vectype);
10685 
10686   if (slp_node)
10687     ncopies = 1;
10688   else
10689     ncopies = vect_get_num_copies (loop_vinfo, vectype);
10690 
10691   gcc_assert (ncopies >= 1);
10692   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10693     return false;
10694 
10695   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10696   if (!stmt)
10697     return false;
10698 
10699   code = gimple_assign_rhs_code (stmt);
10700 
10701   if (TREE_CODE_CLASS (code) != tcc_comparison)
10702     return false;
10703 
10704   slp_tree slp_rhs1, slp_rhs2;
10705   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10706 			   0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10707     return false;
10708 
10709   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10710 			   1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10711     return false;
10712 
10713   if (vectype1 && vectype2
10714       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10715 		   TYPE_VECTOR_SUBPARTS (vectype2)))
10716     return false;
10717 
10718   vectype = vectype1 ? vectype1 : vectype2;
10719 
10720   /* Invariant comparison.  */
10721   if (!vectype)
10722     {
10723       if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10724 	vectype = mask_type;
10725       else
10726 	vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10727 					       slp_node);
10728       if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10729 	return false;
10730     }
10731   else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10732     return false;
10733 
10734   /* Can't compare mask and non-mask types.  */
10735   if (vectype1 && vectype2
10736       && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10737     return false;
10738 
10739   /* Boolean values may have another representation in vectors
10740      and therefore we prefer bit operations over comparison for
10741      them (which also works for scalar masks).  We store opcodes
10742      to use in bitop1 and bitop2.  Statement is vectorized as
10743        BITOP2 (rhs1 BITOP1 rhs2) or
10744        rhs1 BITOP2 (BITOP1 rhs2)
10745      depending on bitop1 and bitop2 arity.  */
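  /* For example, for boolean RHS1 and RHS2 this lowers
       RHS1 > RHS2   to  RHS1 & ~RHS2
       RHS1 < RHS2   to  RHS2 & ~RHS1  (operands swapped via SWAP_P)
       RHS1 != RHS2  to  RHS1 ^ RHS2.  */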
10746   bool swap_p = false;
10747   if (VECTOR_BOOLEAN_TYPE_P (vectype))
10748     {
10749       if (code == GT_EXPR)
10750 	{
10751 	  bitop1 = BIT_NOT_EXPR;
10752 	  bitop2 = BIT_AND_EXPR;
10753 	}
10754       else if (code == GE_EXPR)
10755 	{
10756 	  bitop1 = BIT_NOT_EXPR;
10757 	  bitop2 = BIT_IOR_EXPR;
10758 	}
10759       else if (code == LT_EXPR)
10760 	{
10761 	  bitop1 = BIT_NOT_EXPR;
10762 	  bitop2 = BIT_AND_EXPR;
10763 	  swap_p = true;
10764 	}
10765       else if (code == LE_EXPR)
10766 	{
10767 	  bitop1 = BIT_NOT_EXPR;
10768 	  bitop2 = BIT_IOR_EXPR;
10769 	  swap_p = true;
10770 	}
10771       else
10772 	{
10773 	  bitop1 = BIT_XOR_EXPR;
10774 	  if (code == EQ_EXPR)
10775 	    bitop2 = BIT_NOT_EXPR;
10776 	}
10777     }
10778 
10779   if (!vec_stmt)
10780     {
10781       if (bitop1 == NOP_EXPR)
10782 	{
10783 	  if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10784 	    return false;
10785 	}
10786       else
10787 	{
10788 	  machine_mode mode = TYPE_MODE (vectype);
10789 	  optab optab;
10790 
10791 	  optab = optab_for_tree_code (bitop1, vectype, optab_default);
10792 	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10793 	    return false;
10794 
10795 	  if (bitop2 != NOP_EXPR)
10796 	    {
10797 	      optab = optab_for_tree_code (bitop2, vectype, optab_default);
10798 	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10799 		return false;
10800 	    }
10801 	}
10802 
10803       /* Put types on constant and invariant SLP children.  */
10804       if (slp_node
10805 	  && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10806 	      || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10807 	{
10808 	  if (dump_enabled_p ())
10809 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10810 			     "incompatible vector types for invariants\n");
10811 	  return false;
10812 	}
10813 
10814       STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10815       vect_model_simple_cost (vinfo, stmt_info,
10816 			      ncopies * (1 + (bitop2 != NOP_EXPR)),
10817 			      dts, ndts, slp_node, cost_vec);
10818       return true;
10819     }
10820 
10821   /* Transform.  */
10822 
10823   /* Handle def.  */
10824   lhs = gimple_assign_lhs (stmt);
10825   mask = vect_create_destination_var (lhs, mask_type);
10826 
10827   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10828 		     rhs1, &vec_oprnds0, vectype,
10829 		     rhs2, &vec_oprnds1, vectype);
10830   if (swap_p)
10831     std::swap (vec_oprnds0, vec_oprnds1);
10832 
10833   /* Arguments are ready.  Create the new vector stmt.  */
10834   FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10835     {
10836       gimple *new_stmt;
10837       vec_rhs2 = vec_oprnds1[i];
10838 
10839       new_temp = make_ssa_name (mask);
10840       if (bitop1 == NOP_EXPR)
10841 	{
10842 	  new_stmt = gimple_build_assign (new_temp, code,
10843 					  vec_rhs1, vec_rhs2);
10844 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10845 	}
10846       else
10847 	{
10848 	  if (bitop1 == BIT_NOT_EXPR)
10849 	    new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10850 	  else
10851 	    new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10852 					    vec_rhs2);
10853 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10854 	  if (bitop2 != NOP_EXPR)
10855 	    {
10856 	      tree res = make_ssa_name (mask);
10857 	      if (bitop2 == BIT_NOT_EXPR)
10858 		new_stmt = gimple_build_assign (res, bitop2, new_temp);
10859 	      else
10860 		new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10861 						new_temp);
10862 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10863 	    }
10864 	}
10865       if (slp_node)
10866 	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10867       else
10868 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10869     }
10870 
10871   if (!slp_node)
10872     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10873 
10874   vec_oprnds0.release ();
10875   vec_oprnds1.release ();
10876 
10877   return true;
10878 }
10879 
10880 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10881    can handle all live statements in the node.  Otherwise return true
10882    if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10883    GSI and VEC_STMT_P are as for vectorizable_live_operation.  */
10884 
10885 static bool
10886 can_vectorize_live_stmts (vec_info *vinfo,
10887 			  stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10888 			  slp_tree slp_node, slp_instance slp_node_instance,
10889 			  bool vec_stmt_p,
10890 			  stmt_vector_for_cost *cost_vec)
10891 {
10892   if (slp_node)
10893     {
10894       stmt_vec_info slp_stmt_info;
10895       unsigned int i;
10896       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10897 	{
10898 	  if (STMT_VINFO_LIVE_P (slp_stmt_info)
10899 	      && !vectorizable_live_operation (vinfo,
10900 					       slp_stmt_info, gsi, slp_node,
10901 					       slp_node_instance, i,
10902 					       vec_stmt_p, cost_vec))
10903 	    return false;
10904 	}
10905     }
10906   else if (STMT_VINFO_LIVE_P (stmt_info)
10907 	   && !vectorizable_live_operation (vinfo, stmt_info, gsi,
10908 					    slp_node, slp_node_instance, -1,
10909 					    vec_stmt_p, cost_vec))
10910     return false;
10911 
10912   return true;
10913 }
10914 
10915 /* Make sure the statement is vectorizable.  */
10916 
10917 opt_result
10918 vect_analyze_stmt (vec_info *vinfo,
10919 		   stmt_vec_info stmt_info, bool *need_to_vectorize,
10920 		   slp_tree node, slp_instance node_instance,
10921 		   stmt_vector_for_cost *cost_vec)
10922 {
10923   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10924   enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10925   bool ok;
10926   gimple_seq pattern_def_seq;
10927 
10928   if (dump_enabled_p ())
10929     dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10930 		     stmt_info->stmt);
10931 
10932   if (gimple_has_volatile_ops (stmt_info->stmt))
10933     return opt_result::failure_at (stmt_info->stmt,
10934 				   "not vectorized:"
10935 				   " stmt has volatile operands: %G\n",
10936 				   stmt_info->stmt);
10937 
10938   if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10939       && node == NULL
10940       && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10941     {
10942       gimple_stmt_iterator si;
10943 
10944       for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10945 	{
10946 	  stmt_vec_info pattern_def_stmt_info
10947 	    = vinfo->lookup_stmt (gsi_stmt (si));
10948 	  if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10949 	      || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10950 	    {
10951 	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
10952 	      if (dump_enabled_p ())
10953 		dump_printf_loc (MSG_NOTE, vect_location,
10954 				 "==> examining pattern def statement: %G",
10955 				 pattern_def_stmt_info->stmt);
10956 
10957 	      opt_result res
10958 		= vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10959 				     need_to_vectorize, node, node_instance,
10960 				     cost_vec);
10961 	      if (!res)
10962 		return res;
10963 	    }
10964 	}
10965     }
10966 
10967   /* Skip stmts that do not need to be vectorized. In loops this is expected
10968      to include:
10969      - the COND_EXPR which is the loop exit condition
10970      - any LABEL_EXPRs in the loop
10971      - computations that are used only for array indexing or loop control.
10972      In basic blocks we only analyze statements that are a part of some SLP
10973      instance, therefore, all the statements are relevant.
10974 
10975      The pattern statement needs to be analyzed instead of the original one
10976      if the original statement is not relevant.  Otherwise, we analyze both
10977      statements.  In basic blocks we are called from some SLP instance
10978      traversal; don't analyze pattern stmts there, since the pattern stmts
10979      are already part of an SLP instance.  */
10980 
10981   stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10982   if (!STMT_VINFO_RELEVANT_P (stmt_info)
10983       && !STMT_VINFO_LIVE_P (stmt_info))
10984     {
10985       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10986 	  && pattern_stmt_info
10987 	  && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10988 	      || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10989         {
10990           /* Analyze PATTERN_STMT instead of the original stmt.  */
10991 	  stmt_info = pattern_stmt_info;
10992           if (dump_enabled_p ())
10993 	    dump_printf_loc (MSG_NOTE, vect_location,
10994 			     "==> examining pattern statement: %G",
10995 			     stmt_info->stmt);
10996         }
10997       else
10998         {
10999           if (dump_enabled_p ())
11000             dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
11001 
11002           return opt_result::success ();
11003         }
11004     }
11005   else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11006 	   && node == NULL
11007 	   && pattern_stmt_info
11008 	   && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11009 	       || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11010     {
11011       /* Analyze PATTERN_STMT too.  */
11012       if (dump_enabled_p ())
11013 	dump_printf_loc (MSG_NOTE, vect_location,
11014 			 "==> examining pattern statement: %G",
11015 			 pattern_stmt_info->stmt);
11016 
11017       opt_result res
11018 	= vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
11019 			     node_instance, cost_vec);
11020       if (!res)
11021 	return res;
11022    }
11023 
11024   switch (STMT_VINFO_DEF_TYPE (stmt_info))
11025     {
11026       case vect_internal_def:
11027         break;
11028 
11029       case vect_reduction_def:
11030       case vect_nested_cycle:
11031          gcc_assert (!bb_vinfo
11032 		     && (relevance == vect_used_in_outer
11033 			 || relevance == vect_used_in_outer_by_reduction
11034 			 || relevance == vect_used_by_reduction
11035 			 || relevance == vect_unused_in_scope
11036 			 || relevance == vect_used_only_live));
11037          break;
11038 
11039       case vect_induction_def:
11040 	gcc_assert (!bb_vinfo);
11041 	break;
11042 
11043       case vect_constant_def:
11044       case vect_external_def:
11045       case vect_unknown_def_type:
11046       default:
11047         gcc_unreachable ();
11048     }
11049 
11050   tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11051   if (node)
11052     STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
11053 
11054   if (STMT_VINFO_RELEVANT_P (stmt_info))
11055     {
11056       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
11057       gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
11058 		  || (call && gimple_call_lhs (call) == NULL_TREE));
11059       *need_to_vectorize = true;
11060     }
11061 
11062   if (PURE_SLP_STMT (stmt_info) && !node)
11063     {
11064       if (dump_enabled_p ())
11065 	dump_printf_loc (MSG_NOTE, vect_location,
11066 			 "handled only by SLP analysis\n");
11067       return opt_result::success ();
11068     }
11069 
11070   ok = true;
11071   if (!bb_vinfo
11072       && (STMT_VINFO_RELEVANT_P (stmt_info)
11073 	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
11074     /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11075        -mveclibabi= takes preference over library functions with
11076        the simd attribute.  */
11077     ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11078 	  || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
11079 					   cost_vec)
11080 	  || vectorizable_conversion (vinfo, stmt_info,
11081 				      NULL, NULL, node, cost_vec)
11082 	  || vectorizable_operation (vinfo, stmt_info,
11083 				     NULL, NULL, node, cost_vec)
11084 	  || vectorizable_assignment (vinfo, stmt_info,
11085 				      NULL, NULL, node, cost_vec)
11086 	  || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11087 	  || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11088 	  || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11089 				     node, node_instance, cost_vec)
11090 	  || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
11091 				     NULL, node, cost_vec)
11092 	  || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11093 	  || vectorizable_condition (vinfo, stmt_info,
11094 				     NULL, NULL, node, cost_vec)
11095 	  || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11096 				      cost_vec)
11097 	  || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11098 				  stmt_info, NULL, node));
11099   else
11100     {
11101       if (bb_vinfo)
11102 	ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11103 	      || vectorizable_simd_clone_call (vinfo, stmt_info,
11104 					       NULL, NULL, node, cost_vec)
11105 	      || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
11106 					  cost_vec)
11107 	      || vectorizable_shift (vinfo, stmt_info,
11108 				     NULL, NULL, node, cost_vec)
11109 	      || vectorizable_operation (vinfo, stmt_info,
11110 					 NULL, NULL, node, cost_vec)
11111 	      || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
11112 					  cost_vec)
11113 	      || vectorizable_load (vinfo, stmt_info,
11114 				    NULL, NULL, node, cost_vec)
11115 	      || vectorizable_store (vinfo, stmt_info,
11116 				     NULL, NULL, node, cost_vec)
11117 	      || vectorizable_condition (vinfo, stmt_info,
11118 					 NULL, NULL, node, cost_vec)
11119 	      || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11120 					  cost_vec)
11121 	      || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
11122     }
11123 
11124   if (node)
11125     STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11126 
11127   if (!ok)
11128     return opt_result::failure_at (stmt_info->stmt,
11129 				   "not vectorized:"
11130 				   " relevant stmt not supported: %G",
11131 				   stmt_info->stmt);
11132 
11133   /* Stmts that are (also) "live" (i.e. used outside the loop) need
11134      extra handling, except for vectorizable reductions.  */
11135   if (!bb_vinfo
11136       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11137       && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11138       && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11139 				    stmt_info, NULL, node, node_instance,
11140 				    false, cost_vec))
11141     return opt_result::failure_at (stmt_info->stmt,
11142 				   "not vectorized:"
11143 				   " live stmt not supported: %G",
11144 				   stmt_info->stmt);
11145 
11146   return opt_result::success ();
11147 }
11148 
11149 
11150 /* Function vect_transform_stmt.
11151 
11152    Create a vectorized stmt to replace STMT_INFO, and insert it at GSI.  */
11153 
11154 bool
11155 vect_transform_stmt (vec_info *vinfo,
11156 		     stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11157 		     slp_tree slp_node, slp_instance slp_node_instance)
11158 {
11159   bool is_store = false;
11160   gimple *vec_stmt = NULL;
11161   bool done;
11162 
11163   gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11164 
11165   tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11166   if (slp_node)
11167     STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
11168 
11169   switch (STMT_VINFO_TYPE (stmt_info))
11170     {
11171     case type_demotion_vec_info_type:
11172     case type_promotion_vec_info_type:
11173     case type_conversion_vec_info_type:
11174       done = vectorizable_conversion (vinfo, stmt_info,
11175 				      gsi, &vec_stmt, slp_node, NULL);
11176       gcc_assert (done);
11177       break;
11178 
11179     case induc_vec_info_type:
11180       done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11181 				     stmt_info, &vec_stmt, slp_node,
11182 				     NULL);
11183       gcc_assert (done);
11184       break;
11185 
11186     case shift_vec_info_type:
11187       done = vectorizable_shift (vinfo, stmt_info,
11188 				 gsi, &vec_stmt, slp_node, NULL);
11189       gcc_assert (done);
11190       break;
11191 
11192     case op_vec_info_type:
11193       done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11194 				     NULL);
11195       gcc_assert (done);
11196       break;
11197 
11198     case assignment_vec_info_type:
11199       done = vectorizable_assignment (vinfo, stmt_info,
11200 				      gsi, &vec_stmt, slp_node, NULL);
11201       gcc_assert (done);
11202       break;
11203 
11204     case load_vec_info_type:
11205       done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11206 				NULL);
11207       gcc_assert (done);
11208       break;
11209 
11210     case store_vec_info_type:
11211       done = vectorizable_store (vinfo, stmt_info,
11212 				 gsi, &vec_stmt, slp_node, NULL);
11213       gcc_assert (done);
11214       if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11215 	{
11216 	  /* In case of interleaving, the whole chain is vectorized when the
11217 	     last store in the chain is reached.  Store stmts before the last
11218 	     one are skipped, and their vec_stmt_info shouldn't be freed
11219 	     meanwhile.  */
11220 	  stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11221 	  if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11222 	    is_store = true;
11223 	}
11224       else
11225 	is_store = true;
11226       break;
11227 
11228     case condition_vec_info_type:
11229       done = vectorizable_condition (vinfo, stmt_info,
11230 				     gsi, &vec_stmt, slp_node, NULL);
11231       gcc_assert (done);
11232       break;
11233 
11234     case comparison_vec_info_type:
11235       done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11236 				      slp_node, NULL);
11237       gcc_assert (done);
11238       break;
11239 
11240     case call_vec_info_type:
11241       done = vectorizable_call (vinfo, stmt_info,
11242 				gsi, &vec_stmt, slp_node, NULL);
11243       break;
11244 
11245     case call_simd_clone_vec_info_type:
11246       done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11247 					   slp_node, NULL);
11248       break;
11249 
11250     case reduc_vec_info_type:
11251       done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11252 				       gsi, &vec_stmt, slp_node);
11253       gcc_assert (done);
11254       break;
11255 
11256     case cycle_phi_info_type:
11257       done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11258 				       &vec_stmt, slp_node, slp_node_instance);
11259       gcc_assert (done);
11260       break;
11261 
11262     case lc_phi_info_type:
11263       done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11264 				  stmt_info, &vec_stmt, slp_node);
11265       gcc_assert (done);
11266       break;
11267 
11268     case phi_info_type:
11269       done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
11270       gcc_assert (done);
11271       break;
11272 
11273     default:
11274       if (!STMT_VINFO_LIVE_P (stmt_info))
11275 	{
11276 	  if (dump_enabled_p ())
11277 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11278                              "stmt not supported.\n");
11279 	  gcc_unreachable ();
11280 	}
11281       done = true;
11282     }
11283 
11284   if (!slp_node && vec_stmt)
11285     gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
11286 
11287   if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
11288     {
11289       /* Handle stmts whose DEF is used outside the loop-nest that is
11290 	 being vectorized.  */
11291       done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
11292 				       slp_node_instance, true, NULL);
11293       gcc_assert (done);
11294     }
11295 
11296   if (slp_node)
11297     STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11298 
11299   return is_store;
11300 }
11301 
11302 
11303 /* Remove a group of stores (for SLP or interleaving), free their
11304    stmt_vec_info.  */
11305 
11306 void
11307 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11308 {
11309   stmt_vec_info next_stmt_info = first_stmt_info;
11310 
11311   while (next_stmt_info)
11312     {
11313       stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11314       next_stmt_info = vect_orig_stmt (next_stmt_info);
11315       /* Free the attached stmt_vec_info and remove the stmt.  */
11316       vinfo->remove_stmt (next_stmt_info);
11317       next_stmt_info = tmp;
11318     }
11319 }
11320 
11321 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11322    elements of type SCALAR_TYPE, or null if the target doesn't support
11323    such a type.
11324 
11325    If NUNITS is zero, return a vector type that contains elements of
11326    type SCALAR_TYPE, choosing whichever vector size the target prefers.
11327 
11328    If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11329    for this vectorization region and want to "autodetect" the best choice.
11330    Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11331    and we want the new type to be interoperable with it.   PREVAILING_MODE
11332    in this case can be a scalar integer mode or a vector mode; when it
11333    is a vector mode, the function acts like a tree-level version of
11334    related_vector_mode.  */
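/* Illustrative example (target-dependent): with PREVAILING_MODE equal to
   V16QImode and SCALAR_TYPE "int", related_vector_mode would typically
   choose V4SImode, so the result is a 4-element int vector.  With
   PREVAILING_MODE equal to VOIDmode and NUNITS zero, the target's
   preferred SIMD mode for "int" is used instead.  */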
11335 
11336 tree
11337 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11338 				     tree scalar_type, poly_uint64 nunits)
11339 {
11340   tree orig_scalar_type = scalar_type;
11341   scalar_mode inner_mode;
11342   machine_mode simd_mode;
11343   tree vectype;
11344 
11345   if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11346       && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11347     return NULL_TREE;
11348 
11349   unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11350 
11351   /* For vector types of elements whose mode precision doesn't
11352      match their type's precision we use an element type of mode
11353      precision.  The vectorization routines will have to make sure
11354      they support the proper result truncation/extension.
11355      We also make sure to build vector types with INTEGER_TYPE
11356      component type only.  */
11357   if (INTEGRAL_TYPE_P (scalar_type)
11358       && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11359 	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
11360     scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11361 						  TYPE_UNSIGNED (scalar_type));
11362 
11363   /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11364      When the component mode passes the above test simply use a type
11365      corresponding to that mode.  The theory is that any use that
11366      would cause problems with this will disable vectorization anyway.  */
11367   else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11368 	   && !INTEGRAL_TYPE_P (scalar_type))
11369     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11370 
11371   /* We can't build a vector type of elements with alignment bigger than
11372      their size.  */
11373   else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11374     scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11375 						  TYPE_UNSIGNED (scalar_type));
11376 
11377   /* If we fell back to using the mode, fail if there was
11378      no scalar type for it.  */
11379   if (scalar_type == NULL_TREE)
11380     return NULL_TREE;
11381 
11382   /* If no prevailing mode was supplied, use the mode the target prefers.
11383      Otherwise lookup a vector mode based on the prevailing mode.  */
11384   if (prevailing_mode == VOIDmode)
11385     {
11386       gcc_assert (known_eq (nunits, 0U));
11387       simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11388       if (SCALAR_INT_MODE_P (simd_mode))
11389 	{
11390 	  /* Traditional behavior is not to take the integer mode
11391 	     literally, but simply to use it as a way of determining
11392 	     the vector size.  It is up to mode_for_vector to decide
11393 	     what the TYPE_MODE should be.
11394 
11395 	     Note that nunits == 1 is allowed in order to support single
11396 	     element vector types.  */
11397 	  if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11398 	      || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11399 	    return NULL_TREE;
11400 	}
11401     }
11402   else if (SCALAR_INT_MODE_P (prevailing_mode)
11403 	   || !related_vector_mode (prevailing_mode,
11404 				    inner_mode, nunits).exists (&simd_mode))
11405     {
11406       /* Fall back to using mode_for_vector, mostly in the hope of being
11407 	 able to use an integer mode.  */
11408       if (known_eq (nunits, 0U)
11409 	  && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11410 	return NULL_TREE;
11411 
11412       if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11413 	return NULL_TREE;
11414     }
11415 
11416   vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11417 
11418   /* In cases where the mode was chosen by mode_for_vector, check that
11419      the target actually supports the chosen mode, or that it at least
11420      allows the vector mode to be replaced by a like-sized integer.  */
11421   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11422       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11423     return NULL_TREE;
11424 
11425   /* Re-attach the address-space qualifier if we canonicalized the scalar
11426      type.  */
11427   if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11428     return build_qualified_type
11429 	     (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11430 
11431   return vectype;
11432 }
11433 
11434 /* Function get_vectype_for_scalar_type.
11435 
11436    Returns the vector type corresponding to SCALAR_TYPE as supported
11437    by the target.  If GROUP_SIZE is nonzero and we're performing BB
11438    vectorization, make sure that the number of elements in the vector
11439    is no bigger than GROUP_SIZE.  */
11440 
11441 tree
11442 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11443 			     unsigned int group_size)
11444 {
11445   /* For BB vectorization, we should always have a group size once we've
11446      constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11447      are tentative requests during things like early data reference
11448      analysis and pattern recognition.  */
11449   if (is_a <bb_vec_info> (vinfo))
11450     gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11451   else
11452     group_size = 0;
11453 
11454   tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11455 						      scalar_type);
11456   if (vectype && vinfo->vector_mode == VOIDmode)
11457     vinfo->vector_mode = TYPE_MODE (vectype);
11458 
11459   /* Register the natural choice of vector type, before the group size
11460      has been applied.  */
11461   if (vectype)
11462     vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11463 
11464   /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11465      try again with an explicit number of elements.  */
11466   if (vectype
11467       && group_size
11468       && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11469     {
11470       /* Start with the biggest number of units that fits within
11471 	 GROUP_SIZE and halve it until we find a valid vector type.
11472 	 Usually either the first attempt will succeed or all will
11473 	 fail (in the latter case because GROUP_SIZE is too small
11474 	 for the target), but it's possible that a target could have
11475 	 a hole between supported vector types.
11476 
11477 	 If GROUP_SIZE is not a power of 2, this has the effect of
11478 	 trying the largest power of 2 that fits within the group,
11479 	 even though the group is not a multiple of that vector size.
11480 	 The BB vectorizer will then try to carve up the group into
11481 	 smaller pieces.  */
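      /* For example, with GROUP_SIZE == 6 the loop below tries 4 lanes
	 first and then 2 lanes, stopping at the first vector type the
	 target supports.  */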
11482       unsigned int nunits = 1 << floor_log2 (group_size);
11483       do
11484 	{
11485 	  vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11486 							 scalar_type, nunits);
11487 	  nunits /= 2;
11488 	}
11489       while (nunits > 1 && !vectype);
11490     }
11491 
11492   return vectype;
11493 }
11494 
11495 /* Return the vector type corresponding to SCALAR_TYPE as supported
11496    by the target.  NODE, if nonnull, is the SLP tree node that will
11497    use the returned vector type.  */
11498 
11499 tree
11500 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11501 {
11502   unsigned int group_size = 0;
11503   if (node)
11504     group_size = SLP_TREE_LANES (node);
11505   return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11506 }
11507 
11508 /* Function get_mask_type_for_scalar_type.
11509 
11510    Returns the mask type corresponding to a result of comparison
11511    of vectors of specified SCALAR_TYPE as supported by target.
11512    If GROUP_SIZE is nonzero and we're performing BB vectorization,
11513    make sure that the number of elements in the vector is no bigger
11514    than GROUP_SIZE.  */
11515 
11516 tree
11517 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11518 			       unsigned int group_size)
11519 {
11520   tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11521 
11522   if (!vectype)
11523     return NULL;
11524 
11525   return truth_type_for (vectype);
11526 }
11527 
11528 /* Function get_same_sized_vectype
11529 
11530    Returns a vector type corresponding to SCALAR_TYPE of size
11531    VECTOR_TYPE if supported by the target.  */
11532 
11533 tree
11534 get_same_sized_vectype (tree scalar_type, tree vector_type)
11535 {
11536   if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11537     return truth_type_for (vector_type);
11538 
11539   poly_uint64 nunits;
11540   if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11541 		   GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11542     return NULL_TREE;
11543 
11544   return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11545 					      scalar_type, nunits);
11546 }
11547 
11548 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11549    would not change the chosen vector modes.  */
11550 
11551 bool
11552 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11553 {
11554   for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11555        i != vinfo->used_vector_modes.end (); ++i)
11556     if (!VECTOR_MODE_P (*i)
11557 	|| related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11558       return false;
11559   return true;
11560 }
11561 
11562 /* Function vect_is_simple_use.
11563 
11564    Input:
11565    VINFO - the vect info of the loop or basic block that is being vectorized.
11566    OPERAND - operand in the loop or bb.
11567    Output:
11568    DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11569      case OPERAND is an SSA_NAME that is defined in the vectorizable region
11570    DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11571      the definition could be anywhere in the function
11572    DT - the type of definition
11573 
11574    Returns whether a stmt with OPERAND can be vectorized.
11575    For loops, supportable operands are constants, loop invariants, and operands
11576    that are defined by the current iteration of the loop.  Unsupportable
11577    operands are those that are defined by a previous iteration of the loop (as
11578    is the case in reduction/induction computations).
11579    For basic blocks, supportable operands are constants and bb invariants.
11580    For now, operands defined outside the basic block are not supported.  */
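/* For example, when vectorizing a loop body such as
     a[i] = b[i] + x;
   the SSA name holding the loaded value b[i] is a vect_internal_def,
   the loop-invariant X is a vect_external_def, and a literal constant
   operand would be a vect_constant_def.  */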
11581 
11582 bool
11583 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11584 		    stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11585 {
11586   if (def_stmt_info_out)
11587     *def_stmt_info_out = NULL;
11588   if (def_stmt_out)
11589     *def_stmt_out = NULL;
11590   *dt = vect_unknown_def_type;
11591 
11592   if (dump_enabled_p ())
11593     {
11594       dump_printf_loc (MSG_NOTE, vect_location,
11595                        "vect_is_simple_use: operand ");
11596       if (TREE_CODE (operand) == SSA_NAME
11597 	  && !SSA_NAME_IS_DEFAULT_DEF (operand))
11598 	dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11599       else
11600 	dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11601     }
11602 
11603   if (CONSTANT_CLASS_P (operand))
11604     *dt = vect_constant_def;
11605   else if (is_gimple_min_invariant (operand))
11606     *dt = vect_external_def;
11607   else if (TREE_CODE (operand) != SSA_NAME)
11608     *dt = vect_unknown_def_type;
11609   else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11610     *dt = vect_external_def;
11611   else
11612     {
11613       gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11614       stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11615       if (!stmt_vinfo)
11616 	*dt = vect_external_def;
11617       else
11618 	{
11619 	  stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11620 	  def_stmt = stmt_vinfo->stmt;
11621 	  *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11622 	  if (def_stmt_info_out)
11623 	    *def_stmt_info_out = stmt_vinfo;
11624 	}
11625       if (def_stmt_out)
11626 	*def_stmt_out = def_stmt;
11627     }
11628 
11629   if (dump_enabled_p ())
11630     {
11631       dump_printf (MSG_NOTE, ", type of def: ");
11632       switch (*dt)
11633 	{
11634 	case vect_uninitialized_def:
11635 	  dump_printf (MSG_NOTE, "uninitialized\n");
11636 	  break;
11637 	case vect_constant_def:
11638 	  dump_printf (MSG_NOTE, "constant\n");
11639 	  break;
11640 	case vect_external_def:
11641 	  dump_printf (MSG_NOTE, "external\n");
11642 	  break;
11643 	case vect_internal_def:
11644 	  dump_printf (MSG_NOTE, "internal\n");
11645 	  break;
11646 	case vect_induction_def:
11647 	  dump_printf (MSG_NOTE, "induction\n");
11648 	  break;
11649 	case vect_reduction_def:
11650 	  dump_printf (MSG_NOTE, "reduction\n");
11651 	  break;
11652 	case vect_double_reduction_def:
11653 	  dump_printf (MSG_NOTE, "double reduction\n");
11654 	  break;
11655 	case vect_nested_cycle:
11656 	  dump_printf (MSG_NOTE, "nested cycle\n");
11657 	  break;
11658 	case vect_unknown_def_type:
11659 	  dump_printf (MSG_NOTE, "unknown\n");
11660 	  break;
11661 	}
11662     }
11663 
11664   if (*dt == vect_unknown_def_type)
11665     {
11666       if (dump_enabled_p ())
11667         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11668                          "Unsupported pattern.\n");
11669       return false;
11670     }
11671 
11672   return true;
11673 }
11674 
11675 /* Function vect_is_simple_use.
11676 
11677    Same as vect_is_simple_use but also determines the vector operand
11678    type of OPERAND and stores it to *VECTYPE.  If the definition of
11679    OPERAND is vect_uninitialized_def, vect_constant_def or
11680    vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11681    is responsible to compute the best suited vector type for the
11682    scalar operand.  */
11683 
11684 bool
11685 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11686 		    tree *vectype, stmt_vec_info *def_stmt_info_out,
11687 		    gimple **def_stmt_out)
11688 {
11689   stmt_vec_info def_stmt_info;
11690   gimple *def_stmt;
11691   if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11692     return false;
11693 
11694   if (def_stmt_out)
11695     *def_stmt_out = def_stmt;
11696   if (def_stmt_info_out)
11697     *def_stmt_info_out = def_stmt_info;
11698 
11699   /* Now get a vector type if the def is internal, otherwise supply
11700      NULL_TREE and leave it up to the caller to figure out a proper
11701      type for the use stmt.  */
11702   if (*dt == vect_internal_def
11703       || *dt == vect_induction_def
11704       || *dt == vect_reduction_def
11705       || *dt == vect_double_reduction_def
11706       || *dt == vect_nested_cycle)
11707     {
11708       *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11709       gcc_assert (*vectype != NULL_TREE);
11710       if (dump_enabled_p ())
11711 	dump_printf_loc (MSG_NOTE, vect_location,
11712 			 "vect_is_simple_use: vectype %T\n", *vectype);
11713     }
11714   else if (*dt == vect_uninitialized_def
11715 	   || *dt == vect_constant_def
11716 	   || *dt == vect_external_def)
11717     *vectype = NULL_TREE;
11718   else
11719     gcc_unreachable ();
11720 
11721   return true;
11722 }
11723 
11724 /* Function vect_is_simple_use.
11725 
11726    Same as vect_is_simple_use but determines the operand by operand
11727    position OPERAND from either STMT or SLP_NODE, filling in *OP
11728    and *SLP_DEF (when SLP_NODE is not NULL).  */
11729 
11730 bool
11731 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11732 		    unsigned operand, tree *op, slp_tree *slp_def,
11733 		    enum vect_def_type *dt,
11734 		    tree *vectype, stmt_vec_info *def_stmt_info_out)
11735 {
11736   if (slp_node)
11737     {
11738       slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11739       *slp_def = child;
11740       *vectype = SLP_TREE_VECTYPE (child);
11741       if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11742 	{
11743 	  *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11744 	  return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11745 	}
11746       else
11747 	{
11748 	  if (def_stmt_info_out)
11749 	    *def_stmt_info_out = NULL;
11750 	  *op = SLP_TREE_SCALAR_OPS (child)[0];
11751 	  *dt = SLP_TREE_DEF_TYPE (child);
11752 	  return true;
11753 	}
11754     }
11755   else
11756     {
11757       *slp_def = NULL;
11758       if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11759 	{
11760 	  if (gimple_assign_rhs_code (ass) == COND_EXPR
11761 	      && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11762 	    {
11763 	      if (operand < 2)
11764 		*op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11765 	      else
11766 		*op = gimple_op (ass, operand);
11767 	    }
11768 	  else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11769 	    *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11770 	  else
11771 	    *op = gimple_op (ass, operand + 1);
11772 	}
11773       else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11774 	*op = gimple_call_arg (call, operand);
11775       else
11776 	gcc_unreachable ();
11777       return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11778     }
11779 }
11780 
11781 /* If OP is not NULL and its definition is external or constant, set
11782    its vector type to VECTYPE.  Returns true if successful, or false if
11783    not, for example when a conflicting vector type is already recorded.  */
11784 
11785 bool
11786 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11787 {
11788   if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11789     return true;
11790   if (SLP_TREE_VECTYPE (op))
11791     return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11792   SLP_TREE_VECTYPE (op) = vectype;
11793   return true;
11794 }
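
/* Illustrative sketch only: the typical pattern in a vectorizable_*
   routine, combining the SLP-aware overload above with
   vect_maybe_update_slp_op_vectype.  STMT_INFO, SLP_NODE and VECTYPE
   stand for the statement, SLP node and vector type already chosen by
   the caller.  */
#if 0
  tree op0;
  slp_tree slp_op0;
  enum vect_def_type dt0;
  tree vectype0;
  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op0, &slp_op0,
			   &dt0, &vectype0, NULL))
    return false;
  /* Once a vector type has been settled on, record it on constant and
     external SLP operands; a conflict makes the statement unvectorizable.  */
  if (!vect_maybe_update_slp_op_vectype (slp_op0, vectype))
    return false;
#endif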
11795 
11796 /* Function supportable_widening_operation
11797 
11798    Check whether an operation represented by the code CODE is a
11799    widening operation that is supported by the target platform in
11800    vector form (i.e., when operating on arguments of type VECTYPE_IN
11801    and producing a result of type VECTYPE_OUT).
11802 
11803    Widening operations we currently support are NOP (CONVERT), FLOAT,
11804    FIX_TRUNC, WIDEN_MULT, WIDEN_LSHIFT, WIDEN_PLUS, WIDEN_MINUS, DOT_PROD
11805    and SAD.  This function checks if these operations are supported by
11806    the target platform either directly (via vector tree-codes) or via target builtins.
11807 
11808    Output:
11809    - CODE1 and CODE2 are codes of vector operations to be used when
11810    vectorizing the operation, if available.
11811    - MULTI_STEP_CVT determines the number of required intermediate steps
11812    in case of a multi-step conversion (like char->short->int; in that
11813    case MULTI_STEP_CVT will be 1).
11814    - INTERM_TYPES contains the intermediate type(s) required to perform
11815    the widening operation (short in the above example).  */
11816 
11817 bool
11818 supportable_widening_operation (vec_info *vinfo,
11819 				enum tree_code code, stmt_vec_info stmt_info,
11820 				tree vectype_out, tree vectype_in,
11821                                 enum tree_code *code1, enum tree_code *code2,
11822                                 int *multi_step_cvt,
11823                                 vec<tree> *interm_types)
11824 {
11825   loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11826   class loop *vect_loop = NULL;
11827   machine_mode vec_mode;
11828   enum insn_code icode1, icode2;
11829   optab optab1, optab2;
11830   tree vectype = vectype_in;
11831   tree wide_vectype = vectype_out;
11832   enum tree_code c1, c2;
11833   int i;
11834   tree prev_type, intermediate_type;
11835   machine_mode intermediate_mode, prev_mode;
11836   optab optab3, optab4;
11837 
11838   *multi_step_cvt = 0;
11839   if (loop_info)
11840     vect_loop = LOOP_VINFO_LOOP (loop_info);
11841 
11842   switch (code)
11843     {
11844     case WIDEN_MULT_EXPR:
11845       /* The result of a vectorized widening operation usually requires
11846 	 two vectors (because the widened results do not fit into one vector).
11847 	 The vector results would normally be expected to be generated in
11848 	 the same order as in the original scalar computation, i.e. if 8
11849 	 results are generated in each vector iteration, they are
11850 	 to be organized as follows:
11851 		vect1: [res1,res2,res3,res4],
11852 		vect2: [res5,res6,res7,res8].
11853 
11854 	 However, in the special case that the result of the widening
11855 	 operation is used in a reduction computation only, the order doesn't
11856 	 matter (because when vectorizing a reduction we change the order of
11857 	 the computation).  Some targets can take advantage of this and
11858 	 generate more efficient code.  For example, targets like Altivec,
11859 	 that support widen_mult using a sequence of {mult_even,mult_odd}
11860 	 generate the following vectors:
11861 		vect1: [res1,res3,res5,res7],
11862 		vect2: [res2,res4,res6,res8].
11863 
11864 	 When vectorizing outer loops, we execute the inner loop sequentially
11865 	 (each vectorized inner-loop iteration contributes to VF outer-loop
11866 	 iterations in parallel).  We therefore don't allow the order of the
11867 	 computation in the inner loop to change during outer-loop
11868 	 vectorization.  */
11869       /* TODO: Another case in which order doesn't *really* matter is when we
11870 	 widen and then contract again, e.g. (short)((int)x * y >> 8).
11871 	 Normally, pack_trunc performs an even/odd permute, whereas the
11872 	 repack from an even/odd expansion would be an interleave, which
11873 	 would be significantly simpler for e.g. AVX2.  */
11874       /* In any case, in order to avoid duplicating the code below, recurse
11875 	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
11876 	 are properly set up for the caller.  If we fail, we'll continue with
11877 	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
11878       if (vect_loop
11879 	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11880 	  && !nested_in_vect_loop_p (vect_loop, stmt_info)
11881 	  && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11882 					     stmt_info, vectype_out,
11883 					     vectype_in, code1, code2,
11884 					     multi_step_cvt, interm_types))
11885         {
11886           /* Elements in a vector with the vect_used_by_reduction property
11887              cannot be reordered if the use chain with this property does not
11888              have the same operation.  One such example is s += a * b, where
11889              elements in a and b cannot be reordered.  Here we check that the
11890              vector defined by STMT_INFO is used directly only in the reduction statement.  */
11891 	  tree lhs = gimple_assign_lhs (stmt_info->stmt);
11892 	  stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11893 	  if (use_stmt_info
11894 	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11895 	    return true;
11896         }
11897       c1 = VEC_WIDEN_MULT_LO_EXPR;
11898       c2 = VEC_WIDEN_MULT_HI_EXPR;
11899       break;
11900 
11901     case DOT_PROD_EXPR:
11902       c1 = DOT_PROD_EXPR;
11903       c2 = DOT_PROD_EXPR;
11904       break;
11905 
11906     case SAD_EXPR:
11907       c1 = SAD_EXPR;
11908       c2 = SAD_EXPR;
11909       break;
11910 
11911     case VEC_WIDEN_MULT_EVEN_EXPR:
11912       /* Support the recursion induced just above.  */
11913       c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11914       c2 = VEC_WIDEN_MULT_ODD_EXPR;
11915       break;
11916 
11917     case WIDEN_LSHIFT_EXPR:
11918       c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11919       c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11920       break;
11921 
11922     case WIDEN_PLUS_EXPR:
11923       c1 = VEC_WIDEN_PLUS_LO_EXPR;
11924       c2 = VEC_WIDEN_PLUS_HI_EXPR;
11925       break;
11926 
11927     case WIDEN_MINUS_EXPR:
11928       c1 = VEC_WIDEN_MINUS_LO_EXPR;
11929       c2 = VEC_WIDEN_MINUS_HI_EXPR;
11930       break;
11931 
11932     CASE_CONVERT:
11933       c1 = VEC_UNPACK_LO_EXPR;
11934       c2 = VEC_UNPACK_HI_EXPR;
11935       break;
11936 
11937     case FLOAT_EXPR:
11938       c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11939       c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11940       break;
11941 
11942     case FIX_TRUNC_EXPR:
11943       c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11944       c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11945       break;
11946 
11947     default:
11948       gcc_unreachable ();
11949     }
11950 
11951   if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11952     std::swap (c1, c2);
11953 
11954   if (code == FIX_TRUNC_EXPR)
11955     {
11956       /* The signedness is determined from the output operand.  */
11957       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11958       optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11959     }
11960   else if (CONVERT_EXPR_CODE_P (code)
11961 	   && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11962 	   && VECTOR_BOOLEAN_TYPE_P (vectype)
11963 	   && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11964 	   && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11965     {
11966       /* If the input and result modes are the same, a different optab
11967 	 is needed where we pass in the number of units in vectype.  */
11968       optab1 = vec_unpacks_sbool_lo_optab;
11969       optab2 = vec_unpacks_sbool_hi_optab;
11970     }
11971   else
11972     {
11973       optab1 = optab_for_tree_code (c1, vectype, optab_default);
11974       optab2 = optab_for_tree_code (c2, vectype, optab_default);
11975     }
11976 
11977   if (!optab1 || !optab2)
11978     return false;
11979 
11980   vec_mode = TYPE_MODE (vectype);
11981   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11982        || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11983     return false;
11984 
11985   *code1 = c1;
11986   *code2 = c2;
11987 
11988   if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11989       && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11990     {
11991       if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11992 	return true;
11993       /* For scalar masks we may have different boolean
11994 	 vector types having the same QImode.  Thus we
11995 	 add an additional check on the number of elements.  */
11996       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11997 		    TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11998 	return true;
11999     }
12000 
12001   /* Check if it's a multi-step conversion that can be done using intermediate
12002      types.  */
12003 
12004   prev_type = vectype;
12005   prev_mode = vec_mode;
12006 
12007   if (!CONVERT_EXPR_CODE_P (code))
12008     return false;
12009 
12010   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12011      intermediate steps in the promotion sequence.  We try up to
12012      MAX_INTERM_CVT_STEPS steps to get to WIDE_VECTYPE, and fail if we do
12013      not.  */
12014   interm_types->create (MAX_INTERM_CVT_STEPS);
12015   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12016     {
12017       intermediate_mode = insn_data[icode1].operand[0].mode;
12018       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12019 	intermediate_type
12020 	  = vect_halve_mask_nunits (prev_type, intermediate_mode);
12021       else
12022 	intermediate_type
12023 	  = lang_hooks.types.type_for_mode (intermediate_mode,
12024 					    TYPE_UNSIGNED (prev_type));
12025 
12026       if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12027 	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
12028 	  && intermediate_mode == prev_mode
12029 	  && SCALAR_INT_MODE_P (prev_mode))
12030 	{
12031 	  /* If the input and result modes are the same, a different optab
12032 	     is needed where we pass in the number of units in vectype.  */
12033 	  optab3 = vec_unpacks_sbool_lo_optab;
12034 	  optab4 = vec_unpacks_sbool_hi_optab;
12035 	}
12036       else
12037 	{
12038 	  optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
12039 	  optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
12040 	}
12041 
12042       if (!optab3 || !optab4
12043           || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
12044 	  || insn_data[icode1].operand[0].mode != intermediate_mode
12045 	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
12046 	  || insn_data[icode2].operand[0].mode != intermediate_mode
12047 	  || ((icode1 = optab_handler (optab3, intermediate_mode))
12048 	      == CODE_FOR_nothing)
12049 	  || ((icode2 = optab_handler (optab4, intermediate_mode))
12050 	      == CODE_FOR_nothing))
12051 	break;
12052 
12053       interm_types->quick_push (intermediate_type);
12054       (*multi_step_cvt)++;
12055 
12056       if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12057 	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12058 	{
12059 	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12060 	    return true;
12061 	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
12062 			TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12063 	    return true;
12064 	}
12065 
12066       prev_type = intermediate_type;
12067       prev_mode = intermediate_mode;
12068     }
12069 
12070   interm_types->release ();
12071   return false;
12072 }
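
/* Illustrative sketch only: querying the routine above for a widening
   conversion, roughly as a conversion-handling caller would.  STMT_INFO,
   VECTYPE_OUT and VECTYPE_IN are assumed to describe e.g. a char -> int
   conversion statement.  */
#if 0
  enum tree_code code1, code2;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;
  if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
				      vectype_out, vectype_in,
				      &code1, &code2, &multi_step_cvt,
				      &interm_types))
    {
      /* For char -> int one intermediate step through a vector of shorts
	 is typical: multi_step_cvt == 1 and interm_types holds the short
	 vector type.  */
    }
  interm_types.release ();
#endif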
12073 
12074 
12075 /* Function supportable_narrowing_operation
12076 
12077    Check whether an operation represented by the code CODE is a
12078    narrowing operation that is supported by the target platform in
12079    vector form (i.e., when operating on arguments of type VECTYPE_IN
12080    and producing a result of type VECTYPE_OUT).
12081 
12082    Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12083    and FLOAT.  This function checks if these operations are supported by
12084    the target platform directly via vector tree-codes.
12085 
12086    Output:
12087    - CODE1 is the code of a vector operation to be used when
12088    vectorizing the operation, if available.
12089    - MULTI_STEP_CVT determines the number of required intermediate steps
12090    in case of a multi-step conversion (like int->short->char; in that
12091    case MULTI_STEP_CVT will be 1).
12092    - INTERM_TYPES contains the intermediate type(s) required to perform
12093    the narrowing operation (short in the above example).  */
12094 
12095 bool
12096 supportable_narrowing_operation (enum tree_code code,
12097 				 tree vectype_out, tree vectype_in,
12098 				 enum tree_code *code1, int *multi_step_cvt,
12099                                  vec<tree> *interm_types)
12100 {
12101   machine_mode vec_mode;
12102   enum insn_code icode1;
12103   optab optab1, interm_optab;
12104   tree vectype = vectype_in;
12105   tree narrow_vectype = vectype_out;
12106   enum tree_code c1;
12107   tree intermediate_type, prev_type;
12108   machine_mode intermediate_mode, prev_mode;
12109   int i;
12110   bool uns;
12111 
12112   *multi_step_cvt = 0;
12113   switch (code)
12114     {
12115     CASE_CONVERT:
12116       c1 = VEC_PACK_TRUNC_EXPR;
12117       if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12118 	  && VECTOR_BOOLEAN_TYPE_P (vectype)
12119 	  && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
12120 	  && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12121 	optab1 = vec_pack_sbool_trunc_optab;
12122       else
12123 	optab1 = optab_for_tree_code (c1, vectype, optab_default);
12124       break;
12125 
12126     case FIX_TRUNC_EXPR:
12127       c1 = VEC_PACK_FIX_TRUNC_EXPR;
12128       /* The signedness is determined from the output operand.  */
12129       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12130       break;
12131 
12132     case FLOAT_EXPR:
12133       c1 = VEC_PACK_FLOAT_EXPR;
12134       optab1 = optab_for_tree_code (c1, vectype, optab_default);
12135       break;
12136 
12137     default:
12138       gcc_unreachable ();
12139     }
12140 
12141   if (!optab1)
12142     return false;
12143 
12144   vec_mode = TYPE_MODE (vectype);
12145   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12146     return false;
12147 
12148   *code1 = c1;
12149 
12150   if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12151     {
12152       if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12153 	return true;
12154       /* For scalar masks we may have different boolean
12155 	 vector types having the same QImode.  Thus we
12156 	 add an additional check on the number of elements.  */
12157       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12158 		    TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12159 	return true;
12160     }
12161 
12162   if (code == FLOAT_EXPR)
12163     return false;
12164 
12165   /* Check if it's a multi-step conversion that can be done using intermediate
12166      types.  */
12167   prev_mode = vec_mode;
12168   prev_type = vectype;
12169   if (code == FIX_TRUNC_EXPR)
12170     uns = TYPE_UNSIGNED (vectype_out);
12171   else
12172     uns = TYPE_UNSIGNED (vectype);
12173 
12174   /* For a multi-step FIX_TRUNC_EXPR prefer a signed float-to-integer
12175      conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often
12176      more costly than signed.  */
12177   if (code == FIX_TRUNC_EXPR && uns)
12178     {
12179       enum insn_code icode2;
12180 
12181       intermediate_type
12182 	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12183       interm_optab
12184 	= optab_for_tree_code (c1, intermediate_type, optab_default);
12185       if (interm_optab != unknown_optab
12186 	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12187 	  && insn_data[icode1].operand[0].mode
12188 	     == insn_data[icode2].operand[0].mode)
12189 	{
12190 	  uns = false;
12191 	  optab1 = interm_optab;
12192 	  icode1 = icode2;
12193 	}
12194     }
12195 
12196   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12197      intermediate steps in the narrowing sequence.  We try up to
12198      MAX_INTERM_CVT_STEPS steps to get to NARROW_VECTYPE, and fail if we do not.  */
12199   interm_types->create (MAX_INTERM_CVT_STEPS);
12200   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12201     {
12202       intermediate_mode = insn_data[icode1].operand[0].mode;
12203       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12204 	intermediate_type
12205 	  = vect_double_mask_nunits (prev_type, intermediate_mode);
12206       else
12207 	intermediate_type
12208 	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12209       if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12210 	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
12211 	  && intermediate_mode == prev_mode
12212 	  && SCALAR_INT_MODE_P (prev_mode))
12213 	interm_optab = vec_pack_sbool_trunc_optab;
12214       else
12215 	interm_optab
12216 	  = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12217 				 optab_default);
12218       if (!interm_optab
12219 	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12220 	  || insn_data[icode1].operand[0].mode != intermediate_mode
12221 	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12222 	      == CODE_FOR_nothing))
12223 	break;
12224 
12225       interm_types->quick_push (intermediate_type);
12226       (*multi_step_cvt)++;
12227 
12228       if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12229 	{
12230 	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12231 	    return true;
12232 	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12233 			TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12234 	    return true;
12235 	}
12236 
12237       prev_mode = intermediate_mode;
12238       prev_type = intermediate_type;
12239       optab1 = interm_optab;
12240     }
12241 
12242   interm_types->release ();
12243   return false;
12244 }
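
/* Illustrative sketch only: the narrowing counterpart, e.g. for an
   int -> char conversion described by hypothetical VECTYPE_IN and
   VECTYPE_OUT.  */
#if 0
  enum tree_code code1;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;
  if (supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
				       &code1, &multi_step_cvt,
				       &interm_types))
    {
      /* One VEC_PACK_TRUNC_EXPR per step; int -> char would again report
	 multi_step_cvt == 1 with a short vector type in interm_types.  */
    }
  interm_types.release ();
#endif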
12245 
12246 /* Generate and return a vector mask of MASK_TYPE such that
12247    mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12248    Add the statements to SEQ.  */
12249 
12250 tree
vect_gen_while(gimple_seq * seq,tree mask_type,tree start_index,tree end_index,const char * name)12251 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
12252 		tree end_index, const char *name)
12253 {
12254   tree cmp_type = TREE_TYPE (start_index);
12255   gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12256 						       cmp_type, mask_type,
12257 						       OPTIMIZE_FOR_SPEED));
12258   gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12259 					    start_index, end_index,
12260 					    build_zero_cst (mask_type));
12261   tree tmp;
12262   if (name)
12263     tmp = make_temp_ssa_name (mask_type, NULL, name);
12264   else
12265     tmp = make_ssa_name (mask_type);
12266   gimple_call_set_lhs (call, tmp);
12267   gimple_seq_add_stmt (seq, call);
12268   return tmp;
12269 }
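
/* Illustrative sketch only: building the mask that deactivates the excess
   lanes of a partially populated vector.  MASK_TYPE, INDEX and NITERS are
   assumed to be supplied by the loop-masking code.  */
#if 0
  gimple_seq seq = NULL;
  tree mask = vect_gen_while (&seq, mask_type, index, niters, "loop_mask");
  /* SEQ now holds a single IFN_WHILE_ULT call; lane I of MASK is set
     iff index + I < niters.  */
#endif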
12270 
12271 /* Generate a vector mask of type MASK_TYPE such that mask[I] is false
12272    iff J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */
12273 
12274 tree
12275 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12276 		    tree end_index)
12277 {
12278   tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
12279   return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12280 }
12281 
12282 /* Try to compute the vector types required to vectorize STMT_INFO,
12283    returning true on success and false if vectorization isn't possible.
12284    If GROUP_SIZE is nonzero and we're performing BB vectorization,
12285    make sure that the number of elements in the vectors is no bigger
12286    than GROUP_SIZE.
12287 
12288    On success:
12289 
12290    - Set *STMT_VECTYPE_OUT to:
12291      - NULL_TREE if the statement doesn't need to be vectorized;
12292      - the equivalent of STMT_VINFO_VECTYPE otherwise.
12293 
12294    - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12295      number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12296      statement does not help to determine the overall number of units.  */
12297 
12298 opt_result
12299 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12300 				tree *stmt_vectype_out,
12301 				tree *nunits_vectype_out,
12302 				unsigned int group_size)
12303 {
12304   gimple *stmt = stmt_info->stmt;
12305 
12306   /* For BB vectorization, we should always have a group size once we've
12307      constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12308      are tentative requests during things like early data reference
12309      analysis and pattern recognition.  */
12310   if (is_a <bb_vec_info> (vinfo))
12311     gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12312   else
12313     group_size = 0;
12314 
12315   *stmt_vectype_out = NULL_TREE;
12316   *nunits_vectype_out = NULL_TREE;
12317 
12318   if (gimple_get_lhs (stmt) == NULL_TREE
12319       /* MASK_STORE has no lhs, but is ok.  */
12320       && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12321     {
12322       if (is_a <gcall *> (stmt))
12323 	{
12324 	  /* Ignore calls with no lhs.  These must be calls to
12325 	     #pragma omp simd functions, and the vectorization factor
12326 	     they really need can't be determined until
12327 	     vectorizable_simd_clone_call.  */
12328 	  if (dump_enabled_p ())
12329 	    dump_printf_loc (MSG_NOTE, vect_location,
12330 			     "defer to SIMD clone analysis.\n");
12331 	  return opt_result::success ();
12332 	}
12333 
12334       return opt_result::failure_at (stmt,
12335 				     "not vectorized: irregular stmt.%G", stmt);
12336     }
12337 
12338   tree vectype;
12339   tree scalar_type = NULL_TREE;
12340   if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12341     {
12342       vectype = STMT_VINFO_VECTYPE (stmt_info);
12343       if (dump_enabled_p ())
12344 	dump_printf_loc (MSG_NOTE, vect_location,
12345 			 "precomputed vectype: %T\n", vectype);
12346     }
12347   else if (vect_use_mask_type_p (stmt_info))
12348     {
12349       unsigned int precision = stmt_info->mask_precision;
12350       scalar_type = build_nonstandard_integer_type (precision, 1);
12351       vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12352       if (!vectype)
12353 	return opt_result::failure_at (stmt, "not vectorized: unsupported"
12354 				       " data-type %T\n", scalar_type);
12355       if (dump_enabled_p ())
12356 	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12357     }
12358   else
12359     {
12360       if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12361 	scalar_type = TREE_TYPE (DR_REF (dr));
12362       else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12363 	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12364       else
12365 	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12366 
12367       if (dump_enabled_p ())
12368 	{
12369 	  if (group_size)
12370 	    dump_printf_loc (MSG_NOTE, vect_location,
12371 			     "get vectype for scalar type (group size %d):"
12372 			     " %T\n", group_size, scalar_type);
12373 	  else
12374 	    dump_printf_loc (MSG_NOTE, vect_location,
12375 			     "get vectype for scalar type: %T\n", scalar_type);
12376 	}
12377       vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12378       if (!vectype)
12379 	return opt_result::failure_at (stmt,
12380 				       "not vectorized:"
12381 				       " unsupported data-type %T\n",
12382 				       scalar_type);
12383 
12384       if (dump_enabled_p ())
12385 	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12386     }
12387 
12388   if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
12389     return opt_result::failure_at (stmt,
12390 				   "not vectorized: vector stmt in loop:%G",
12391 				   stmt);
12392 
12393   *stmt_vectype_out = vectype;
12394 
12395   /* Don't try to compute scalar types if the stmt produces a boolean
12396      vector; use the existing vector type instead.  */
12397   tree nunits_vectype = vectype;
12398   if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12399     {
12400       /* The number of units is set according to the smallest scalar
12401 	 type (or the largest vector size, but we only support one
12402 	 vector size per vectorization).  */
12403       scalar_type = vect_get_smallest_scalar_type (stmt_info,
12404 						   TREE_TYPE (vectype));
12405       if (scalar_type != TREE_TYPE (vectype))
12406 	{
12407 	  if (dump_enabled_p ())
12408 	    dump_printf_loc (MSG_NOTE, vect_location,
12409 			     "get vectype for smallest scalar type: %T\n",
12410 			     scalar_type);
12411 	  nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12412 							group_size);
12413 	  if (!nunits_vectype)
12414 	    return opt_result::failure_at
12415 	      (stmt, "not vectorized: unsupported data-type %T\n",
12416 	       scalar_type);
12417 	  if (dump_enabled_p ())
12418 	    dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12419 			     nunits_vectype);
12420 	}
12421     }
12422 
12423   if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12424 		   TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12425     return opt_result::failure_at (stmt,
12426 				   "Not vectorized: Incompatible number "
12427 				   "of vector subparts between %T and %T\n",
12428 				   nunits_vectype, *stmt_vectype_out);
12429 
12430   if (dump_enabled_p ())
12431     {
12432       dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12433       dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12434       dump_printf (MSG_NOTE, "\n");
12435     }
12436 
12437   *nunits_vectype_out = nunits_vectype;
12438   return opt_result::success ();
12439 }
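
/* Illustrative sketch only: how an analysis-phase caller consumes the
   routine above, propagating the opt_result explanation on failure.  */
#if 0
  tree stmt_vectype, nunits_vectype;
  opt_result res
    = vect_get_vector_types_for_stmt (vinfo, stmt_info, &stmt_vectype,
				      &nunits_vectype, 0);
  if (!res)
    return res;
#endif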
12440 
12441 /* Generate and return a statement sequence that sets the vector length LEN to:
12442 
12443    min_of_start_and_end = min (START_INDEX, END_INDEX);
12444    left_len = END_INDEX - min_of_start_and_end;
12445    rhs = min (left_len, LEN_LIMIT);
12446    LEN = rhs;
12447 
12448    Note: the cost of the code generated by this function is modeled
12449    by vect_estimate_min_profitable_iters, so changes here may need
12450    corresponding changes there.  */
12451 
12452 gimple_seq
12453 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12454 {
12455   gimple_seq stmts = NULL;
12456   tree len_type = TREE_TYPE (len);
12457   gcc_assert (TREE_TYPE (start_index) == len_type);
12458 
12459   tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12460   tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12461   tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12462   gimple* stmt = gimple_build_assign (len, rhs);
12463   gimple_seq_add_stmt (&stmts, stmt);
12464 
12465   return stmts;
12466 }
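
/* Illustrative sketch only: emitting the active length for one
   length-controlled vector iteration.  LEN_TYPE, START_INDEX, END_INDEX
   and LEN_LIMIT are assumed to come from the loop-control code, with
   START_INDEX sharing LEN_TYPE and LEN_LIMIT being the number of lanes
   per vector.  */
#if 0
  tree len = make_ssa_name (len_type);
  gimple_seq stmts = vect_gen_len (len, start_index, end_index, len_limit);
  /* STMTS now sets LEN to MIN (END_INDEX - MIN (START_INDEX, END_INDEX),
     LEN_LIMIT), i.e. the number of remaining scalars capped at one full
     vector's worth, ready to be inserted ahead of the length-masked
     statements.  */
#endif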
12467 
12468