1 /* Statement Analysis and Transformation for Vectorization
2    Copyright (C) 2003-2022 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4    and Ira Rosen <irar@il.ibm.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h"		/* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
58 
59 /* For lang_hooks.types.type_for_mode.  */
60 #include "langhooks.h"
61 
62 /* Return the vectorized type for the given statement.  */
63 
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67   return STMT_VINFO_VECTYPE (stmt_info);
68 }
69 
70 /* Return TRUE iff the given statement is in an inner loop relative to
71    the loop being vectorized.  */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
74 {
75   gimple *stmt = STMT_VINFO_STMT (stmt_info);
76   basic_block bb = gimple_bb (stmt);
77   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78   class loop* loop;
79 
80   if (!loop_vinfo)
81     return false;
82 
83   loop = LOOP_VINFO_LOOP (loop_vinfo);
84 
85   return (bb->loop_father == loop->inner);
86 }
87 
88 /* Record the cost of a statement, either by directly informing the
89    target model or by saving it in a vector for later processing.
90    Return a preliminary estimate of the statement's cost.  */
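/* For example, vect_get_store_cost below records NCOPIES aligned vector
   stores with

     *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					vector_store, stmt_info, 0,
					vect_body);

   the entry is queued in the cost vector for the target cost model and
   the return value is the preliminary estimate
   builtin_vectorization_cost (kind, vectype, misalign) * count.  */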
91 
92 static unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 		  enum vect_cost_for_stmt kind,
95 		  stmt_vec_info stmt_info, slp_tree node,
96 		  tree vectype, int misalign,
97 		  enum vect_cost_model_location where)
98 {
99   if ((kind == vector_load || kind == unaligned_load)
100       && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
101     kind = vector_gather_load;
102   if ((kind == vector_store || kind == unaligned_store)
103       && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
104     kind = vector_scatter_store;
105 
106   stmt_info_for_cost si
107     = { count, kind, where, stmt_info, node, vectype, misalign };
108   body_cost_vec->safe_push (si);
109 
110   return (unsigned)
111       (builtin_vectorization_cost (kind, vectype, misalign) * count);
112 }
113 
114 unsigned
115 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
116 		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
117 		  tree vectype, int misalign,
118 		  enum vect_cost_model_location where)
119 {
120   return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
121 			   vectype, misalign, where);
122 }
123 
124 unsigned
125 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
126 		  enum vect_cost_for_stmt kind, slp_tree node,
127 		  tree vectype, int misalign,
128 		  enum vect_cost_model_location where)
129 {
130   return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
131 			   vectype, misalign, where);
132 }
133 
134 unsigned
135 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
136 		  enum vect_cost_for_stmt kind,
137 		  enum vect_cost_model_location where)
138 {
139   gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
140 	      || kind == scalar_stmt);
141   return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
142 			   NULL_TREE, 0, where);
143 }
144 
145 /* Return a variable of type ELEM_TYPE[NELEMS].  */
146 
147 static tree
148 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
149 {
150   return create_tmp_var (build_array_type_nelts (elem_type, nelems),
151 			 "vect_array");
152 }
153 
154 /* ARRAY is an array of vectors created by create_vector_array.
155    Return an SSA_NAME for the vector in index N.  The reference
156    is part of the vectorization of STMT_INFO and the vector is associated
157    with scalar destination SCALAR_DEST.  */
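/* For instance, with N == 2 this emits (roughly)

     vect_x.7_12 = vect_array[2];

   before *GSI and returns the SSA name on the left-hand side; the names
   shown are illustrative only.  */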
158 
159 static tree
160 read_vector_array (vec_info *vinfo,
161 		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
162 		   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
163 {
164   tree vect_type, vect, vect_name, array_ref;
165   gimple *new_stmt;
166 
167   gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
168   vect_type = TREE_TYPE (TREE_TYPE (array));
169   vect = vect_create_destination_var (scalar_dest, vect_type);
170   array_ref = build4 (ARRAY_REF, vect_type, array,
171 		      build_int_cst (size_type_node, n),
172 		      NULL_TREE, NULL_TREE);
173 
174   new_stmt = gimple_build_assign (vect, array_ref);
175   vect_name = make_ssa_name (vect, new_stmt);
176   gimple_assign_set_lhs (new_stmt, vect_name);
177   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
178 
179   return vect_name;
180 }
181 
182 /* ARRAY is an array of vectors created by create_vector_array.
183    Emit code to store SSA_NAME VECT in index N of the array.
184    The store is part of the vectorization of STMT_INFO.  */
185 
186 static void
187 write_vector_array (vec_info *vinfo,
188 		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
189 		    tree vect, tree array, unsigned HOST_WIDE_INT n)
190 {
191   tree array_ref;
192   gimple *new_stmt;
193 
194   array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
195 		      build_int_cst (size_type_node, n),
196 		      NULL_TREE, NULL_TREE);
197 
198   new_stmt = gimple_build_assign (array_ref, vect);
199   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
200 }
201 
202 /* PTR is a pointer to an array of type TYPE.  Return a representation
203    of *PTR.  The memory reference replaces those in FIRST_DR
204    (and its group).  */
205 
206 static tree
207 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
208 {
209   tree mem_ref;
210 
211   mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
212   /* Arrays have the same alignment as their type.  */
213   set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
214   return mem_ref;
215 }
216 
217 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
218    Emit the clobber before *GSI.  */
219 
220 static void
221 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
222 		       gimple_stmt_iterator *gsi, tree var)
223 {
224   tree clobber = build_clobber (TREE_TYPE (var));
225   gimple *new_stmt = gimple_build_assign (var, clobber);
226   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
227 }
228 
229 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
230 
231 /* Function vect_mark_relevant.
232 
233    Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */
234 
235 static void
236 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
237 		    enum vect_relevant relevant, bool live_p)
238 {
239   enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
240   bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
241 
242   if (dump_enabled_p ())
243     dump_printf_loc (MSG_NOTE, vect_location,
244 		     "mark relevant %d, live %d: %G", relevant, live_p,
245 		     stmt_info->stmt);
246 
247   /* If this stmt is an original stmt in a pattern, we might need to mark its
248      related pattern stmt instead of the original stmt.  However, such stmts
249      may have their own uses that are not in any pattern; in such cases the
250      stmt itself should be marked.  */
251   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
252     {
253       /* This is the last stmt in a sequence that was detected as a
254 	 pattern that can potentially be vectorized.  Don't mark the stmt
255 	 as relevant/live because it's not going to be vectorized.
256 	 Instead mark the pattern-stmt that replaces it.  */
257 
258       if (dump_enabled_p ())
259 	dump_printf_loc (MSG_NOTE, vect_location,
260 			 "last stmt in pattern. don't mark"
261 			 " relevant/live.\n");
262       stmt_vec_info old_stmt_info = stmt_info;
263       stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
264       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
265       save_relevant = STMT_VINFO_RELEVANT (stmt_info);
266       save_live_p = STMT_VINFO_LIVE_P (stmt_info);
267     }
268 
269   STMT_VINFO_LIVE_P (stmt_info) |= live_p;
270   if (relevant > STMT_VINFO_RELEVANT (stmt_info))
271     STMT_VINFO_RELEVANT (stmt_info) = relevant;
272 
273   if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
274       && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
275     {
276       if (dump_enabled_p ())
277         dump_printf_loc (MSG_NOTE, vect_location,
278                          "already marked relevant/live.\n");
279       return;
280     }
281 
282   worklist->safe_push (stmt_info);
283 }
284 
285 
286 /* Function is_simple_and_all_uses_invariant
287 
288    Return true if STMT_INFO is simple and all uses of it are invariant.  */
289 
290 bool
291 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
292 				  loop_vec_info loop_vinfo)
293 {
294   tree op;
295   ssa_op_iter iter;
296 
297   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
298   if (!stmt)
299     return false;
300 
301   FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
302     {
303       enum vect_def_type dt = vect_uninitialized_def;
304 
305       if (!vect_is_simple_use (op, loop_vinfo, &dt))
306 	{
307 	  if (dump_enabled_p ())
308 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
309 			     "use not simple.\n");
310 	  return false;
311 	}
312 
313       if (dt != vect_external_def && dt != vect_constant_def)
314 	return false;
315     }
316   return true;
317 }
318 
319 /* Function vect_stmt_relevant_p.
320 
321    Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
322    is "relevant for vectorization".
323 
324    A stmt is considered "relevant for vectorization" if:
325    - it has uses outside the loop.
326    - it has vdefs (it alters memory).
327    - control stmts in the loop (except for the exit condition).
328 
329    CHECKME: what other side effects would the vectorizer allow?  */
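/* For example, a store sets *RELEVANT to vect_used_in_scope because it
   has a vdef, while a computation whose only uses are in loop-closed PHIs
   after the loop sets *LIVE_P and, unless it only uses loop-invariant
   operands, is upgraded to vect_used_only_live.  */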
330 
331 static bool
332 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
333 		      enum vect_relevant *relevant, bool *live_p)
334 {
335   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
336   ssa_op_iter op_iter;
337   imm_use_iterator imm_iter;
338   use_operand_p use_p;
339   def_operand_p def_p;
340 
341   *relevant = vect_unused_in_scope;
342   *live_p = false;
343 
344   /* cond stmt other than loop exit cond.  */
345   if (is_ctrl_stmt (stmt_info->stmt)
346       && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
347     *relevant = vect_used_in_scope;
348 
349   /* changing memory.  */
350   if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
351     if (gimple_vdef (stmt_info->stmt)
352 	&& !gimple_clobber_p (stmt_info->stmt))
353       {
354 	if (dump_enabled_p ())
355 	  dump_printf_loc (MSG_NOTE, vect_location,
356                            "vec_stmt_relevant_p: stmt has vdefs.\n");
357 	*relevant = vect_used_in_scope;
358       }
359 
360   /* uses outside the loop.  */
361   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
362     {
363       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
364 	{
365 	  basic_block bb = gimple_bb (USE_STMT (use_p));
366 	  if (!flow_bb_inside_loop_p (loop, bb))
367 	    {
368 	      if (is_gimple_debug (USE_STMT (use_p)))
369 		continue;
370 
371 	      if (dump_enabled_p ())
372 		dump_printf_loc (MSG_NOTE, vect_location,
373                                  "vec_stmt_relevant_p: used out of loop.\n");
374 
375 	      /* We expect all such uses to be in the loop exit phis
376 		 (because of loop closed form)   */
377 	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
378 	      gcc_assert (bb == single_exit (loop)->dest);
379 
380               *live_p = true;
381 	    }
382 	}
383     }
384 
385   if (*live_p && *relevant == vect_unused_in_scope
386       && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
387     {
388       if (dump_enabled_p ())
389 	dump_printf_loc (MSG_NOTE, vect_location,
390 			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
391       *relevant = vect_used_only_live;
392     }
393 
394   return (*live_p || *relevant);
395 }
396 
397 
398 /* Function exist_non_indexing_operands_for_use_p
399 
400    USE is one of the uses attached to STMT_INFO.  Check if USE is
401    used in STMT_INFO for anything other than indexing an array.  */
402 
403 static bool
404 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
405 {
406   tree operand;
407 
408   /* USE corresponds to some operand in STMT.  If there is no data
409      reference in STMT, then any operand that corresponds to USE
410      is not indexing an array.  */
411   if (!STMT_VINFO_DATA_REF (stmt_info))
412     return true;
413 
414   /* STMT has a data_ref.  FORNOW this means that it is of one of
415      the following forms:
416      -1- ARRAY_REF = var
417      -2- var = ARRAY_REF
418      (This should have been verified in analyze_data_refs).
419 
420      'var' in the second case corresponds to a def, not a use,
421      so USE cannot correspond to any operands that are not used
422      for array indexing.
423 
424      Therefore, all we need to check is if STMT falls into the
425      first case, and whether var corresponds to USE.  */
426 
427   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
428   if (!assign || !gimple_assign_copy_p (assign))
429     {
430       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
431       if (call && gimple_call_internal_p (call))
432 	{
433 	  internal_fn ifn = gimple_call_internal_fn (call);
434 	  int mask_index = internal_fn_mask_index (ifn);
435 	  if (mask_index >= 0
436 	      && use == gimple_call_arg (call, mask_index))
437 	    return true;
438 	  int stored_value_index = internal_fn_stored_value_index (ifn);
439 	  if (stored_value_index >= 0
440 	      && use == gimple_call_arg (call, stored_value_index))
441 	    return true;
442 	  if (internal_gather_scatter_fn_p (ifn)
443 	      && use == gimple_call_arg (call, 1))
444 	    return true;
445 	}
446       return false;
447     }
448 
449   if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
450     return false;
451   operand = gimple_assign_rhs1 (assign);
452   if (TREE_CODE (operand) != SSA_NAME)
453     return false;
454 
455   if (operand == use)
456     return true;
457 
458   return false;
459 }
460 
461 
462 /*
463    Function process_use.
464 
465    Inputs:
466    - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
467    - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
468      that defined USE.  This is done by calling mark_relevant and passing it
469      the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
470    - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
471      be performed.
472 
473    Outputs:
474    Generally, LIVE_P and RELEVANT are used to define the liveness and
475    relevance info of the DEF_STMT of this USE:
476        STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
477        STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
478    Exceptions:
479    - case 1: If USE is used only for address computations (e.g. array indexing),
480    which does not need to be directly vectorized, then the liveness/relevance
481    of the respective DEF_STMT is left unchanged.
482    - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
483    we skip DEF_STMT because it has already been processed.
484    - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
485    "relevant" will be modified accordingly.
486 
487    Return opt_result::success () if everything is as expected, otherwise a failure result.  */
488 
489 static opt_result
490 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
491 	     enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
492 	     bool force)
493 {
494   stmt_vec_info dstmt_vinfo;
495   enum vect_def_type dt;
496 
497   /* case 1: we are only interested in uses that need to be vectorized.  Uses
498      that are used for address computation are not considered relevant.  */
499   if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
500     return opt_result::success ();
501 
502   if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
503     return opt_result::failure_at (stmt_vinfo->stmt,
504 				   "not vectorized:"
505 				   " unsupported use in stmt.\n");
506 
507   if (!dstmt_vinfo)
508     return opt_result::success ();
509 
510   basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
511   basic_block bb = gimple_bb (stmt_vinfo->stmt);
512 
513   /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
514      We have to force the stmt live since the epilogue loop needs it to
515      continue computing the reduction.  */
516   if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
517       && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
518       && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
519       && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
520       && bb->loop_father == def_bb->loop_father)
521     {
522       if (dump_enabled_p ())
523 	dump_printf_loc (MSG_NOTE, vect_location,
524 			 "reduc-stmt defining reduc-phi in the same nest.\n");
525       vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
526       return opt_result::success ();
527     }
528 
529   /* case 3a: outer-loop stmt defining an inner-loop stmt:
530 	outer-loop-header-bb:
531 		d = dstmt_vinfo
532 	inner-loop:
533 		stmt # use (d)
534 	outer-loop-tail-bb:
535 		...		  */
536   if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
537     {
538       if (dump_enabled_p ())
539 	dump_printf_loc (MSG_NOTE, vect_location,
540                          "outer-loop def-stmt defining inner-loop stmt.\n");
541 
542       switch (relevant)
543 	{
544 	case vect_unused_in_scope:
545 	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
546 		      vect_used_in_scope : vect_unused_in_scope;
547 	  break;
548 
549 	case vect_used_in_outer_by_reduction:
550           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
551 	  relevant = vect_used_by_reduction;
552 	  break;
553 
554 	case vect_used_in_outer:
555           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
556 	  relevant = vect_used_in_scope;
557 	  break;
558 
559 	case vect_used_in_scope:
560 	  break;
561 
562 	default:
563 	  gcc_unreachable ();
564 	}
565     }
566 
567   /* case 3b: inner-loop stmt defining an outer-loop stmt:
568 	outer-loop-header-bb:
569 		...
570 	inner-loop:
571 		d = dstmt_vinfo
572 	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
573 		stmt # use (d)		*/
574   else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
575     {
576       if (dump_enabled_p ())
577 	dump_printf_loc (MSG_NOTE, vect_location,
578                          "inner-loop def-stmt defining outer-loop stmt.\n");
579 
580       switch (relevant)
581         {
582         case vect_unused_in_scope:
583           relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
584             || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
585                       vect_used_in_outer_by_reduction : vect_unused_in_scope;
586           break;
587 
588         case vect_used_by_reduction:
589 	case vect_used_only_live:
590           relevant = vect_used_in_outer_by_reduction;
591           break;
592 
593         case vect_used_in_scope:
594           relevant = vect_used_in_outer;
595           break;
596 
597         default:
598           gcc_unreachable ();
599         }
600     }
601   /* We are also not interested in uses on loop PHI backedges that are
602      inductions.  Otherwise we'll needlessly vectorize the IV increment
603      and cause hybrid SLP for SLP inductions.  Unless the PHI is live
604      of course.  */
605   else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
606 	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
607 	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
608 	   && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
609 				      loop_latch_edge (bb->loop_father))
610 	       == use))
611     {
612       if (dump_enabled_p ())
613 	dump_printf_loc (MSG_NOTE, vect_location,
614                          "induction value on backedge.\n");
615       return opt_result::success ();
616     }
617 
618 
619   vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
620   return opt_result::success ();
621 }
622 
623 
624 /* Function vect_mark_stmts_to_be_vectorized.
625 
626    Not all stmts in the loop need to be vectorized. For example:
627 
628      for i...
629        for j...
630    1.    T0 = i + j
631    2.	 T1 = a[T0]
632 
633    3.    j = j + 1
634 
635    Stmts 1 and 3 do not need to be vectorized, because loop control and
636    addressing of vectorized data-refs are handled differently.
637 
638    This pass detects such stmts.  */
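/* The detection is a backwards propagation over a worklist: phase 1 seeds
   the worklist with the stmts that are directly relevant or live (stores,
   control stmts, values used after the loop), and phase 2 pops stmts and
   calls process_use on each operand so that the defining stmts inherit an
   appropriate relevance and are queued in turn.  Stmts never reached this
   way (like 1 and 3 above) remain vect_unused_in_scope.  */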
639 
640 opt_result
641 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
642 {
643   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
644   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
645   unsigned int nbbs = loop->num_nodes;
646   gimple_stmt_iterator si;
647   unsigned int i;
648   basic_block bb;
649   bool live_p;
650   enum vect_relevant relevant;
651 
652   DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
653 
654   auto_vec<stmt_vec_info, 64> worklist;
655 
656   /* 1. Init worklist.  */
657   for (i = 0; i < nbbs; i++)
658     {
659       bb = bbs[i];
660       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
661 	{
662 	  stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
663 	  if (dump_enabled_p ())
664 	    dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
665 			     phi_info->stmt);
666 
667 	  if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
668 	    vect_mark_relevant (&worklist, phi_info, relevant, live_p);
669 	}
670       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
671 	{
672 	  if (is_gimple_debug (gsi_stmt (si)))
673 	    continue;
674 	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
675 	  if (dump_enabled_p ())
676 	      dump_printf_loc (MSG_NOTE, vect_location,
677 			       "init: stmt relevant? %G", stmt_info->stmt);
678 
679 	  if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
680 	    vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
681 	}
682     }
683 
684   /* 2. Process_worklist */
685   while (worklist.length () > 0)
686     {
687       use_operand_p use_p;
688       ssa_op_iter iter;
689 
690       stmt_vec_info stmt_vinfo = worklist.pop ();
691       if (dump_enabled_p ())
692 	dump_printf_loc (MSG_NOTE, vect_location,
693 			 "worklist: examine stmt: %G", stmt_vinfo->stmt);
694 
695       /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
696 	 (DEF_STMT) as relevant/irrelevant according to the relevance property
697 	 of STMT.  */
698       relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
699 
700       /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
701 	 propagated as is to the DEF_STMTs of its USEs.
702 
703 	 One exception is when STMT has been identified as defining a reduction
704 	 variable; in this case we set the relevance to vect_used_by_reduction.
705 	 This is because we distinguish between two kinds of relevant stmts -
706 	 those that are used by a reduction computation, and those that are
707 	 (also) used by a regular computation.  This allows us later on to
708 	 identify stmts that are used solely by a reduction, and therefore the
709 	 order of the results that they produce does not have to be kept.  */
710 
711       switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
712         {
713           case vect_reduction_def:
714 	    gcc_assert (relevant != vect_unused_in_scope);
715 	    if (relevant != vect_unused_in_scope
716 		&& relevant != vect_used_in_scope
717 		&& relevant != vect_used_by_reduction
718 		&& relevant != vect_used_only_live)
719 	      return opt_result::failure_at
720 		(stmt_vinfo->stmt, "unsupported use of reduction.\n");
721 	    break;
722 
723           case vect_nested_cycle:
724 	    if (relevant != vect_unused_in_scope
725 		&& relevant != vect_used_in_outer_by_reduction
726 		&& relevant != vect_used_in_outer)
727 	      return opt_result::failure_at
728 		(stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
729             break;
730 
731           case vect_double_reduction_def:
732 	    if (relevant != vect_unused_in_scope
733 		&& relevant != vect_used_by_reduction
734 		&& relevant != vect_used_only_live)
735 	      return opt_result::failure_at
736 		(stmt_vinfo->stmt, "unsupported use of double reduction.\n");
737             break;
738 
739           default:
740             break;
741         }
742 
743       if (is_pattern_stmt_p (stmt_vinfo))
744         {
745           /* Pattern statements are not inserted into the code, so
746              FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
747              have to scan the RHS or function arguments instead.  */
748 	  if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
749 	    {
750 	      enum tree_code rhs_code = gimple_assign_rhs_code (assign);
751 	      tree op = gimple_assign_rhs1 (assign);
752 
753 	      i = 1;
754 	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
755 		{
756 		  opt_result res
757 		    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
758 				   loop_vinfo, relevant, &worklist, false);
759 		  if (!res)
760 		    return res;
761 		  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
762 				     loop_vinfo, relevant, &worklist, false);
763 		  if (!res)
764 		    return res;
765 		  i = 2;
766 		}
767 	      for (; i < gimple_num_ops (assign); i++)
768 		{
769 		  op = gimple_op (assign, i);
770                   if (TREE_CODE (op) == SSA_NAME)
771 		    {
772 		      opt_result res
773 			= process_use (stmt_vinfo, op, loop_vinfo, relevant,
774 				       &worklist, false);
775 		      if (!res)
776 			return res;
777 		    }
778                  }
779             }
780 	  else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
781 	    {
782 	      for (i = 0; i < gimple_call_num_args (call); i++)
783 		{
784 		  tree arg = gimple_call_arg (call, i);
785 		  opt_result res
786 		    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
787 				   &worklist, false);
788 		  if (!res)
789 		    return res;
790 		}
791 	    }
792         }
793       else
794 	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
795           {
796             tree op = USE_FROM_PTR (use_p);
797 	    opt_result res
798 	      = process_use (stmt_vinfo, op, loop_vinfo, relevant,
799 			     &worklist, false);
800 	    if (!res)
801 	      return res;
802           }
803 
804       if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
805 	{
806 	  gather_scatter_info gs_info;
807 	  if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
808 	    gcc_unreachable ();
809 	  opt_result res
810 	    = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
811 			   &worklist, true);
812 	  if (!res)
813 	    {
814 	      if (fatal)
815 		*fatal = false;
816 	      return res;
817 	    }
818 	}
819     } /* while worklist */
820 
821   return opt_result::success ();
822 }
823 
824 /* Function vect_model_simple_cost.
825 
826    Models cost for simple operations, i.e. those that only emit ncopies of a
827    single op.  Right now, this does not account for multiple insns that could
828    be generated for the single vector op.  We will handle that shortly.  */
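/* For instance, a vectorizable operation with one constant or external
   operand is costed as one scalar_to_vec in the prologue (the broadcast
   of that operand) plus NCOPIES copies of KIND in the loop body.  */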
829 
830 static void
831 vect_model_simple_cost (vec_info *,
832 			stmt_vec_info stmt_info, int ncopies,
833 			enum vect_def_type *dt,
834 			int ndts,
835 			slp_tree node,
836 			stmt_vector_for_cost *cost_vec,
837 			vect_cost_for_stmt kind = vector_stmt)
838 {
839   int inside_cost = 0, prologue_cost = 0;
840 
841   gcc_assert (cost_vec != NULL);
842 
843   /* ???  Somehow we need to fix this at the callers.  */
844   if (node)
845     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
846 
847   if (!node)
848     /* Cost the "broadcast" of a scalar operand into a vector operand.
849        Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
850        cost model.  */
851     for (int i = 0; i < ndts; i++)
852       if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
853 	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
854 					   stmt_info, 0, vect_prologue);
855 
856   /* Pass the inside-of-loop statements to the target-specific cost model.  */
857   inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
858 				   stmt_info, 0, vect_body);
859 
860   if (dump_enabled_p ())
861     dump_printf_loc (MSG_NOTE, vect_location,
862                      "vect_model_simple_cost: inside_cost = %d, "
863                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
864 }
865 
866 
867 /* Model cost for type demotion and promotion operations.  PWR is
868    normally zero for single-step promotions and demotions.  It will be
869    one if two-step promotion/demotion is required, and so on.  NCOPIES
870    is the number of vector results (and thus number of instructions)
871    for the narrowest end of the operation chain.  Each additional
872    step doubles the number of instructions required.  If WIDEN_ARITH
873    is true the stmt is doing widening arithmetic.  */
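/* For example, a two-step operation (PWR == 1) with NCOPIES == 2 at the
   narrow end is costed as 2 + 4 = 6 vec_promote_demote (or vector_stmt,
   when WIDEN_ARITH) operations in the loop body, since each step doubles
   the instruction count.  */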
874 
875 static void
876 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
877 				    enum vect_def_type *dt,
878 				    unsigned int ncopies, int pwr,
879 				    stmt_vector_for_cost *cost_vec,
880 				    bool widen_arith)
881 {
882   int i;
883   int inside_cost = 0, prologue_cost = 0;
884 
885   for (i = 0; i < pwr + 1; i++)
886     {
887       inside_cost += record_stmt_cost (cost_vec, ncopies,
888 				       widen_arith
889 				       ? vector_stmt : vec_promote_demote,
890 				       stmt_info, 0, vect_body);
891       ncopies *= 2;
892     }
893 
894   /* FORNOW: Assuming maximum 2 args per stmt.  */
895   for (i = 0; i < 2; i++)
896     if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
897       prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
898 					 stmt_info, 0, vect_prologue);
899 
900   if (dump_enabled_p ())
901     dump_printf_loc (MSG_NOTE, vect_location,
902                      "vect_model_promotion_demotion_cost: inside_cost = %d, "
903                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
904 }
905 
906 /* Returns true if the current function returns DECL.  */
907 
908 static bool
909 cfun_returns (tree decl)
910 {
911   edge_iterator ei;
912   edge e;
913   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
914     {
915       greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
916       if (!ret)
917 	continue;
918       if (gimple_return_retval (ret) == decl)
919 	return true;
920       /* We often end up with an aggregate copy to the result decl,
921          handle that case as well.  First skip intermediate clobbers
922 	 though.  */
923       gimple *def = ret;
924       do
925 	{
926 	  def = SSA_NAME_DEF_STMT (gimple_vuse (def));
927 	}
928       while (gimple_clobber_p (def));
929       if (is_a <gassign *> (def)
930 	  && gimple_assign_lhs (def) == gimple_return_retval (ret)
931 	  && gimple_assign_rhs1 (def) == decl)
932 	return true;
933     }
934   return false;
935 }
936 
937 /* Function vect_model_store_cost
938 
939    Models cost for stores.  In the case of grouped accesses, one access
940    has the overhead of the grouped access attributed to it.  */
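/* As a concrete example, a grouped store of group size 4 implemented via
   VMAT_CONTIGUOUS_PERMUTE is charged NCOPIES * ceil_log2 (4) * 4
   = NCOPIES * 8 vec_perm operations on top of the vector stores
   themselves, and an invariant store additionally pays one scalar_to_vec
   in the prologue.  */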
941 
942 static void
943 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
944 		       vect_memory_access_type memory_access_type,
945 		       dr_alignment_support alignment_support_scheme,
946 		       int misalignment,
947 		       vec_load_store_type vls_type, slp_tree slp_node,
948 		       stmt_vector_for_cost *cost_vec)
949 {
950   unsigned int inside_cost = 0, prologue_cost = 0;
951   stmt_vec_info first_stmt_info = stmt_info;
952   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
953 
954   /* ???  Somehow we need to fix this at the callers.  */
955   if (slp_node)
956     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
957 
958   if (vls_type == VLS_STORE_INVARIANT)
959     {
960       if (!slp_node)
961 	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
962 					   stmt_info, 0, vect_prologue);
963     }
964 
965   /* Grouped stores update all elements in the group at once,
966      so we want the DR for the first statement.  */
967   if (!slp_node && grouped_access_p)
968     first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
969 
970   /* True if we should include any once-per-group costs as well as
971      the cost of the statement itself.  For SLP we only get called
972      once per group anyhow.  */
973   bool first_stmt_p = (first_stmt_info == stmt_info);
974 
975   /* We assume that the cost of a single store-lanes instruction is
976      equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
977      access is instead being provided by a permute-and-store operation,
978      include the cost of the permutes.  */
979   if (first_stmt_p
980       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
981     {
982       /* Uses high and low interleave or shuffle operations for each
983 	 needed permute.  */
984       int group_size = DR_GROUP_SIZE (first_stmt_info);
985       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
986       inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
987 				      stmt_info, 0, vect_body);
988 
989       if (dump_enabled_p ())
990         dump_printf_loc (MSG_NOTE, vect_location,
991                          "vect_model_store_cost: strided group_size = %d .\n",
992                          group_size);
993     }
994 
995   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
996   /* Costs of the stores.  */
997   if (memory_access_type == VMAT_ELEMENTWISE
998       || memory_access_type == VMAT_GATHER_SCATTER)
999     {
1000       /* N scalar stores plus extracting the elements.  */
1001       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1002       inside_cost += record_stmt_cost (cost_vec,
1003 				       ncopies * assumed_nunits,
1004 				       scalar_store, stmt_info, 0, vect_body);
1005     }
1006   else
1007     vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
1008 			 misalignment, &inside_cost, cost_vec);
1009 
1010   if (memory_access_type == VMAT_ELEMENTWISE
1011       || memory_access_type == VMAT_STRIDED_SLP)
1012     {
1013       /* N scalar stores plus extracting the elements.  */
1014       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1015       inside_cost += record_stmt_cost (cost_vec,
1016 				       ncopies * assumed_nunits,
1017 				       vec_to_scalar, stmt_info, 0, vect_body);
1018     }
1019 
1020   /* When vectorizing a store into the function result, assign
1021      a penalty if the function returns in a multi-register location.
1022      In this case we assume we'll end up with having to spill the
1023      vector result and do piecewise loads as a conservative estimate.  */
1024   tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1025   if (base
1026       && (TREE_CODE (base) == RESULT_DECL
1027 	  || (DECL_P (base) && cfun_returns (base)))
1028       && !aggregate_value_p (base, cfun->decl))
1029     {
1030       rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1031       /* ???  Handle PARALLEL in some way.  */
1032       if (REG_P (reg))
1033 	{
1034 	  int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1035 	  /* Assume that a single reg-reg move is possible and cheap,
1036 	     do not account for vector to gp register move cost.  */
1037 	  if (nregs > 1)
1038 	    {
1039 	      /* Spill.  */
1040 	      prologue_cost += record_stmt_cost (cost_vec, ncopies,
1041 						 vector_store,
1042 						 stmt_info, 0, vect_epilogue);
1043 	      /* Loads.  */
1044 	      prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1045 						 scalar_load,
1046 						 stmt_info, 0, vect_epilogue);
1047 	    }
1048 	}
1049     }
1050 
1051   if (dump_enabled_p ())
1052     dump_printf_loc (MSG_NOTE, vect_location,
1053                      "vect_model_store_cost: inside_cost = %d, "
1054                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
1055 }
1056 
1057 
1058 /* Calculate cost of DR's memory access.  */
1059 void
1060 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1061 		     dr_alignment_support alignment_support_scheme,
1062 		     int misalignment,
1063 		     unsigned int *inside_cost,
1064 		     stmt_vector_for_cost *body_cost_vec)
1065 {
1066   switch (alignment_support_scheme)
1067     {
1068     case dr_aligned:
1069       {
1070 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1071 					  vector_store, stmt_info, 0,
1072 					  vect_body);
1073 
1074         if (dump_enabled_p ())
1075           dump_printf_loc (MSG_NOTE, vect_location,
1076                            "vect_model_store_cost: aligned.\n");
1077         break;
1078       }
1079 
1080     case dr_unaligned_supported:
1081       {
1082         /* Here, we assign an additional cost for the unaligned store.  */
1083 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1084 					  unaligned_store, stmt_info,
1085 					  misalignment, vect_body);
1086         if (dump_enabled_p ())
1087           dump_printf_loc (MSG_NOTE, vect_location,
1088                            "vect_model_store_cost: unaligned supported by "
1089                            "hardware.\n");
1090         break;
1091       }
1092 
1093     case dr_unaligned_unsupported:
1094       {
1095         *inside_cost = VECT_MAX_COST;
1096 
1097         if (dump_enabled_p ())
1098           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1099                            "vect_model_store_cost: unsupported access.\n");
1100         break;
1101       }
1102 
1103     default:
1104       gcc_unreachable ();
1105     }
1106 }
1107 
1108 
1109 /* Function vect_model_load_cost
1110 
1111    Models cost for loads.  In the case of grouped accesses, one access has
1112    the overhead of the grouped access attributed to it.  Since unaligned
1113    accesses are supported for loads, we also account for the costs of the
1114    access scheme chosen.  */
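/* For example, an SLP load with a load permutation first costs the
   permutes computed by vect_transform_slp_perm_load and adjusts NCOPIES
   to the number of loads actually needed, while an emulated gather is
   costed as NCOPIES * nunits scalar_load plus vec_to_scalar operations
   and one vec_construct per copy.  */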
1115 
1116 static void
1117 vect_model_load_cost (vec_info *vinfo,
1118 		      stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1119 		      vect_memory_access_type memory_access_type,
1120 		      dr_alignment_support alignment_support_scheme,
1121 		      int misalignment,
1122 		      gather_scatter_info *gs_info,
1123 		      slp_tree slp_node,
1124 		      stmt_vector_for_cost *cost_vec)
1125 {
1126   unsigned int inside_cost = 0, prologue_cost = 0;
1127   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1128 
1129   gcc_assert (cost_vec);
1130 
1131   /* ???  Somehow we need to fix this at the callers.  */
1132   if (slp_node)
1133     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1134 
1135   if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1136     {
1137       /* If the load is permuted then the alignment is determined by
1138 	 the first group element not by the first scalar stmt DR.  */
1139       stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1140       /* Record the cost for the permutation.  */
1141       unsigned n_perms, n_loads;
1142       vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1143 				    vf, true, &n_perms, &n_loads);
1144       inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1145 				       first_stmt_info, 0, vect_body);
1146 
1147       /* And adjust the number of loads performed.  This handles
1148 	 redundancies as well as loads that are later dead.  */
1149       ncopies = n_loads;
1150     }
1151 
1152   /* Grouped loads read all elements in the group at once,
1153      so we want the DR for the first statement.  */
1154   stmt_vec_info first_stmt_info = stmt_info;
1155   if (!slp_node && grouped_access_p)
1156     first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1157 
1158   /* True if we should include any once-per-group costs as well as
1159      the cost of the statement itself.  For SLP we only get called
1160      once per group anyhow.  */
1161   bool first_stmt_p = (first_stmt_info == stmt_info);
1162 
1163   /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1164      ones we actually need.  Account for the cost of unused results.  */
1165   if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
1166     {
1167       unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
1168       stmt_vec_info next_stmt_info = first_stmt_info;
1169       do
1170 	{
1171 	  gaps -= 1;
1172 	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
1173 	}
1174       while (next_stmt_info);
1175       if (gaps)
1176 	{
1177 	  if (dump_enabled_p ())
1178 	    dump_printf_loc (MSG_NOTE, vect_location,
1179 			     "vect_model_load_cost: %d unused vectors.\n",
1180 			     gaps);
1181 	  vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
1182 			      alignment_support_scheme, misalignment, false,
1183 			      &inside_cost, &prologue_cost,
1184 			      cost_vec, cost_vec, true);
1185 	}
1186     }
1187 
1188   /* We assume that the cost of a single load-lanes instruction is
1189      equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
1190      access is instead being provided by a load-and-permute operation,
1191      include the cost of the permutes.  */
1192   if (first_stmt_p
1193       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1194     {
1195       /* Uses even and odd extract operations or shuffle operations
1196 	 for each needed permute.  */
1197       int group_size = DR_GROUP_SIZE (first_stmt_info);
1198       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1199       inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1200 				       stmt_info, 0, vect_body);
1201 
1202       if (dump_enabled_p ())
1203         dump_printf_loc (MSG_NOTE, vect_location,
1204                          "vect_model_load_cost: strided group_size = %d .\n",
1205                          group_size);
1206     }
1207 
1208   /* The loads themselves.  */
1209   if (memory_access_type == VMAT_ELEMENTWISE
1210       || memory_access_type == VMAT_GATHER_SCATTER)
1211     {
1212       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1213       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1214       if (memory_access_type == VMAT_GATHER_SCATTER
1215 	  && gs_info->ifn == IFN_LAST && !gs_info->decl)
1216 	/* For emulated gathers N offset vector element extracts
1217 	   (we assume the scalar scaling and ptr + offset add is consumed by
1218 	   the load).  */
1219 	inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1220 					 vec_to_scalar, stmt_info, 0,
1221 					 vect_body);
1222       /* N scalar loads plus gathering them into a vector.  */
1223       inside_cost += record_stmt_cost (cost_vec,
1224 				       ncopies * assumed_nunits,
1225 				       scalar_load, stmt_info, 0, vect_body);
1226     }
1227   else if (memory_access_type == VMAT_INVARIANT)
1228     {
1229       /* Invariant loads will ideally be hoisted and splat to a vector.  */
1230       prologue_cost += record_stmt_cost (cost_vec, 1,
1231 					 scalar_load, stmt_info, 0,
1232 					 vect_prologue);
1233       prologue_cost += record_stmt_cost (cost_vec, 1,
1234 					 scalar_to_vec, stmt_info, 0,
1235 					 vect_prologue);
1236     }
1237   else
1238     vect_get_load_cost (vinfo, stmt_info, ncopies,
1239 			alignment_support_scheme, misalignment, first_stmt_p,
1240 			&inside_cost, &prologue_cost,
1241 			cost_vec, cost_vec, true);
1242   if (memory_access_type == VMAT_ELEMENTWISE
1243       || memory_access_type == VMAT_STRIDED_SLP
1244       || (memory_access_type == VMAT_GATHER_SCATTER
1245 	  && gs_info->ifn == IFN_LAST && !gs_info->decl))
1246     inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1247 				     stmt_info, 0, vect_body);
1248 
1249   if (dump_enabled_p ())
1250     dump_printf_loc (MSG_NOTE, vect_location,
1251                      "vect_model_load_cost: inside_cost = %d, "
1252                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
1253 }
1254 
1255 
1256 /* Calculate cost of DR's memory access.  */
1257 void
1258 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1259 		    dr_alignment_support alignment_support_scheme,
1260 		    int misalignment,
1261 		    bool add_realign_cost, unsigned int *inside_cost,
1262 		    unsigned int *prologue_cost,
1263 		    stmt_vector_for_cost *prologue_cost_vec,
1264 		    stmt_vector_for_cost *body_cost_vec,
1265 		    bool record_prologue_costs)
1266 {
1267   switch (alignment_support_scheme)
1268     {
1269     case dr_aligned:
1270       {
1271 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1272 					  stmt_info, 0, vect_body);
1273 
1274         if (dump_enabled_p ())
1275           dump_printf_loc (MSG_NOTE, vect_location,
1276                            "vect_model_load_cost: aligned.\n");
1277 
1278         break;
1279       }
1280     case dr_unaligned_supported:
1281       {
1282         /* Here, we assign an additional cost for the unaligned load.  */
1283 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1284 					  unaligned_load, stmt_info,
1285 					  misalignment, vect_body);
1286 
1287         if (dump_enabled_p ())
1288           dump_printf_loc (MSG_NOTE, vect_location,
1289                            "vect_model_load_cost: unaligned supported by "
1290                            "hardware.\n");
1291 
1292         break;
1293       }
1294     case dr_explicit_realign:
1295       {
1296 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1297 					  vector_load, stmt_info, 0, vect_body);
1298 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1299 					  vec_perm, stmt_info, 0, vect_body);
1300 
1301         /* FIXME: If the misalignment remains fixed across the iterations of
1302            the containing loop, the following cost should be added to the
1303            prologue costs.  */
1304         if (targetm.vectorize.builtin_mask_for_load)
1305 	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1306 					    stmt_info, 0, vect_body);
1307 
1308         if (dump_enabled_p ())
1309           dump_printf_loc (MSG_NOTE, vect_location,
1310                            "vect_model_load_cost: explicit realign\n");
1311 
1312         break;
1313       }
1314     case dr_explicit_realign_optimized:
1315       {
1316         if (dump_enabled_p ())
1317           dump_printf_loc (MSG_NOTE, vect_location,
1318                            "vect_model_load_cost: unaligned software "
1319                            "pipelined.\n");
1320 
1321         /* Unaligned software pipeline has a load of an address, an initial
1322            load, and possibly a mask operation to "prime" the loop.  However,
1323            if this is an access in a group of loads, which provide grouped
1324            access, then the above cost should only be considered for one
1325            access in the group.  Inside the loop, there is a load op
1326            and a realignment op.  */
1327 
1328         if (add_realign_cost && record_prologue_costs)
1329           {
1330 	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1331 						vector_stmt, stmt_info,
1332 						0, vect_prologue);
1333             if (targetm.vectorize.builtin_mask_for_load)
1334 	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1335 						  vector_stmt, stmt_info,
1336 						  0, vect_prologue);
1337           }
1338 
1339 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1340 					  stmt_info, 0, vect_body);
1341 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1342 					  stmt_info, 0, vect_body);
1343 
1344         if (dump_enabled_p ())
1345           dump_printf_loc (MSG_NOTE, vect_location,
1346                            "vect_model_load_cost: explicit realign optimized"
1347                            "\n");
1348 
1349         break;
1350       }
1351 
1352     case dr_unaligned_unsupported:
1353       {
1354         *inside_cost = VECT_MAX_COST;
1355 
1356         if (dump_enabled_p ())
1357           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1358                            "vect_model_load_cost: unsupported access.\n");
1359         break;
1360       }
1361 
1362     default:
1363       gcc_unreachable ();
1364     }
1365 }
1366 
1367 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1368    the loop preheader for the vectorized stmt STMT_VINFO.  */
1369 
1370 static void
1371 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1372 		    gimple_stmt_iterator *gsi)
1373 {
1374   if (gsi)
1375     vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1376   else
1377     vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1378 
1379   if (dump_enabled_p ())
1380     dump_printf_loc (MSG_NOTE, vect_location,
1381 		     "created new init_stmt: %G", new_stmt);
1382 }
1383 
1384 /* Function vect_init_vector.
1385 
1386    Insert a new stmt (INIT_STMT) that initializes a new variable of type
1387    TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
1388    vector type, a vector with all elements equal to VAL is created first.
1389    Place the initialization at GSI if it is not NULL.  Otherwise, place the
1390    initialization at the loop preheader.
1391    Return the DEF of INIT_STMT.
1392    It will be used in the vectorization of STMT_INFO.  */
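/* For example, initializing a V4SI vector from the scalar constant 5
   emits (roughly)

     cst_1 = { 5, 5, 5, 5 };

   in the loop preheader (or before GSI if it is non-NULL) and returns
   cst_1; the SSA name shown is illustrative only.  */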
1393 
1394 tree
1395 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1396 		  gimple_stmt_iterator *gsi)
1397 {
1398   gimple *init_stmt;
1399   tree new_temp;
1400 
1401   /* We abuse this function to push something to an SSA name with initial 'val'.  */
1402   if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1403     {
1404       gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1405       if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1406 	{
1407 	  /* Scalar boolean value should be transformed into
1408 	     all zeros or all ones value before building a vector.  */
1409 	  if (VECTOR_BOOLEAN_TYPE_P (type))
1410 	    {
1411 	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
1412 	      tree false_val = build_zero_cst (TREE_TYPE (type));
1413 
1414 	      if (CONSTANT_CLASS_P (val))
1415 		val = integer_zerop (val) ? false_val : true_val;
1416 	      else
1417 		{
1418 		  new_temp = make_ssa_name (TREE_TYPE (type));
1419 		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1420 						   val, true_val, false_val);
1421 		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1422 		  val = new_temp;
1423 		}
1424 	    }
1425 	  else
1426 	    {
1427 	      gimple_seq stmts = NULL;
1428 	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1429 		val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1430 				    TREE_TYPE (type), val);
1431 	      else
1432 		/* ???  Condition vectorization expects us to do
1433 		   promotion of invariant/external defs.  */
1434 		val = gimple_convert (&stmts, TREE_TYPE (type), val);
1435 	      for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1436 		   !gsi_end_p (gsi2); )
1437 		{
1438 		  init_stmt = gsi_stmt (gsi2);
1439 		  gsi_remove (&gsi2, false);
1440 		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1441 		}
1442 	    }
1443 	}
1444       val = build_vector_from_val (type, val);
1445     }
1446 
1447   new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1448   init_stmt = gimple_build_assign (new_temp, val);
1449   vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1450   return new_temp;
1451 }
1452 
1453 
1454 /* Function vect_get_vec_defs_for_operand.
1455 
1456    OP is an operand in STMT_VINFO.  This function returns a vector of
1457    NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1458 
1459    In the case that OP is an SSA_NAME which is defined in the loop, then
1460    STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1461 
1462    In case OP is an invariant or constant, a new stmt that creates a vector def
1463    needs to be introduced.  VECTYPE may be used to specify a required type for
1464    the vector invariant.  */
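/* For example (purely illustrative): for OP == 3, a V4SI vector type and
   NCOPIES == 2, one statement building { 3, 3, 3, 3 } is emitted in the
   preheader and its SSA definition is pushed twice into *VEC_OPRNDS.  */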
1465 
1466 void
1467 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1468 			       unsigned ncopies,
1469 			       tree op, vec<tree> *vec_oprnds, tree vectype)
1470 {
1471   gimple *def_stmt;
1472   enum vect_def_type dt;
1473   bool is_simple_use;
1474   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1475 
1476   if (dump_enabled_p ())
1477     dump_printf_loc (MSG_NOTE, vect_location,
1478 		     "vect_get_vec_defs_for_operand: %T\n", op);
1479 
1480   stmt_vec_info def_stmt_info;
1481   is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1482 				      &def_stmt_info, &def_stmt);
1483   gcc_assert (is_simple_use);
1484   if (def_stmt && dump_enabled_p ())
1485     dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  %G", def_stmt);
1486 
1487   vec_oprnds->create (ncopies);
1488   if (dt == vect_constant_def || dt == vect_external_def)
1489     {
1490       tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1491       tree vector_type;
1492 
1493       if (vectype)
1494 	vector_type = vectype;
1495       else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1496 	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1497 	vector_type = truth_type_for (stmt_vectype);
1498       else
1499 	vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1500 
1501       gcc_assert (vector_type);
1502       tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1503       while (ncopies--)
1504 	vec_oprnds->quick_push (vop);
1505     }
1506   else
1507     {
1508       def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1509       gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1510       for (unsigned i = 0; i < ncopies; ++i)
1511 	vec_oprnds->quick_push (gimple_get_lhs
1512 				  (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1513     }
1514 }
1515 
1516 
1517 /* Get vectorized definitions for OP0 and OP1.  */
1518 
1519 void
1520 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1521 		   unsigned ncopies,
1522 		   tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1523 		   tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1524 		   tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1525 		   tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1526 {
1527   if (slp_node)
1528     {
1529       if (op0)
1530 	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1531       if (op1)
1532 	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1533       if (op2)
1534 	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1535       if (op3)
1536 	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1537     }
1538   else
1539     {
1540       if (op0)
1541 	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1542 				       op0, vec_oprnds0, vectype0);
1543       if (op1)
1544 	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1545 				       op1, vec_oprnds1, vectype1);
1546       if (op2)
1547 	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1548 				       op2, vec_oprnds2, vectype2);
1549       if (op3)
1550 	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1551 				       op3, vec_oprnds3, vectype3);
1552     }
1553 }
1554 
1555 void
1556 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1557 		   unsigned ncopies,
1558 		   tree op0, vec<tree> *vec_oprnds0,
1559 		   tree op1, vec<tree> *vec_oprnds1,
1560 		   tree op2, vec<tree> *vec_oprnds2,
1561 		   tree op3, vec<tree> *vec_oprnds3)
1562 {
1563   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1564 		     op0, vec_oprnds0, NULL_TREE,
1565 		     op1, vec_oprnds1, NULL_TREE,
1566 		     op2, vec_oprnds2, NULL_TREE,
1567 		     op3, vec_oprnds3, NULL_TREE);
1568 }
1569 
1570 /* Helper function called by vect_finish_replace_stmt and
1571    vect_finish_stmt_generation.  Set the location of the new statement
1572    and, if it can throw, add it to the same EH region as the scalar stmt.  */
1573 
1574 static void
1575 vect_finish_stmt_generation_1 (vec_info *,
1576 			       stmt_vec_info stmt_info, gimple *vec_stmt)
1577 {
1578   if (dump_enabled_p ())
1579     dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1580 
1581   if (stmt_info)
1582     {
1583       gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1584 
1585       /* While EH edges will generally prevent vectorization, stmt might
1586 	 e.g. be in a must-not-throw region.  Ensure newly created stmts
1587 	 that could throw are part of the same region.  */
1588       int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1589       if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1590 	add_stmt_to_eh_lp (vec_stmt, lp_nr);
1591     }
1592   else
1593     gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1594 }
1595 
1596 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1597    which sets the same scalar result as STMT_INFO did.  The location and
1598    EH information of VEC_STMT are updated to match the scalar statement.  */
1599 
1600 void
1601 vect_finish_replace_stmt (vec_info *vinfo,
1602 			  stmt_vec_info stmt_info, gimple *vec_stmt)
1603 {
1604   gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1605   gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1606 
1607   gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1608   gsi_replace (&gsi, vec_stmt, true);
1609 
1610   vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1611 }
1612 
1613 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1614    before *GSI, updating virtual SSA form and EH information as needed.  */
1615 
1616 void
1617 vect_finish_stmt_generation (vec_info *vinfo,
1618 			     stmt_vec_info stmt_info, gimple *vec_stmt,
1619 			     gimple_stmt_iterator *gsi)
1620 {
1621   gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1622 
1623   if (!gsi_end_p (*gsi)
1624       && gimple_has_mem_ops (vec_stmt))
1625     {
1626       gimple *at_stmt = gsi_stmt (*gsi);
1627       tree vuse = gimple_vuse (at_stmt);
1628       if (vuse && TREE_CODE (vuse) == SSA_NAME)
1629 	{
1630 	  tree vdef = gimple_vdef (at_stmt);
1631 	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1632 	  gimple_set_modified (vec_stmt, true);
1633 	  /* If we have an SSA vuse and insert a store, update virtual
1634 	     SSA form to avoid triggering the renamer.  Do so only
1635 	     if we can easily see all uses - which is what almost always
1636 	     happens with the way vectorized stmts are inserted.  */
1637 	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1638 	      && ((is_gimple_assign (vec_stmt)
1639 		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1640 		  || (is_gimple_call (vec_stmt)
1641 		      && !(gimple_call_flags (vec_stmt)
1642 			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1643 	    {
1644 	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1645 	      gimple_set_vdef (vec_stmt, new_vdef);
1646 	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1647 	    }
1648 	}
1649     }
1650   gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1651   vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1652 }
1653 
1654 /* We want to vectorize a call to combined function CFN with function
1655    decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1656    as the types of all inputs.  Check whether this is possible using
1657    an internal function, returning its code if so or IFN_LAST if not.  */
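/* For instance (an illustrative note): a call to the sqrt built-in maps here
   to IFN_SQRT via associated_internal_fn, and IFN_SQRT is returned only if
   direct_internal_fn_supported_p reports that the target can expand it for
   the given vector types.  */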
1658 
1659 static internal_fn
1660 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1661 				tree vectype_out, tree vectype_in)
1662 {
1663   internal_fn ifn;
1664   if (internal_fn_p (cfn))
1665     ifn = as_internal_fn (cfn);
1666   else
1667     ifn = associated_internal_fn (fndecl);
1668   if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1669     {
1670       const direct_internal_fn_info &info = direct_internal_fn (ifn);
1671       if (info.vectorizable)
1672 	{
1673 	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1674 	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1675 	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1676 					      OPTIMIZE_FOR_SPEED))
1677 	    return ifn;
1678 	}
1679     }
1680   return IFN_LAST;
1681 }
1682 
1683 
1684 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1685 				  gimple_stmt_iterator *);
1686 
1687 /* Check whether a load or store statement in the loop described by
1688    LOOP_VINFO is possible in a loop using partial vectors.  This is
1689    testing whether the vectorizer pass has the appropriate support,
1690    as well as whether the target does.
1691 
1692    VLS_TYPE says whether the statement is a load or store and VECTYPE
1693    is the type of the vector being loaded or stored.  SLP_NODE is the SLP
1694    node that contains the statement, or null if none.  MEMORY_ACCESS_TYPE
1695    says how the load or store is going to be implemented and GROUP_SIZE
1696    is the number of load or store statements in the containing group.
1697    If the access is a gather load or scatter store, GS_INFO describes
1698    its arguments.  If the load or store is conditional, SCALAR_MASK is the
1699    condition under which it occurs.
1700 
1701    Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1702    vectors is not supported, otherwise record the required rgroup control
1703    types.  */
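/* In outline (an illustrative summary, not normative): the lanes and
   gather/scatter cases below record loop masks; contiguous accesses may
   record loop masks (if the target has masked loads/stores) and/or loop
   lengths (if it has len_load/len_store style accesses); all other cases
   clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P.  */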
1704 
1705 static void
1706 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1707 				      slp_tree slp_node,
1708 				      vec_load_store_type vls_type,
1709 				      int group_size,
1710 				      vect_memory_access_type
1711 				      memory_access_type,
1712 				      gather_scatter_info *gs_info,
1713 				      tree scalar_mask)
1714 {
1715   /* Invariant loads need no special support.  */
1716   if (memory_access_type == VMAT_INVARIANT)
1717     return;
1718 
1719   unsigned int nvectors;
1720   if (slp_node)
1721     nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1722   else
1723     nvectors = vect_get_num_copies (loop_vinfo, vectype);
1724 
1725   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1726   machine_mode vecmode = TYPE_MODE (vectype);
1727   bool is_load = (vls_type == VLS_LOAD);
1728   if (memory_access_type == VMAT_LOAD_STORE_LANES)
1729     {
1730       if (is_load
1731 	  ? !vect_load_lanes_supported (vectype, group_size, true)
1732 	  : !vect_store_lanes_supported (vectype, group_size, true))
1733 	{
1734 	  if (dump_enabled_p ())
1735 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1736 			     "can't operate on partial vectors because"
1737 			     " the target doesn't have an appropriate"
1738 			     " load/store-lanes instruction.\n");
1739 	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1740 	  return;
1741 	}
1742       vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1743 			     scalar_mask);
1744       return;
1745     }
1746 
1747   if (memory_access_type == VMAT_GATHER_SCATTER)
1748     {
1749       internal_fn ifn = (is_load
1750 			 ? IFN_MASK_GATHER_LOAD
1751 			 : IFN_MASK_SCATTER_STORE);
1752       if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1753 						   gs_info->memory_type,
1754 						   gs_info->offset_vectype,
1755 						   gs_info->scale))
1756 	{
1757 	  if (dump_enabled_p ())
1758 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1759 			     "can't operate on partial vectors because"
1760 			     " the target doesn't have an appropriate"
1761 			     " gather load or scatter store instruction.\n");
1762 	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1763 	  return;
1764 	}
1765       vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1766 			     scalar_mask);
1767       return;
1768     }
1769 
1770   if (memory_access_type != VMAT_CONTIGUOUS
1771       && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1772     {
1773       /* Element X of the data must come from iteration i * VF + X of the
1774 	 scalar loop.  We need more work to support other mappings.  */
1775       if (dump_enabled_p ())
1776 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1777 			 "can't operate on partial vectors because an"
1778 			 " access isn't contiguous.\n");
1779       LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1780       return;
1781     }
1782 
1783   if (!VECTOR_MODE_P (vecmode))
1784     {
1785       if (dump_enabled_p ())
1786 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1787 			 "can't operate on partial vectors when emulating"
1788 			 " vector operations.\n");
1789       LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1790       return;
1791     }
1792 
1793   /* We might load more scalars than we need for permuting SLP loads.
1794      We checked in get_group_load_store_type that the extra elements
1795      don't leak into a new vector.  */
1796   auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1797   {
1798     unsigned int nvectors;
1799     if (can_div_away_from_zero_p (size, nunits, &nvectors))
1800       return nvectors;
1801     gcc_unreachable ();
1802   };
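  /* Example (illustrative only): with GROUP_SIZE == 3, VF == 4 and
     NUNITS == 8 we access 3 * 4 == 12 scalars per unrolled group, which
     rounds up to ceil (12 / 8) == 2 vectors worth of masks or lengths.  */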
1803 
1804   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1805   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1806   machine_mode mask_mode;
1807   bool using_partial_vectors_p = false;
1808   if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1809       && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1810     {
1811       nvectors = group_memory_nvectors (group_size * vf, nunits);
1812       vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1813       using_partial_vectors_p = true;
1814     }
1815 
1816   machine_mode vmode;
1817   if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1818     {
1819       nvectors = group_memory_nvectors (group_size * vf, nunits);
1820       vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1821       unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1822       vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1823       using_partial_vectors_p = true;
1824     }
1825 
1826   if (!using_partial_vectors_p)
1827     {
1828       if (dump_enabled_p ())
1829 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1830 			 "can't operate on partial vectors because the"
1831 			 " target doesn't have the appropriate partial"
1832 			 " vectorization load or store.\n");
1833       LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1834     }
1835 }
1836 
1837 /* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
1838    form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1839    that needs to be applied to all loads and stores in a vectorized loop.
1840    Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1841    otherwise return VEC_MASK & LOOP_MASK.
1842 
1843    MASK_TYPE is the type of both masks.  If new statements are needed,
1844    insert them before GSI.  */
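/* A minimal sketch of the statement emitted when both masks are needed and
   the pair is not already cached (SSA names purely illustrative):

       vec_mask_and_1 = vec_mask_2 & loop_mask_3;  */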
1845 
1846 static tree
1847 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1848 		  tree vec_mask, gimple_stmt_iterator *gsi)
1849 {
1850   gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1851   if (!loop_mask)
1852     return vec_mask;
1853 
1854   gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1855 
1856   if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1857     return vec_mask;
1858 
1859   tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1860   gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1861 					  vec_mask, loop_mask);
1862 
1863   gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1864   return and_res;
1865 }
1866 
1867 /* Determine whether we can use a gather load or scatter store to vectorize
1868    strided load or store STMT_INFO by truncating the current offset to a
1869    smaller width.  We need to be able to construct an offset vector:
1870 
1871      { 0, X, X*2, X*3, ... }
1872 
1873    without loss of precision, where X is STMT_INFO's DR_STEP.
1874 
1875    Return true if this is possible, describing the gather load or scatter
1876    store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */
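/* Illustrative example (numbers invented): with DR_STEP == 4 and a scale
   equal to the element size, the offsets grow by one per element, so if
   COUNT is at most 255 they fit in 8 bits and an 8-bit offset type is the
   first candidate checked against the target.  */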
1877 
1878 static bool
1879 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1880 				     loop_vec_info loop_vinfo, bool masked_p,
1881 				     gather_scatter_info *gs_info)
1882 {
1883   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1884   data_reference *dr = dr_info->dr;
1885   tree step = DR_STEP (dr);
1886   if (TREE_CODE (step) != INTEGER_CST)
1887     {
1888       /* ??? Perhaps we could use range information here?  */
1889       if (dump_enabled_p ())
1890 	dump_printf_loc (MSG_NOTE, vect_location,
1891 			 "cannot truncate variable step.\n");
1892       return false;
1893     }
1894 
1895   /* Get the number of bits in an element.  */
1896   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1897   scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1898   unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1899 
1900   /* Set COUNT to the upper limit on the number of elements - 1.
1901      Start with the maximum vectorization factor.  */
1902   unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1903 
1904   /* Try lowering COUNT to the number of scalar latch iterations.  */
1905   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1906   widest_int max_iters;
1907   if (max_loop_iterations (loop, &max_iters)
1908       && max_iters < count)
1909     count = max_iters.to_shwi ();
1910 
1911   /* Try scales of 1 and the element size.  */
1912   int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1913   wi::overflow_type overflow = wi::OVF_NONE;
1914   for (int i = 0; i < 2; ++i)
1915     {
1916       int scale = scales[i];
1917       widest_int factor;
1918       if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1919 	continue;
1920 
1921       /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE.  */
1922       widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1923       if (overflow)
1924 	continue;
1925       signop sign = range >= 0 ? UNSIGNED : SIGNED;
1926       unsigned int min_offset_bits = wi::min_precision (range, sign);
1927 
1928       /* Find the narrowest viable offset type.  */
1929       unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1930       tree offset_type = build_nonstandard_integer_type (offset_bits,
1931 							 sign == UNSIGNED);
1932 
1933       /* See whether the target supports the operation with an offset
1934 	 no narrower than OFFSET_TYPE.  */
1935       tree memory_type = TREE_TYPE (DR_REF (dr));
1936       if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1937 				     vectype, memory_type, offset_type, scale,
1938 				     &gs_info->ifn, &gs_info->offset_vectype)
1939 	  || gs_info->ifn == IFN_LAST)
1940 	continue;
1941 
1942       gs_info->decl = NULL_TREE;
1943       /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1944 	 but we don't need to store that here.  */
1945       gs_info->base = NULL_TREE;
1946       gs_info->element_type = TREE_TYPE (vectype);
1947       gs_info->offset = fold_convert (offset_type, step);
1948       gs_info->offset_dt = vect_constant_def;
1949       gs_info->scale = scale;
1950       gs_info->memory_type = memory_type;
1951       return true;
1952     }
1953 
1954   if (overflow && dump_enabled_p ())
1955     dump_printf_loc (MSG_NOTE, vect_location,
1956 		     "truncating gather/scatter offset to %d bits"
1957 		     " might change its value.\n", element_bits);
1958 
1959   return false;
1960 }
1961 
1962 /* Return true if we can use gather/scatter internal functions to
1963    vectorize STMT_INFO, which is a grouped or strided load or store.
1964    MASKED_P is true if load or store is conditional.  When returning
1965    true, fill in GS_INFO with the information required to perform the
1966    operation.  */
1967 
1968 static bool
1969 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1970 				    loop_vec_info loop_vinfo, bool masked_p,
1971 				    gather_scatter_info *gs_info)
1972 {
1973   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1974       || gs_info->ifn == IFN_LAST)
1975     return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1976 						masked_p, gs_info);
1977 
1978   tree old_offset_type = TREE_TYPE (gs_info->offset);
1979   tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1980 
1981   gcc_assert (TYPE_PRECISION (new_offset_type)
1982 	      >= TYPE_PRECISION (old_offset_type));
1983   gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1984 
1985   if (dump_enabled_p ())
1986     dump_printf_loc (MSG_NOTE, vect_location,
1987 		     "using gather/scatter for strided/grouped access,"
1988 		     " scale = %d\n", gs_info->scale);
1989 
1990   return true;
1991 }
1992 
1993 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1994    elements with a known constant step.  Return -1 if that step
1995    is negative, 0 if it is zero, and 1 if it is greater than zero.  */
1996 
1997 static int
1998 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1999 {
2000   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2001   return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
2002 			       size_zero_node);
2003 }
2004 
2005 /* If the target supports a permute mask that reverses the elements in
2006    a vector of type VECTYPE, return that mask, otherwise return null.  */
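/* For a fixed-length V4SI, for instance, the mask encodes { 3, 2, 1, 0 };
   only the first three elements need to be given explicitly because the
   single stepped pattern below extends to variable-length vectors.  */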
2007 
2008 static tree
2009 perm_mask_for_reverse (tree vectype)
2010 {
2011   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2012 
2013   /* The encoding has a single stepped pattern.  */
2014   vec_perm_builder sel (nunits, 1, 3);
2015   for (int i = 0; i < 3; ++i)
2016     sel.quick_push (nunits - 1 - i);
2017 
2018   vec_perm_indices indices (sel, 1, nunits);
2019   if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2020     return NULL_TREE;
2021   return vect_gen_perm_mask_checked (vectype, indices);
2022 }
2023 
2024 /* A subroutine of get_load_store_type, with a subset of the same
2025    arguments.  Handle the case where STMT_INFO is a load or store that
2026    accesses consecutive elements with a negative step.  Sets *POFFSET
2027    to the offset to be applied to the DR for the first access.  */
2028 
2029 static vect_memory_access_type
2030 get_negative_load_store_type (vec_info *vinfo,
2031 			      stmt_vec_info stmt_info, tree vectype,
2032 			      vec_load_store_type vls_type,
2033 			      unsigned int ncopies, poly_int64 *poffset)
2034 {
2035   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2036   dr_alignment_support alignment_support_scheme;
2037 
2038   if (ncopies > 1)
2039     {
2040       if (dump_enabled_p ())
2041 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2042 			 "multiple types with negative step.\n");
2043       return VMAT_ELEMENTWISE;
2044     }
2045 
2046   /* For backward running DRs the first access in vectype actually is
2047      N-1 elements before the address of the DR.  */
2048   *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
2049 	      * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
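  /* E.g. (illustrative) for a V4SI access with 4-byte elements this is
     -(4 - 1) * 4 == -12 bytes, i.e. the first vector covers the three
     elements preceding the DR address plus the element at it.  */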
2050 
2051   int misalignment = dr_misalignment (dr_info, vectype, *poffset);
2052   alignment_support_scheme
2053     = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
2054   if (alignment_support_scheme != dr_aligned
2055       && alignment_support_scheme != dr_unaligned_supported)
2056     {
2057       if (dump_enabled_p ())
2058 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2059 			 "negative step but alignment required.\n");
2060       *poffset = 0;
2061       return VMAT_ELEMENTWISE;
2062     }
2063 
2064   if (vls_type == VLS_STORE_INVARIANT)
2065     {
2066       if (dump_enabled_p ())
2067 	dump_printf_loc (MSG_NOTE, vect_location,
2068 			 "negative step with invariant source;"
2069 			 " no permute needed.\n");
2070       return VMAT_CONTIGUOUS_DOWN;
2071     }
2072 
2073   if (!perm_mask_for_reverse (vectype))
2074     {
2075       if (dump_enabled_p ())
2076 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2077 			 "negative step and reversing not supported.\n");
2078       *poffset = 0;
2079       return VMAT_ELEMENTWISE;
2080     }
2081 
2082   return VMAT_CONTIGUOUS_REVERSE;
2083 }
2084 
2085 /* STMT_INFO is either a masked or unconditional store.  Return the value
2086    being stored.  */
2087 
2088 tree
2089 vect_get_store_rhs (stmt_vec_info stmt_info)
2090 {
2091   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2092     {
2093       gcc_assert (gimple_assign_single_p (assign));
2094       return gimple_assign_rhs1 (assign);
2095     }
2096   if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2097     {
2098       internal_fn ifn = gimple_call_internal_fn (call);
2099       int index = internal_fn_stored_value_index (ifn);
2100       gcc_assert (index >= 0);
2101       return gimple_call_arg (call, index);
2102     }
2103   gcc_unreachable ();
2104 }
2105 
2106 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2107 
2108    This function returns a vector type which can be composed of NELTS pieces,
2109    whose type is recorded in PTYPE.  VTYPE should be a vector type and has the
2110    same vector size as the returned vector.  It first checks whether the target
2111    supports a piece-sized vector mode for the construction; if not, it then
2112    checks whether a piece-sized scalar mode can be used instead.  It returns
2113    NULL_TREE if no suitable composition is found.
2114 
2115    For example, for (vtype=V16QI, nelts=4), we can probably get:
2116      - V16QI with PTYPE V4QI.
2117      - V4SI with PTYPE SI.
2118      - NULL_TREE.  */
2119 
2120 static tree
2121 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2122 {
2123   gcc_assert (VECTOR_TYPE_P (vtype));
2124   gcc_assert (known_gt (nelts, 0U));
2125 
2126   machine_mode vmode = TYPE_MODE (vtype);
2127   if (!VECTOR_MODE_P (vmode))
2128     return NULL_TREE;
2129 
2130   poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2131   unsigned int pbsize;
2132   if (constant_multiple_p (vbsize, nelts, &pbsize))
2133     {
2134       /* First check if vec_init optab supports construction from
2135 	 vector pieces directly.  */
2136       scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2137       poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2138       machine_mode rmode;
2139       if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2140 	  && (convert_optab_handler (vec_init_optab, vmode, rmode)
2141 	      != CODE_FOR_nothing))
2142 	{
2143 	  *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2144 	  return vtype;
2145 	}
2146 
2147       /* Otherwise check whether an integer type of the same piece size exists
2148 	 and whether the vec_init optab supports construction from it directly.  */
2149       if (int_mode_for_size (pbsize, 0).exists (&elmode)
2150 	  && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2151 	  && (convert_optab_handler (vec_init_optab, rmode, elmode)
2152 	      != CODE_FOR_nothing))
2153 	{
2154 	  *ptype = build_nonstandard_integer_type (pbsize, 1);
2155 	  return build_vector_type (*ptype, nelts);
2156 	}
2157     }
2158 
2159   return NULL_TREE;
2160 }
2161 
2162 /* A subroutine of get_load_store_type, with a subset of the same
2163    arguments.  Handle the case where STMT_INFO is part of a grouped load
2164    or store.
2165 
2166    For stores, the statements in the group are all consecutive
2167    and there is no gap at the end.  For loads, the statements in the
2168    group might not be consecutive; there can be gaps between statements
2169    as well as at the end.  */
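/* An illustrative summary (not normative): the SLP paths below choose
   VMAT_STRIDED_SLP or VMAT_ELEMENTWISE for strided groups and
   VMAT_CONTIGUOUS or a negative-step variant otherwise, while the non-SLP
   path prefers VMAT_LOAD_STORE_LANES, then VMAT_CONTIGUOUS_PERMUTE, then a
   gather/scatter, falling back to VMAT_ELEMENTWISE.  */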
2170 
2171 static bool
2172 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2173 			   tree vectype, slp_tree slp_node,
2174 			   bool masked_p, vec_load_store_type vls_type,
2175 			   vect_memory_access_type *memory_access_type,
2176 			   poly_int64 *poffset,
2177 			   dr_alignment_support *alignment_support_scheme,
2178 			   int *misalignment,
2179 			   gather_scatter_info *gs_info)
2180 {
2181   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2182   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2183   stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2184   dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2185   unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2186   bool single_element_p = (stmt_info == first_stmt_info
2187 			   && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2188   unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2189   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2190 
2191   /* True if the vectorized statements would access beyond the last
2192      statement in the group.  */
2193   bool overrun_p = false;
2194 
2195   /* True if we can cope with such overrun by peeling for gaps, so that
2196      there is at least one final scalar iteration after the vector loop.  */
2197   bool can_overrun_p = (!masked_p
2198 			&& vls_type == VLS_LOAD
2199 			&& loop_vinfo
2200 			&& !loop->inner);
2201 
2202   /* There can only be a gap at the end of the group if the stride is
2203      known at compile time.  */
2204   gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2205 
2206   /* Stores can't yet have gaps.  */
2207   gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2208 
2209   if (slp_node)
2210     {
2211       /* For SLP vectorization we directly vectorize a subchain
2212 	 without permutation.  */
2213       if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2214 	first_dr_info
2215 	  = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2216       if (STMT_VINFO_STRIDED_P (first_stmt_info))
2217 	{
2218 	  /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2219 	     separated by the stride, until we have a complete vector.
2220 	     Fall back to scalar accesses if that isn't possible.  */
2221 	  if (multiple_p (nunits, group_size))
2222 	    *memory_access_type = VMAT_STRIDED_SLP;
2223 	  else
2224 	    *memory_access_type = VMAT_ELEMENTWISE;
2225 	}
2226       else
2227 	{
2228 	  overrun_p = loop_vinfo && gap != 0;
2229 	  if (overrun_p && vls_type != VLS_LOAD)
2230 	    {
2231 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2232 			       "Grouped store with gaps requires"
2233 			       " non-consecutive accesses\n");
2234 	      return false;
2235 	    }
2236 	  /* An overrun is fine if the trailing elements are smaller
2237 	     than the alignment boundary B.  Every vector access will
2238 	     be a multiple of B and so we are guaranteed to access a
2239 	     non-gap element in the same B-sized block.  */
2240 	  if (overrun_p
2241 	      && gap < (vect_known_alignment_in_bytes (first_dr_info,
2242 						       vectype)
2243 			/ vect_get_scalar_dr_size (first_dr_info)))
2244 	    overrun_p = false;
2245 
2246 	  /* If the gap splits the vector in half and the target
2247 	     can do half-vector operations avoid the epilogue peeling
2248 	     by simply loading half of the vector only.  Usually
2249 	     the construction with an upper zero half will be elided.  */
2250 	  dr_alignment_support alss;
2251 	  int misalign = dr_misalignment (first_dr_info, vectype);
2252 	  tree half_vtype;
2253 	  if (overrun_p
2254 	      && !masked_p
2255 	      && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2256 							  vectype, misalign)))
2257 		   == dr_aligned
2258 		  || alss == dr_unaligned_supported)
2259 	      && known_eq (nunits, (group_size - gap) * 2)
2260 	      && known_eq (nunits, group_size)
2261 	      && (vector_vector_composition_type (vectype, 2, &half_vtype)
2262 		  != NULL_TREE))
2263 	    overrun_p = false;
2264 
2265 	  if (overrun_p && !can_overrun_p)
2266 	    {
2267 	      if (dump_enabled_p ())
2268 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2269 				 "Peeling for outer loop is not supported\n");
2270 	      return false;
2271 	    }
2272 	  int cmp = compare_step_with_zero (vinfo, stmt_info);
2273 	  if (cmp < 0)
2274 	    {
2275 	      if (single_element_p)
2276 		/* ???  The VMAT_CONTIGUOUS_REVERSE code generation is
2277 		   only correct for single element "interleaving" SLP.  */
2278 		*memory_access_type = get_negative_load_store_type
2279 			     (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2280 	      else
2281 		{
2282 		  /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2283 		     separated by the stride, until we have a complete vector.
2284 		     Fall back to scalar accesses if that isn't possible.  */
2285 		  if (multiple_p (nunits, group_size))
2286 		    *memory_access_type = VMAT_STRIDED_SLP;
2287 		  else
2288 		    *memory_access_type = VMAT_ELEMENTWISE;
2289 		}
2290 	    }
2291 	  else
2292 	    {
2293 	      gcc_assert (!loop_vinfo || cmp > 0);
2294 	      *memory_access_type = VMAT_CONTIGUOUS;
2295 	    }
2296 
2297 	  /* When we have a contiguous access across loop iterations
2298 	     but the access in the loop doesn't cover the full vector
2299 	     we can end up with no gap recorded but still excess
2300 	     elements accessed, see PR103116.  Make sure we peel for
2301 	     gaps if necessary and sufficient and give up if not.  */
2302 	  if (loop_vinfo
2303 	      && *memory_access_type == VMAT_CONTIGUOUS
2304 	      && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2305 	      && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2306 			      nunits))
2307 	    {
2308 	      unsigned HOST_WIDE_INT cnunits, cvf;
2309 	      if (!can_overrun_p
2310 		  || !nunits.is_constant (&cnunits)
2311 		  || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2312 		  /* Peeling for gaps assumes that a single scalar iteration
2313 		     is enough to make sure the last vector iteration doesn't
2314 		     access excess elements.
2315 		     ???  Enhancements include peeling multiple iterations
2316 		     or using masked loads with a static mask.  */
2317 		  || (group_size * cvf) % cnunits + group_size < cnunits)
2318 		{
2319 		  if (dump_enabled_p ())
2320 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2321 				     "peeling for gaps insufficient for "
2322 				     "access\n");
2323 		  return false;
2324 		}
2325 	      overrun_p = true;
2326 	    }
2327 	}
2328     }
2329   else
2330     {
2331       /* We can always handle this case using elementwise accesses,
2332 	 but see if something more efficient is available.  */
2333       *memory_access_type = VMAT_ELEMENTWISE;
2334 
2335       /* If there is a gap at the end of the group then these optimizations
2336 	 would access excess elements in the last iteration.  */
2337       bool would_overrun_p = (gap != 0);
2338       /* An overrun is fine if the trailing elements are smaller than the
2339 	 alignment boundary B.  Every vector access will be a multiple of B
2340 	 and so we are guaranteed to access a non-gap element in the
2341 	 same B-sized block.  */
2342       if (would_overrun_p
2343 	  && !masked_p
2344 	  && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2345 		    / vect_get_scalar_dr_size (first_dr_info)))
2346 	would_overrun_p = false;
2347 
2348       if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2349 	  && (can_overrun_p || !would_overrun_p)
2350 	  && compare_step_with_zero (vinfo, stmt_info) > 0)
2351 	{
2352 	  /* First cope with the degenerate case of a single-element
2353 	     vector.  */
2354 	  if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2355 	    ;
2356 
2357 	  /* Otherwise try using LOAD/STORE_LANES.  */
2358 	  else if (vls_type == VLS_LOAD
2359 		   ? vect_load_lanes_supported (vectype, group_size, masked_p)
2360 		   : vect_store_lanes_supported (vectype, group_size,
2361 						 masked_p))
2362 	    {
2363 	      *memory_access_type = VMAT_LOAD_STORE_LANES;
2364 	      overrun_p = would_overrun_p;
2365 	    }
2366 
2367 	  /* If that fails, try using permuting loads.  */
2368 	  else if (vls_type == VLS_LOAD
2369 		   ? vect_grouped_load_supported (vectype, single_element_p,
2370 						  group_size)
2371 		   : vect_grouped_store_supported (vectype, group_size))
2372 	    {
2373 	      *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2374 	      overrun_p = would_overrun_p;
2375 	    }
2376 	}
2377 
2378       /* As a last resort, try using a gather load or scatter store.
2379 
2380 	 ??? Although the code can handle all group sizes correctly,
2381 	 it probably isn't a win to use separate strided accesses based
2382 	 on nearby locations.  Or, even if it's a win over scalar code,
2383 	 it might not be a win over vectorizing at a lower VF, if that
2384 	 allows us to use contiguous accesses.  */
2385       if (*memory_access_type == VMAT_ELEMENTWISE
2386 	  && single_element_p
2387 	  && loop_vinfo
2388 	  && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2389 						 masked_p, gs_info))
2390 	*memory_access_type = VMAT_GATHER_SCATTER;
2391     }
2392 
2393   if (*memory_access_type == VMAT_GATHER_SCATTER
2394       || *memory_access_type == VMAT_ELEMENTWISE)
2395     {
2396       *alignment_support_scheme = dr_unaligned_supported;
2397       *misalignment = DR_MISALIGNMENT_UNKNOWN;
2398     }
2399   else
2400     {
2401       *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2402       *alignment_support_scheme
2403 	= vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2404 					 *misalignment);
2405     }
2406 
2407   if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2408     {
2409       /* STMT is the leader of the group. Check the operands of all the
2410 	 stmts of the group.  */
2411       stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2412       while (next_stmt_info)
2413 	{
2414 	  tree op = vect_get_store_rhs (next_stmt_info);
2415 	  enum vect_def_type dt;
2416 	  if (!vect_is_simple_use (op, vinfo, &dt))
2417 	    {
2418 	      if (dump_enabled_p ())
2419 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2420 				 "use not simple.\n");
2421 	      return false;
2422 	    }
2423 	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2424 	}
2425     }
2426 
2427   if (overrun_p)
2428     {
2429       gcc_assert (can_overrun_p);
2430       if (dump_enabled_p ())
2431 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2432 			 "Data access with gaps requires scalar "
2433 			 "epilogue loop\n");
2434       LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2435     }
2436 
2437   return true;
2438 }
2439 
2440 /* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
2441    if there is a memory access type that the vectorized form can use,
2442    storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
2443    or scatters, fill in GS_INFO accordingly.  In addition
2444    *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2445    the target does not support the alignment scheme.  *MISALIGNMENT
2446    is set according to the alignment of the access (including
2447    DR_MISALIGNMENT_UNKNOWN when it is unknown).
2448 
2449    SLP_NODE says whether we're performing SLP rather than loop vectorization.
2450    MASKED_P is true if the statement is conditional on a vectorized mask.
2451    VECTYPE is the vector type that the vectorized statements will use.
2452    NCOPIES is the number of vector statements that will be needed.  */
2453 
2454 static bool
2455 get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
2456 		     tree vectype, slp_tree slp_node,
2457 		     bool masked_p, vec_load_store_type vls_type,
2458 		     unsigned int ncopies,
2459 		     vect_memory_access_type *memory_access_type,
2460 		     poly_int64 *poffset,
2461 		     dr_alignment_support *alignment_support_scheme,
2462 		     int *misalignment,
2463 		     gather_scatter_info *gs_info)
2464 {
2465   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2466   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2467   *misalignment = DR_MISALIGNMENT_UNKNOWN;
2468   *poffset = 0;
2469   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2470     {
2471       *memory_access_type = VMAT_GATHER_SCATTER;
2472       if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2473 	gcc_unreachable ();
2474       else if (!vect_is_simple_use (gs_info->offset, vinfo,
2475 				    &gs_info->offset_dt,
2476 				    &gs_info->offset_vectype))
2477 	{
2478 	  if (dump_enabled_p ())
2479 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2480 			     "%s index use not simple.\n",
2481 			     vls_type == VLS_LOAD ? "gather" : "scatter");
2482 	  return false;
2483 	}
2484       else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2485 	{
2486 	  if (vls_type != VLS_LOAD)
2487 	    {
2488 	      if (dump_enabled_p ())
2489 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2490 				 "unsupported emulated scatter.\n");
2491 	      return false;
2492 	    }
2493 	  else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2494 		   || !TYPE_VECTOR_SUBPARTS
2495 			 (gs_info->offset_vectype).is_constant ()
2496 		   || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2497 					      (gs_info->offset_vectype),
2498 					    TYPE_VECTOR_SUBPARTS (vectype)))
2499 	    {
2500 	      if (dump_enabled_p ())
2501 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2502 				 "unsupported vector types for emulated "
2503 				 "gather.\n");
2504 	      return false;
2505 	    }
2506 	}
2507       /* Gather-scatter accesses perform only component accesses, alignment
2508 	 is irrelevant for them.  */
2509       *alignment_support_scheme = dr_unaligned_supported;
2510     }
2511   else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2512     {
2513       if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2514 				      masked_p,
2515 				      vls_type, memory_access_type, poffset,
2516 				      alignment_support_scheme,
2517 				      misalignment, gs_info))
2518 	return false;
2519     }
2520   else if (STMT_VINFO_STRIDED_P (stmt_info))
2521     {
2522       gcc_assert (!slp_node);
2523       if (loop_vinfo
2524 	  && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2525 						 masked_p, gs_info))
2526 	*memory_access_type = VMAT_GATHER_SCATTER;
2527       else
2528 	*memory_access_type = VMAT_ELEMENTWISE;
2529       /* Alignment is irrelevant here.  */
2530       *alignment_support_scheme = dr_unaligned_supported;
2531     }
2532   else
2533     {
2534       int cmp = compare_step_with_zero (vinfo, stmt_info);
2535       if (cmp == 0)
2536 	{
2537 	  gcc_assert (vls_type == VLS_LOAD);
2538 	  *memory_access_type = VMAT_INVARIANT;
2539 	  /* Invariant accesses perform only component accesses, alignment
2540 	     is irrelevant for them.  */
2541 	  *alignment_support_scheme = dr_unaligned_supported;
2542 	}
2543       else
2544 	{
2545 	  if (cmp < 0)
2546 	    *memory_access_type = get_negative_load_store_type
2547 	       (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2548 	  else
2549 	    *memory_access_type = VMAT_CONTIGUOUS;
2550 	  *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2551 					   vectype, *poffset);
2552 	  *alignment_support_scheme
2553 	    = vect_supportable_dr_alignment (vinfo,
2554 					     STMT_VINFO_DR_INFO (stmt_info),
2555 					     vectype, *misalignment);
2556 	}
2557     }
2558 
2559   if ((*memory_access_type == VMAT_ELEMENTWISE
2560        || *memory_access_type == VMAT_STRIDED_SLP)
2561       && !nunits.is_constant ())
2562     {
2563       if (dump_enabled_p ())
2564 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2565 			 "Not using elementwise accesses due to variable "
2566 			 "vectorization factor.\n");
2567       return false;
2568     }
2569 
2570   if (*alignment_support_scheme == dr_unaligned_unsupported)
2571     {
2572       if (dump_enabled_p ())
2573 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2574 			 "unsupported unaligned access\n");
2575       return false;
2576     }
2577 
2578   /* FIXME: At the moment the cost model seems to underestimate the
2579      cost of using elementwise accesses.  This check preserves the
2580      traditional behavior until that can be fixed.  */
2581   stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2582   if (!first_stmt_info)
2583     first_stmt_info = stmt_info;
2584   if (*memory_access_type == VMAT_ELEMENTWISE
2585       && !STMT_VINFO_STRIDED_P (first_stmt_info)
2586       && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2587 	   && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2588 	   && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2589     {
2590       if (dump_enabled_p ())
2591 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2592 			 "not falling back to elementwise accesses\n");
2593       return false;
2594     }
2595   return true;
2596 }
2597 
2598 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2599    conditional operation STMT_INFO.  When returning true, store the mask
2600    in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2601    vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2602    to the mask in *MASK_NODE if MASK_NODE is not NULL.  */
2603 
2604 static bool
2605 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2606 			slp_tree slp_node, unsigned mask_index,
2607 			tree *mask, slp_tree *mask_node,
2608 			vect_def_type *mask_dt_out, tree *mask_vectype_out)
2609 {
2610   enum vect_def_type mask_dt;
2611   tree mask_vectype;
2612   slp_tree mask_node_1;
2613   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2614 			   mask, &mask_node_1, &mask_dt, &mask_vectype))
2615     {
2616       if (dump_enabled_p ())
2617 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2618 			 "mask use not simple.\n");
2619       return false;
2620     }
2621 
2622   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2623     {
2624       if (dump_enabled_p ())
2625 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2626 			 "mask argument is not a boolean.\n");
2627       return false;
2628     }
2629 
2630   /* If the caller is not prepared for adjusting an external/constant
2631      SLP mask vector type fail.  */
2632   if (slp_node
2633       && !mask_node
2634       && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2635     {
2636       if (dump_enabled_p ())
2637 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2638 			 "SLP mask argument is not vectorized.\n");
2639       return false;
2640     }
2641 
2642   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2643   if (!mask_vectype)
2644     mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2645 
2646   if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2647     {
2648       if (dump_enabled_p ())
2649 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2650 			 "could not find an appropriate vector mask type.\n");
2651       return false;
2652     }
2653 
2654   if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2655 		TYPE_VECTOR_SUBPARTS (vectype)))
2656     {
2657       if (dump_enabled_p ())
2658 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2659 			 "vector mask type %T"
2660 			 " does not match vector data type %T.\n",
2661 			 mask_vectype, vectype);
2662 
2663       return false;
2664     }
2665 
2666   *mask_dt_out = mask_dt;
2667   *mask_vectype_out = mask_vectype;
2668   if (mask_node)
2669     *mask_node = mask_node_1;
2670   return true;
2671 }
2672 
2673 /* Return true if stored value RHS is suitable for vectorizing store
2674    statement STMT_INFO.  When returning true, store the type of the
2675    definition in *RHS_DT_OUT, the type of the vectorized store value in
2676    *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */
2677 
2678 static bool
2679 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2680 		      slp_tree slp_node, tree rhs,
2681 		      vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2682 		      vec_load_store_type *vls_type_out)
2683 {
2684   /* In the case this is a store from a constant make sure
2685      native_encode_expr can handle it.  */
2686   if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2687     {
2688       if (dump_enabled_p ())
2689 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2690 			 "cannot encode constant as a byte sequence.\n");
2691       return false;
2692     }
2693 
2694   unsigned op_no = 0;
2695   if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2696     {
2697       if (gimple_call_internal_p (call)
2698 	  && internal_store_fn_p (gimple_call_internal_fn (call)))
2699 	op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2700     }
2701 
2702   enum vect_def_type rhs_dt;
2703   tree rhs_vectype;
2704   slp_tree slp_op;
2705   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2706 			   &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2707     {
2708       if (dump_enabled_p ())
2709 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2710 			 "use not simple.\n");
2711       return false;
2712     }
2713 
2714   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2715   if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2716     {
2717       if (dump_enabled_p ())
2718 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2719 			 "incompatible vector types.\n");
2720       return false;
2721     }
2722 
2723   *rhs_dt_out = rhs_dt;
2724   *rhs_vectype_out = rhs_vectype;
2725   if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2726     *vls_type_out = VLS_STORE_INVARIANT;
2727   else
2728     *vls_type_out = VLS_STORE;
2729   return true;
2730 }
2731 
2732 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2733    Note that we support masks with floating-point type, in which case the
2734    floats are interpreted as a bitmask.  */
2735 
2736 static tree
2737 vect_build_all_ones_mask (vec_info *vinfo,
2738 			  stmt_vec_info stmt_info, tree masktype)
2739 {
2740   if (TREE_CODE (masktype) == INTEGER_TYPE)
2741     return build_int_cst (masktype, -1);
2742   else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2743     {
2744       tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2745       mask = build_vector_from_val (masktype, mask);
2746       return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2747     }
2748   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2749     {
2750       REAL_VALUE_TYPE r;
2751       long tmp[6];
2752       for (int j = 0; j < 6; ++j)
2753 	tmp[j] = -1;
2754       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2755       tree mask = build_real (TREE_TYPE (masktype), r);
2756       mask = build_vector_from_val (masktype, mask);
2757       return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2758     }
2759   gcc_unreachable ();
2760 }
2761 
2762 /* Build an all-zero merge value of type VECTYPE while vectorizing
2763    STMT_INFO as a gather load.  */
2764 
2765 static tree
2766 vect_build_zero_merge_argument (vec_info *vinfo,
2767 				stmt_vec_info stmt_info, tree vectype)
2768 {
2769   tree merge;
2770   if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2771     merge = build_int_cst (TREE_TYPE (vectype), 0);
2772   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2773     {
2774       REAL_VALUE_TYPE r;
2775       long tmp[6];
2776       for (int j = 0; j < 6; ++j)
2777 	tmp[j] = 0;
2778       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2779       merge = build_real (TREE_TYPE (vectype), r);
2780     }
2781   else
2782     gcc_unreachable ();
2783   merge = build_vector_from_val (vectype, merge);
2784   return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2785 }
2786 
2787 /* Build a gather load call while vectorizing STMT_INFO.  Insert new
2788    instructions before GSI and add them to VEC_STMT.  GS_INFO describes
2789    the gather load operation.  If the load is conditional, MASK is the
2790    unvectorized condition and MASK_DT is its definition type, otherwise
2791    MASK is null.  */
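/* As a rough sketch only (the builtin decl and the exact argument types are
   target-specific), each copy J emits a call of the form

     res_J = GS_INFO->decl (src_or_merge, base_ptr, offset_vec_J, mask_arg,
			    scale);

   with VIEW_CONVERT_EXPRs inserted where the builtin's index, mask or return
   type differs from the corresponding vector type, and where SRC_OR_MERGE is
   an all-zero merge vector and MASK_ARG an all-ones mask when the load is
   unconditional.  */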
2792 
2793 static void
2794 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2795 			      gimple_stmt_iterator *gsi,
2796 			      gimple **vec_stmt,
2797 			      gather_scatter_info *gs_info,
2798 			      tree mask)
2799 {
2800   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2801   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2802   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2803   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2804   int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2805   edge pe = loop_preheader_edge (loop);
2806   enum { NARROW, NONE, WIDEN } modifier;
2807   poly_uint64 gather_off_nunits
2808     = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2809 
2810   tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2811   tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2812   tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2813   tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2814   tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2815   tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2816   tree scaletype = TREE_VALUE (arglist);
2817   tree real_masktype = masktype;
2818   gcc_checking_assert (types_compatible_p (srctype, rettype)
2819 		       && (!mask
2820 			   || TREE_CODE (masktype) == INTEGER_TYPE
2821 			   || types_compatible_p (srctype, masktype)));
2822   if (mask)
2823     masktype = truth_type_for (srctype);
2824 
2825   tree mask_halftype = masktype;
2826   tree perm_mask = NULL_TREE;
2827   tree mask_perm_mask = NULL_TREE;
2828   if (known_eq (nunits, gather_off_nunits))
2829     modifier = NONE;
2830   else if (known_eq (nunits * 2, gather_off_nunits))
2831     {
2832       modifier = WIDEN;
2833 
2834       /* Currently widening gathers and scatters are only supported for
2835 	 fixed-length vectors.  */
2836       int count = gather_off_nunits.to_constant ();
2837       vec_perm_builder sel (count, count, 1);
2838       for (int i = 0; i < count; ++i)
2839 	sel.quick_push (i | (count / 2));
2840 
2841       vec_perm_indices indices (sel, 1, count);
2842       perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2843 					      indices);
2844     }
2845   else if (known_eq (nunits, gather_off_nunits * 2))
2846     {
2847       modifier = NARROW;
2848 
2849       /* Currently narrowing gathers and scatters are only supported for
2850 	 fixed-length vectors.  */
2851       int count = nunits.to_constant ();
2852       vec_perm_builder sel (count, count, 1);
2853       sel.quick_grow (count);
2854       for (int i = 0; i < count; ++i)
2855 	sel[i] = i < count / 2 ? i : i + count / 2;
2856       vec_perm_indices indices (sel, 2, count);
2857       perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2858 
2859       ncopies *= 2;
2860 
2861       if (mask && VECTOR_TYPE_P (real_masktype))
2862 	{
2863 	  for (int i = 0; i < count; ++i)
2864 	    sel[i] = i | (count / 2);
2865 	  indices.new_vector (sel, 2, count);
2866 	  mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2867 	}
2868       else if (mask)
2869 	mask_halftype = truth_type_for (gs_info->offset_vectype);
2870     }
2871   else
2872     gcc_unreachable ();
2873 
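  /* Worked examples, purely illustrative, assuming COUNT == 8: in the WIDEN
     case PERM_MASK is { 4, 5, 6, 7, 4, 5, 6, 7 } and the odd copies use it
     to pick the high half of an offset vector that serves two data vectors;
     in the NARROW case PERM_MASK is { 0, 1, 2, 3, 8, 9, 10, 11 } and it
     combines the useful low halves of two narrower gather results into one
     full-width vector.  */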
2874   tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2875   tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2876 
2877   tree ptr = fold_convert (ptrtype, gs_info->base);
2878   if (!is_gimple_min_invariant (ptr))
2879     {
2880       gimple_seq seq;
2881       ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2882       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2883       gcc_assert (!new_bb);
2884     }
2885 
2886   tree scale = build_int_cst (scaletype, gs_info->scale);
2887 
2888   tree vec_oprnd0 = NULL_TREE;
2889   tree vec_mask = NULL_TREE;
2890   tree src_op = NULL_TREE;
2891   tree mask_op = NULL_TREE;
2892   tree prev_res = NULL_TREE;
2893 
2894   if (!mask)
2895     {
2896       src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2897       mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2898     }
2899 
2900   auto_vec<tree> vec_oprnds0;
2901   auto_vec<tree> vec_masks;
2902   vect_get_vec_defs_for_operand (vinfo, stmt_info,
2903 				 modifier == WIDEN ? ncopies / 2 : ncopies,
2904 				 gs_info->offset, &vec_oprnds0);
2905   if (mask)
2906     vect_get_vec_defs_for_operand (vinfo, stmt_info,
2907 				   modifier == NARROW ? ncopies / 2 : ncopies,
2908 				   mask, &vec_masks, masktype);
2909   for (int j = 0; j < ncopies; ++j)
2910     {
2911       tree op, var;
2912       if (modifier == WIDEN && (j & 1))
2913 	op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2914 				   perm_mask, stmt_info, gsi);
2915       else
2916 	op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2917 
2918       if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2919 	{
2920 	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2921 				TYPE_VECTOR_SUBPARTS (idxtype)));
2922 	  var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2923 	  op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2924 	  gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2925 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2926 	  op = var;
2927 	}
2928 
2929       if (mask)
2930 	{
2931 	  if (mask_perm_mask && (j & 1))
2932 	    mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2933 					    mask_perm_mask, stmt_info, gsi);
2934 	  else
2935 	    {
2936 	      if (modifier == NARROW)
2937 		{
2938 		  if ((j & 1) == 0)
2939 		    vec_mask = vec_masks[j / 2];
2940 		}
2941 	      else
2942 		vec_mask = vec_masks[j];
2943 
2944 	      mask_op = vec_mask;
2945 	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2946 		{
2947 		  poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2948 		  poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2949 		  gcc_assert (known_eq (sub1, sub2));
2950 		  var = vect_get_new_ssa_name (masktype, vect_simple_var);
2951 		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2952 		  gassign *new_stmt
2953 		    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2954 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2955 		  mask_op = var;
2956 		}
2957 	    }
2958 	  if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype))
2959 	    {
2960 	      var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2961 	      gassign *new_stmt
2962 		= gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2963 						    : VEC_UNPACK_LO_EXPR,
2964 				       mask_op);
2965 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2966 	      mask_op = var;
2967 	    }
2968 	  src_op = mask_op;
2969 	}
2970 
2971       tree mask_arg = mask_op;
2972       if (masktype != real_masktype)
2973 	{
2974 	  tree utype, optype = TREE_TYPE (mask_op);
2975 	  if (VECTOR_TYPE_P (real_masktype)
2976 	      || TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2977 	    utype = real_masktype;
2978 	  else
2979 	    utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2980 	  var = vect_get_new_ssa_name (utype, vect_scalar_var);
2981 	  mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2982 	  gassign *new_stmt
2983 	    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2984 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2985 	  mask_arg = var;
2986 	  if (!useless_type_conversion_p (real_masktype, utype))
2987 	    {
2988 	      gcc_assert (TYPE_PRECISION (utype)
2989 			  <= TYPE_PRECISION (real_masktype));
2990 	      var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2991 	      new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2992 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2993 	      mask_arg = var;
2994 	    }
2995 	  src_op = build_zero_cst (srctype);
2996 	}
2997       gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2998 					    mask_arg, scale);
2999 
3000       if (!useless_type_conversion_p (vectype, rettype))
3001 	{
3002 	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
3003 				TYPE_VECTOR_SUBPARTS (rettype)));
3004 	  op = vect_get_new_ssa_name (rettype, vect_simple_var);
3005 	  gimple_call_set_lhs (new_stmt, op);
3006 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3007 	  var = make_ssa_name (vec_dest);
3008 	  op = build1 (VIEW_CONVERT_EXPR, vectype, op);
3009 	  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
3010 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3011 	}
3012       else
3013 	{
3014 	  var = make_ssa_name (vec_dest, new_stmt);
3015 	  gimple_call_set_lhs (new_stmt, var);
3016 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3017 	}
3018 
3019       if (modifier == NARROW)
3020 	{
3021 	  if ((j & 1) == 0)
3022 	    {
3023 	      prev_res = var;
3024 	      continue;
3025 	    }
3026 	  var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
3027 				      stmt_info, gsi);
3028 	  new_stmt = SSA_NAME_DEF_STMT (var);
3029 	}
3030 
3031       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3032     }
3033   *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3034 }
3035 
3036 /* Prepare the base and offset in GS_INFO for vectorization.
3037    Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
3038    to the vectorized offset argument for the first copy of STMT_INFO.
3039    STMT_INFO is the statement described by GS_INFO and LOOP is the
3040    containing loop.  */
3041 
3042 static void
3043 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
3044 			     class loop *loop, stmt_vec_info stmt_info,
3045 			     slp_tree slp_node, gather_scatter_info *gs_info,
3046 			     tree *dataref_ptr, vec<tree> *vec_offset)
3047 {
3048   gimple_seq stmts = NULL;
3049   *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
3050   if (stmts != NULL)
3051     {
3052       basic_block new_bb;
3053       edge pe = loop_preheader_edge (loop);
3054       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3055       gcc_assert (!new_bb);
3056     }
3057   if (slp_node)
3058     vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
3059   else
3060     {
3061       unsigned ncopies
3062 	= vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
3063       vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
3064 				     gs_info->offset, vec_offset,
3065 				     gs_info->offset_vectype);
3066     }
3067 }
3068 
3069 /* Prepare to implement a grouped or strided load or store using
3070    the gather load or scatter store operation described by GS_INFO.
3071    STMT_INFO is the load or store statement.
3072 
3073    Set *DATAREF_BUMP to the amount that should be added to the base
3074    address after each copy of the vectorized statement.  Set *VEC_OFFSET
3075    to an invariant offset vector in which element I has the value
3076    I * DR_STEP / SCALE.  */
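/* For instance, with purely illustrative numbers: DR_STEP == 16 bytes,
   SCALE == 4 and 4-element vectors give X == 4, so *VEC_OFFSET becomes
   { 0, 4, 8, 12 } and *DATAREF_BUMP is 16 * 4 == 64 bytes per copy.  */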
3077 
3078 static void
3079 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3080 				 loop_vec_info loop_vinfo,
3081 				 gather_scatter_info *gs_info,
3082 				 tree *dataref_bump, tree *vec_offset)
3083 {
3084   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3085   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3086 
3087   tree bump = size_binop (MULT_EXPR,
3088 			  fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3089 			  size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3090   *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3091 
3092   /* The offset given in GS_INFO can have pointer type, so use the element
3093      type of the vector instead.  */
3094   tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3095 
3096   /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
3097   tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3098 			  ssize_int (gs_info->scale));
3099   step = fold_convert (offset_type, step);
3100 
3101   /* Create {0, X, X*2, X*3, ...}.  */
3102   tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3103 			     build_zero_cst (offset_type), step);
3104   *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
3105 }
3106 
3107 /* Return the amount that should be added to a vector pointer to move
3108    to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
3109    being vectorized and MEMORY_ACCESS_TYPE describes the type of
3110    vectorization.  */
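/* E.g. for a V4SI AGGR_TYPE the increment is 16 bytes, or -16 bytes when the
   data reference steps backwards; invariant accesses get a zero increment.  */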
3111 
3112 static tree
3113 vect_get_data_ptr_increment (vec_info *vinfo,
3114 			     dr_vec_info *dr_info, tree aggr_type,
3115 			     vect_memory_access_type memory_access_type)
3116 {
3117   if (memory_access_type == VMAT_INVARIANT)
3118     return size_zero_node;
3119 
3120   tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3121   tree step = vect_dr_behavior (vinfo, dr_info)->step;
3122   if (tree_int_cst_sgn (step) == -1)
3123     iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3124   return iv_step;
3125 }
3126 
3127 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}.  */
3128 
3129 static bool
3130 vectorizable_bswap (vec_info *vinfo,
3131 		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3132 		    gimple **vec_stmt, slp_tree slp_node,
3133 		    slp_tree *slp_op,
3134 		    tree vectype_in, stmt_vector_for_cost *cost_vec)
3135 {
3136   tree op, vectype;
3137   gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3138   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3139   unsigned ncopies;
3140 
3141   op = gimple_call_arg (stmt, 0);
3142   vectype = STMT_VINFO_VECTYPE (stmt_info);
3143   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3144 
3145   /* Multiple types in SLP are handled by creating the appropriate number of
3146      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
3147      case of SLP.  */
3148   if (slp_node)
3149     ncopies = 1;
3150   else
3151     ncopies = vect_get_num_copies (loop_vinfo, vectype);
3152 
3153   gcc_assert (ncopies >= 1);
3154 
3155   tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3156   if (! char_vectype)
3157     return false;
3158 
3159   poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3160   unsigned word_bytes;
3161   if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3162     return false;
3163 
3164   /* The encoding uses one stepped pattern for each byte in the word.  */
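  /* For example, vectorizing __builtin_bswap32 with a 16-byte CHAR_VECTYPE
     (WORD_BYTES == 4) yields the selector
     { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 },
     i.e. the bytes of each 32-bit word reversed in place.  */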
3165   vec_perm_builder elts (num_bytes, word_bytes, 3);
3166   for (unsigned i = 0; i < 3; ++i)
3167     for (unsigned j = 0; j < word_bytes; ++j)
3168       elts.quick_push ((i + 1) * word_bytes - j - 1);
3169 
3170   vec_perm_indices indices (elts, 1, num_bytes);
3171   if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3172     return false;
3173 
3174   if (! vec_stmt)
3175     {
3176       if (slp_node
3177 	  && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3178 	{
3179 	  if (dump_enabled_p ())
3180 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3181 			     "incompatible vector types for invariants\n");
3182 	  return false;
3183 	}
3184 
3185       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3186       DUMP_VECT_SCOPE ("vectorizable_bswap");
3187       record_stmt_cost (cost_vec,
3188 			1, vector_stmt, stmt_info, 0, vect_prologue);
3189       record_stmt_cost (cost_vec,
3190 			slp_node
3191 			? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3192 			vec_perm, stmt_info, 0, vect_body);
3193       return true;
3194     }
3195 
3196   tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3197 
3198   /* Transform.  */
3199   vec<tree> vec_oprnds = vNULL;
3200   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3201 		     op, &vec_oprnds);
3202   /* Arguments are ready.  Create the new vector stmt.  */
3203   unsigned i;
3204   tree vop;
3205   FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3206     {
3207       gimple *new_stmt;
3208       tree tem = make_ssa_name (char_vectype);
3209       new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3210 						   char_vectype, vop));
3211       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3212       tree tem2 = make_ssa_name (char_vectype);
3213       new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3214 				      tem, tem, bswap_vconst);
3215       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3216       tem = make_ssa_name (vectype);
3217       new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3218 						   vectype, tem2));
3219       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3220       if (slp_node)
3221 	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3222       else
3223 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3224     }
3225 
3226   if (!slp_node)
3227     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3228 
3229   vec_oprnds.release ();
3230   return true;
3231 }
3232 
3233 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3234    integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3235    in a single step.  On success, store the binary pack code in
3236    *CONVERT_CODE.  */
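/* A minimal sketch: narrowing a pair of V4DI results into one V8SI is
   typically a single VEC_PACK_TRUNC_EXPR, which is the code stored in
   *CONVERT_CODE; anything needing a multi-step conversion is rejected.  */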
3237 
3238 static bool
3239 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3240 			  tree_code *convert_code)
3241 {
3242   if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3243       || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3244     return false;
3245 
3246   tree_code code;
3247   int multi_step_cvt = 0;
3248   auto_vec <tree, 8> interm_types;
3249   if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3250 					&code, &multi_step_cvt, &interm_types)
3251       || multi_step_cvt)
3252     return false;
3253 
3254   *convert_code = code;
3255   return true;
3256 }
3257 
3258 /* Function vectorizable_call.
3259 
3260    Check if STMT_INFO performs a function call that can be vectorized.
3261    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3262    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3263    Return true if STMT_INFO is vectorizable in this way.  */
3264 
3265 static bool
3266 vectorizable_call (vec_info *vinfo,
3267 		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3268 		   gimple **vec_stmt, slp_tree slp_node,
3269 		   stmt_vector_for_cost *cost_vec)
3270 {
3271   gcall *stmt;
3272   tree vec_dest;
3273   tree scalar_dest;
3274   tree op;
3275   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3276   tree vectype_out, vectype_in;
3277   poly_uint64 nunits_in;
3278   poly_uint64 nunits_out;
3279   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3280   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3281   tree fndecl, new_temp, rhs_type;
3282   enum vect_def_type dt[4]
3283     = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3284 	vect_unknown_def_type };
3285   tree vectypes[ARRAY_SIZE (dt)] = {};
3286   slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3287   int ndts = ARRAY_SIZE (dt);
3288   int ncopies, j;
3289   auto_vec<tree, 8> vargs;
3290   enum { NARROW, NONE, WIDEN } modifier;
3291   size_t i, nargs;
3292   tree lhs;
3293 
3294   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3295     return false;
3296 
3297   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3298       && ! vec_stmt)
3299     return false;
3300 
3301   /* Is STMT_INFO a vectorizable call?   */
3302   stmt = dyn_cast <gcall *> (stmt_info->stmt);
3303   if (!stmt)
3304     return false;
3305 
3306   if (gimple_call_internal_p (stmt)
3307       && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3308 	  || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3309     /* Handled by vectorizable_load and vectorizable_store.  */
3310     return false;
3311 
3312   if (gimple_call_lhs (stmt) == NULL_TREE
3313       || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3314     return false;
3315 
3316   gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3317 
3318   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3319 
3320   /* Process function arguments.  */
3321   rhs_type = NULL_TREE;
3322   vectype_in = NULL_TREE;
3323   nargs = gimple_call_num_args (stmt);
3324 
3325   /* Bail out if the function has more than four arguments; we do not have
3326      interesting builtin functions to vectorize with more than two arguments
3327      except for fma.  Functions with no arguments are also not handled.  */
3328   if (nargs == 0 || nargs > 4)
3329     return false;
3330 
3331   /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic.  */
3332   combined_fn cfn = gimple_call_combined_fn (stmt);
3333   if (cfn == CFN_GOMP_SIMD_LANE)
3334     {
3335       nargs = 0;
3336       rhs_type = unsigned_type_node;
3337     }
3338 
3339   int mask_opno = -1;
3340   if (internal_fn_p (cfn))
3341     mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3342 
3343   for (i = 0; i < nargs; i++)
3344     {
3345       if ((int) i == mask_opno)
3346 	{
3347 	  if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3348 				       &op, &slp_op[i], &dt[i], &vectypes[i]))
3349 	    return false;
3350 	  continue;
3351 	}
3352 
3353       if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3354 			       i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3355 	{
3356 	  if (dump_enabled_p ())
3357 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3358 			     "use not simple.\n");
3359 	  return false;
3360 	}
3361 
3362       /* We can only handle calls with arguments of the same type.  */
3363       if (rhs_type
3364 	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3365 	{
3366 	  if (dump_enabled_p ())
3367 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3368                              "argument types differ.\n");
3369 	  return false;
3370 	}
3371       if (!rhs_type)
3372 	rhs_type = TREE_TYPE (op);
3373 
3374       if (!vectype_in)
3375 	vectype_in = vectypes[i];
3376       else if (vectypes[i]
3377 	       && !types_compatible_p (vectypes[i], vectype_in))
3378 	{
3379 	  if (dump_enabled_p ())
3380 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3381                              "argument vector types differ.\n");
3382 	  return false;
3383 	}
3384     }
3385   /* If all arguments are external or constant defs, infer the vector type
3386      from the scalar type.  */
3387   if (!vectype_in)
3388     vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3389   if (vec_stmt)
3390     gcc_assert (vectype_in);
3391   if (!vectype_in)
3392     {
3393       if (dump_enabled_p ())
3394 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3395 			 "no vectype for scalar type %T\n", rhs_type);
3396 
3397       return false;
3398     }
3399   /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3400      just mixtures of nunits.  E.g. DI->SI versions of __builtin_ctz*
3401      are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3402      by a pack of the two vectors into an SI vector.  We would need
3403      separate code to handle direct VnDI->VnSI IFN_CTZs.  */
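  /* Illustrative GIMPLE for that traditional scheme (names invented):

       half_0 = .CTZ (vin_0);
       half_1 = .CTZ (vin_1);
       res_2 = VEC_PACK_TRUNC_EXPR <half_0, half_1>;

     which is what the NARROW handling below produces via CONVERT_CODE.  */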
3404   if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3405     {
3406       if (dump_enabled_p ())
3407 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3408 			 "mismatched vector sizes %T and %T\n",
3409 			 vectype_in, vectype_out);
3410       return false;
3411     }
3412 
3413   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3414       != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3415     {
3416       if (dump_enabled_p ())
3417 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3418 			 "mixed mask and nonmask vector types\n");
3419       return false;
3420     }
3421 
3422   if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3423     {
3424       if (dump_enabled_p ())
3425 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3426 			 "use emulated vector type for call\n");
3427       return false;
3428     }
3429 
3430   /* FORNOW */
3431   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3432   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3433   if (known_eq (nunits_in * 2, nunits_out))
3434     modifier = NARROW;
3435   else if (known_eq (nunits_out, nunits_in))
3436     modifier = NONE;
3437   else if (known_eq (nunits_out * 2, nunits_in))
3438     modifier = WIDEN;
3439   else
3440     return false;
3441 
3442   /* We only handle functions that do not read or clobber memory.  */
3443   if (gimple_vuse (stmt))
3444     {
3445       if (dump_enabled_p ())
3446 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3447 			 "function reads from or writes to memory.\n");
3448       return false;
3449     }
3450 
3451   /* For now, we only vectorize functions if a target specific builtin
3452      is available.  TODO -- in some cases, it might be profitable to
3453      insert the calls for pieces of the vector, in order to be able
3454      to vectorize other operations in the loop.  */
3455   fndecl = NULL_TREE;
3456   internal_fn ifn = IFN_LAST;
3457   tree callee = gimple_call_fndecl (stmt);
3458 
3459   /* First try using an internal function.  */
3460   tree_code convert_code = ERROR_MARK;
3461   if (cfn != CFN_LAST
3462       && (modifier == NONE
3463 	  || (modifier == NARROW
3464 	      && simple_integer_narrowing (vectype_out, vectype_in,
3465 					   &convert_code))))
3466     ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3467 					  vectype_in);
3468 
3469   /* If that fails, try asking for a target-specific built-in function.  */
3470   if (ifn == IFN_LAST)
3471     {
3472       if (cfn != CFN_LAST)
3473 	fndecl = targetm.vectorize.builtin_vectorized_function
3474 	  (cfn, vectype_out, vectype_in);
3475       else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3476 	fndecl = targetm.vectorize.builtin_md_vectorized_function
3477 	  (callee, vectype_out, vectype_in);
3478     }
3479 
3480   if (ifn == IFN_LAST && !fndecl)
3481     {
3482       if (cfn == CFN_GOMP_SIMD_LANE
3483 	  && !slp_node
3484 	  && loop_vinfo
3485 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3486 	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3487 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3488 	     == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3489 	{
3490 	  /* We can handle IFN_GOMP_SIMD_LANE by returning a
3491 	     { 0, 1, 2, ... vf - 1 } vector.  */
3492 	  gcc_assert (nargs == 0);
3493 	}
3494       else if (modifier == NONE
3495 	       && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3496 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3497 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3498 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3499 	return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3500 				   slp_op, vectype_in, cost_vec);
3501       else
3502 	{
3503 	  if (dump_enabled_p ())
3504 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3505 			     "function is not vectorizable.\n");
3506 	  return false;
3507 	}
3508     }
3509 
3510   if (slp_node)
3511     ncopies = 1;
3512   else if (modifier == NARROW && ifn == IFN_LAST)
3513     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3514   else
3515     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3516 
3517   /* Sanity check: make sure that at least one copy of the vectorized stmt
3518      needs to be generated.  */
3519   gcc_assert (ncopies >= 1);
3520 
3521   int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3522   internal_fn cond_fn = get_conditional_internal_fn (ifn);
3523   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3524   if (!vec_stmt) /* transformation not required.  */
3525     {
3526       if (slp_node)
3527 	for (i = 0; i < nargs; ++i)
3528 	  if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3529 						 vectypes[i]
3530 						 ? vectypes[i] : vectype_in))
3531 	    {
3532 	      if (dump_enabled_p ())
3533 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3534 				 "incompatible vector types for invariants\n");
3535 	      return false;
3536 	    }
3537       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3538       DUMP_VECT_SCOPE ("vectorizable_call");
3539       vect_model_simple_cost (vinfo, stmt_info,
3540 			      ncopies, dt, ndts, slp_node, cost_vec);
3541       if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3542 	record_stmt_cost (cost_vec, ncopies / 2,
3543 			  vec_promote_demote, stmt_info, 0, vect_body);
3544 
3545       if (loop_vinfo
3546 	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3547 	  && (reduc_idx >= 0 || mask_opno >= 0))
3548 	{
3549 	  if (reduc_idx >= 0
3550 	      && (cond_fn == IFN_LAST
3551 		  || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3552 						      OPTIMIZE_FOR_SPEED)))
3553 	    {
3554 	      if (dump_enabled_p ())
3555 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3556 				 "can't use a fully-masked loop because no"
3557 				 " conditional operation is available.\n");
3558 	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3559 	    }
3560 	  else
3561 	    {
3562 	      unsigned int nvectors
3563 		= (slp_node
3564 		   ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3565 		   : ncopies);
3566 	      tree scalar_mask = NULL_TREE;
3567 	      if (mask_opno >= 0)
3568 		scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3569 	      vect_record_loop_mask (loop_vinfo, masks, nvectors,
3570 				     vectype_out, scalar_mask);
3571 	    }
3572 	}
3573       return true;
3574     }
3575 
3576   /* Transform.  */
3577 
3578   if (dump_enabled_p ())
3579     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3580 
3581   /* Handle def.  */
3582   scalar_dest = gimple_call_lhs (stmt);
3583   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3584 
3585   bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3586   unsigned int vect_nargs = nargs;
3587   if (masked_loop_p && reduc_idx >= 0)
3588     {
3589       ifn = cond_fn;
3590       vect_nargs += 2;
3591     }
3592 
3593   if (modifier == NONE || ifn != IFN_LAST)
3594     {
3595       tree prev_res = NULL_TREE;
3596       vargs.safe_grow (vect_nargs, true);
3597       auto_vec<vec<tree> > vec_defs (nargs);
3598       for (j = 0; j < ncopies; ++j)
3599 	{
3600 	  /* Build argument list for the vectorized call.  */
3601 	  if (slp_node)
3602 	    {
3603 	      vec<tree> vec_oprnds0;
3604 
3605 	      vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3606 	      vec_oprnds0 = vec_defs[0];
3607 
3608 	      /* Arguments are ready.  Create the new vector stmt.  */
3609 	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3610 		{
3611 		  int varg = 0;
3612 		  if (masked_loop_p && reduc_idx >= 0)
3613 		    {
3614 		      unsigned int vec_num = vec_oprnds0.length ();
3615 		      /* Always true for SLP.  */
3616 		      gcc_assert (ncopies == 1);
3617 		      vargs[varg++] = vect_get_loop_mask (gsi, masks, vec_num,
3618 							  vectype_out, i);
3619 		    }
3620 		  size_t k;
3621 		  for (k = 0; k < nargs; k++)
3622 		    {
3623 		      vec<tree> vec_oprndsk = vec_defs[k];
3624 		      vargs[varg++] = vec_oprndsk[i];
3625 		    }
3626 		  if (masked_loop_p && reduc_idx >= 0)
3627 		    vargs[varg++] = vargs[reduc_idx + 1];
3628 		  gimple *new_stmt;
3629 		  if (modifier == NARROW)
3630 		    {
3631 		      /* We don't define any narrowing conditional functions
3632 			 at present.  */
3633 		      gcc_assert (mask_opno < 0);
3634 		      tree half_res = make_ssa_name (vectype_in);
3635 		      gcall *call
3636 			= gimple_build_call_internal_vec (ifn, vargs);
3637 		      gimple_call_set_lhs (call, half_res);
3638 		      gimple_call_set_nothrow (call, true);
3639 		      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3640 		      if ((i & 1) == 0)
3641 			{
3642 			  prev_res = half_res;
3643 			  continue;
3644 			}
3645 		      new_temp = make_ssa_name (vec_dest);
3646 		      new_stmt = gimple_build_assign (new_temp, convert_code,
3647 						      prev_res, half_res);
3648 		      vect_finish_stmt_generation (vinfo, stmt_info,
3649 						   new_stmt, gsi);
3650 		    }
3651 		  else
3652 		    {
3653 		      if (mask_opno >= 0 && masked_loop_p)
3654 			{
3655 			  unsigned int vec_num = vec_oprnds0.length ();
3656 			  /* Always true for SLP.  */
3657 			  gcc_assert (ncopies == 1);
3658 			  tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3659 							  vectype_out, i);
3660 			  vargs[mask_opno] = prepare_vec_mask
3661 			    (loop_vinfo, TREE_TYPE (mask), mask,
3662 			     vargs[mask_opno], gsi);
3663 			}
3664 
3665 		      gcall *call;
3666 		      if (ifn != IFN_LAST)
3667 			call = gimple_build_call_internal_vec (ifn, vargs);
3668 		      else
3669 			call = gimple_build_call_vec (fndecl, vargs);
3670 		      new_temp = make_ssa_name (vec_dest, call);
3671 		      gimple_call_set_lhs (call, new_temp);
3672 		      gimple_call_set_nothrow (call, true);
3673 		      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3674 		      new_stmt = call;
3675 		    }
3676 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3677 		}
3678 	      continue;
3679 	    }
3680 
3681 	  int varg = 0;
3682 	  if (masked_loop_p && reduc_idx >= 0)
3683 	    vargs[varg++] = vect_get_loop_mask (gsi, masks, ncopies,
3684 						vectype_out, j);
3685 	  for (i = 0; i < nargs; i++)
3686 	    {
3687 	      op = gimple_call_arg (stmt, i);
3688 	      if (j == 0)
3689 		{
3690 		  vec_defs.quick_push (vNULL);
3691 		  vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3692 						 op, &vec_defs[i],
3693 						 vectypes[i]);
3694 		}
3695 	      vargs[varg++] = vec_defs[i][j];
3696 	    }
3697 	  if (masked_loop_p && reduc_idx >= 0)
3698 	    vargs[varg++] = vargs[reduc_idx + 1];
3699 
3700 	  if (mask_opno >= 0 && masked_loop_p)
3701 	    {
3702 	      tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3703 					      vectype_out, j);
3704 	      vargs[mask_opno]
3705 		= prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3706 				    vargs[mask_opno], gsi);
3707 	    }
3708 
3709 	  gimple *new_stmt;
3710 	  if (cfn == CFN_GOMP_SIMD_LANE)
3711 	    {
3712 	      tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3713 	      tree new_var
3714 		= vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3715 	      gimple *init_stmt = gimple_build_assign (new_var, cst);
3716 	      vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3717 	      new_temp = make_ssa_name (vec_dest);
3718 	      new_stmt = gimple_build_assign (new_temp, new_var);
3719 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3720 	    }
3721 	  else if (modifier == NARROW)
3722 	    {
3723 	      /* We don't define any narrowing conditional functions at
3724 		 present.  */
3725 	      gcc_assert (mask_opno < 0);
3726 	      tree half_res = make_ssa_name (vectype_in);
3727 	      gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3728 	      gimple_call_set_lhs (call, half_res);
3729 	      gimple_call_set_nothrow (call, true);
3730 	      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3731 	      if ((j & 1) == 0)
3732 		{
3733 		  prev_res = half_res;
3734 		  continue;
3735 		}
3736 	      new_temp = make_ssa_name (vec_dest);
3737 	      new_stmt = gimple_build_assign (new_temp, convert_code,
3738 					      prev_res, half_res);
3739 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3740 	    }
3741 	  else
3742 	    {
3743 	      gcall *call;
3744 	      if (ifn != IFN_LAST)
3745 		call = gimple_build_call_internal_vec (ifn, vargs);
3746 	      else
3747 		call = gimple_build_call_vec (fndecl, vargs);
3748 	      new_temp = make_ssa_name (vec_dest, call);
3749 	      gimple_call_set_lhs (call, new_temp);
3750 	      gimple_call_set_nothrow (call, true);
3751 	      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3752 	      new_stmt = call;
3753 	    }
3754 
3755 	  if (j == (modifier == NARROW ? 1 : 0))
3756 	    *vec_stmt = new_stmt;
3757 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3758 	}
3759       for (i = 0; i < nargs; i++)
3760 	{
3761 	  vec<tree> vec_oprndsi = vec_defs[i];
3762 	  vec_oprndsi.release ();
3763 	}
3764     }
3765   else if (modifier == NARROW)
3766     {
3767       auto_vec<vec<tree> > vec_defs (nargs);
3768       /* We don't define any narrowing conditional functions at present.  */
3769       gcc_assert (mask_opno < 0);
3770       for (j = 0; j < ncopies; ++j)
3771 	{
3772 	  /* Build argument list for the vectorized call.  */
3773 	  if (j == 0)
3774 	    vargs.create (nargs * 2);
3775 	  else
3776 	    vargs.truncate (0);
3777 
3778 	  if (slp_node)
3779 	    {
3780 	      vec<tree> vec_oprnds0;
3781 
3782 	      vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3783 	      vec_oprnds0 = vec_defs[0];
3784 
3785 	      /* Arguments are ready.  Create the new vector stmt.  */
3786 	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3787 		{
3788 		  size_t k;
3789 		  vargs.truncate (0);
3790 		  for (k = 0; k < nargs; k++)
3791 		    {
3792 		      vec<tree> vec_oprndsk = vec_defs[k];
3793 		      vargs.quick_push (vec_oprndsk[i]);
3794 		      vargs.quick_push (vec_oprndsk[i + 1]);
3795 		    }
3796 		  gcall *call;
3797 		  if (ifn != IFN_LAST)
3798 		    call = gimple_build_call_internal_vec (ifn, vargs);
3799 		  else
3800 		    call = gimple_build_call_vec (fndecl, vargs);
3801 		  new_temp = make_ssa_name (vec_dest, call);
3802 		  gimple_call_set_lhs (call, new_temp);
3803 		  gimple_call_set_nothrow (call, true);
3804 		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3805 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3806 		}
3807 	      continue;
3808 	    }
3809 
3810 	  for (i = 0; i < nargs; i++)
3811 	    {
3812 	      op = gimple_call_arg (stmt, i);
3813 	      if (j == 0)
3814 		{
3815 		  vec_defs.quick_push (vNULL);
3816 		  vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3817 						 op, &vec_defs[i], vectypes[i]);
3818 		}
3819 	      vec_oprnd0 = vec_defs[i][2*j];
3820 	      vec_oprnd1 = vec_defs[i][2*j+1];
3821 
3822 	      vargs.quick_push (vec_oprnd0);
3823 	      vargs.quick_push (vec_oprnd1);
3824 	    }
3825 
3826 	  gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3827 	  new_temp = make_ssa_name (vec_dest, new_stmt);
3828 	  gimple_call_set_lhs (new_stmt, new_temp);
3829 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3830 
3831 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3832 	}
3833 
3834       if (!slp_node)
3835 	*vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3836 
3837       for (i = 0; i < nargs; i++)
3838 	{
3839 	  vec<tree> vec_oprndsi = vec_defs[i];
3840 	  vec_oprndsi.release ();
3841 	}
3842     }
3843   else
3844     /* No current target implements this case.  */
3845     return false;
3846 
3847   vargs.release ();
3848 
3849   /* The call in STMT might prevent it from being removed in dce.
3850      However, we cannot remove it here because of the way the ssa name
3851      it defines is mapped to the new definition.  So just replace the
3852      rhs of the statement with something harmless.  */
3853 
3854   if (slp_node)
3855     return true;
3856 
3857   stmt_info = vect_orig_stmt (stmt_info);
3858   lhs = gimple_get_lhs (stmt_info->stmt);
3859 
3860   gassign *new_stmt
3861     = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3862   vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3863 
3864   return true;
3865 }
3866 
3867 
3868 struct simd_call_arg_info
3869 {
3870   tree vectype;
3871   tree op;
3872   HOST_WIDE_INT linear_step;
3873   enum vect_def_type dt;
3874   unsigned int align;
3875   bool simd_lane_linear;
3876 };
3877 
3878 /* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
3879    is linear within simd lane (but not within whole loop), note it in
3880    *ARGINFO.  */
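/* A hypothetical example of the pattern recognized here (names invented):

     _lane = .GOMP_SIMD_LANE (simduid.0);
     _1 = (sizetype) _lane;
     _2 = _1 * 4;
     op_3 = &base_array + _2;

   would set ARGINFO->op to &base_array, ARGINFO->linear_step to 4 and
   ARGINFO->simd_lane_linear to true.  */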
3881 
3882 static void
3883 vect_simd_lane_linear (tree op, class loop *loop,
3884 		       struct simd_call_arg_info *arginfo)
3885 {
3886   gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3887 
3888   if (!is_gimple_assign (def_stmt)
3889       || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3890       || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3891     return;
3892 
3893   tree base = gimple_assign_rhs1 (def_stmt);
3894   HOST_WIDE_INT linear_step = 0;
3895   tree v = gimple_assign_rhs2 (def_stmt);
3896   while (TREE_CODE (v) == SSA_NAME)
3897     {
3898       tree t;
3899       def_stmt = SSA_NAME_DEF_STMT (v);
3900       if (is_gimple_assign (def_stmt))
3901 	switch (gimple_assign_rhs_code (def_stmt))
3902 	  {
3903 	  case PLUS_EXPR:
3904 	    t = gimple_assign_rhs2 (def_stmt);
3905 	    if (linear_step || TREE_CODE (t) != INTEGER_CST)
3906 	      return;
3907 	    base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3908 	    v = gimple_assign_rhs1 (def_stmt);
3909 	    continue;
3910 	  case MULT_EXPR:
3911 	    t = gimple_assign_rhs2 (def_stmt);
3912 	    if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3913 	      return;
3914 	    linear_step = tree_to_shwi (t);
3915 	    v = gimple_assign_rhs1 (def_stmt);
3916 	    continue;
3917 	  CASE_CONVERT:
3918 	    t = gimple_assign_rhs1 (def_stmt);
3919 	    if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3920 		|| (TYPE_PRECISION (TREE_TYPE (v))
3921 		    < TYPE_PRECISION (TREE_TYPE (t))))
3922 	      return;
3923 	    if (!linear_step)
3924 	      linear_step = 1;
3925 	    v = t;
3926 	    continue;
3927 	  default:
3928 	    return;
3929 	  }
3930       else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3931 	       && loop->simduid
3932 	       && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3933 	       && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3934 		   == loop->simduid))
3935 	{
3936 	  if (!linear_step)
3937 	    linear_step = 1;
3938 	  arginfo->linear_step = linear_step;
3939 	  arginfo->op = base;
3940 	  arginfo->simd_lane_linear = true;
3941 	  return;
3942 	}
3943     }
3944 }
3945 
3946 /* Return the number of elements in vector type VECTYPE, which is associated
3947    with a SIMD clone.  At present these vectors always have a constant
3948    length.  */
3949 
3950 static unsigned HOST_WIDE_INT
3951 simd_clone_subparts (tree vectype)
3952 {
3953   return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3954 }
3955 
3956 /* Function vectorizable_simd_clone_call.
3957 
3958    Check if STMT_INFO performs a function call that can be vectorized
3959    by calling a simd clone of the function.
3960    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3961    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3962    Return true if STMT_INFO is vectorizable in this way.  */
3963 
3964 static bool
3965 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3966 			      gimple_stmt_iterator *gsi,
3967 			      gimple **vec_stmt, slp_tree slp_node,
3968 			      stmt_vector_for_cost *)
3969 {
3970   tree vec_dest;
3971   tree scalar_dest;
3972   tree op, type;
3973   tree vec_oprnd0 = NULL_TREE;
3974   tree vectype;
3975   poly_uint64 nunits;
3976   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3977   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3978   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3979   tree fndecl, new_temp;
3980   int ncopies, j;
3981   auto_vec<simd_call_arg_info> arginfo;
3982   vec<tree> vargs = vNULL;
3983   size_t i, nargs;
3984   tree lhs, rtype, ratype;
3985   vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3986 
3987   /* Is STMT a vectorizable call?   */
3988   gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3989   if (!stmt)
3990     return false;
3991 
3992   fndecl = gimple_call_fndecl (stmt);
3993   if (fndecl == NULL_TREE)
3994     return false;
3995 
3996   struct cgraph_node *node = cgraph_node::get (fndecl);
3997   if (node == NULL || node->simd_clones == NULL)
3998     return false;
3999 
4000   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4001     return false;
4002 
4003   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4004       && ! vec_stmt)
4005     return false;
4006 
4007   if (gimple_call_lhs (stmt)
4008       && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
4009     return false;
4010 
4011   gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
4012 
4013   vectype = STMT_VINFO_VECTYPE (stmt_info);
4014 
4015   if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
4016     return false;
4017 
4018   /* FORNOW */
4019   if (slp_node)
4020     return false;
4021 
4022   /* Process function arguments.  */
4023   nargs = gimple_call_num_args (stmt);
4024 
4025   /* Bail out if the function has zero arguments.  */
4026   if (nargs == 0)
4027     return false;
4028 
4029   arginfo.reserve (nargs, true);
4030 
4031   for (i = 0; i < nargs; i++)
4032     {
4033       simd_call_arg_info thisarginfo;
4034       affine_iv iv;
4035 
4036       thisarginfo.linear_step = 0;
4037       thisarginfo.align = 0;
4038       thisarginfo.op = NULL_TREE;
4039       thisarginfo.simd_lane_linear = false;
4040 
4041       op = gimple_call_arg (stmt, i);
4042       if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
4043 			       &thisarginfo.vectype)
4044 	  || thisarginfo.dt == vect_uninitialized_def)
4045 	{
4046 	  if (dump_enabled_p ())
4047 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4048 			     "use not simple.\n");
4049 	  return false;
4050 	}
4051 
4052       if (thisarginfo.dt == vect_constant_def
4053 	  || thisarginfo.dt == vect_external_def)
4054 	gcc_assert (thisarginfo.vectype == NULL_TREE);
4055       else
4056 	{
4057 	  gcc_assert (thisarginfo.vectype != NULL_TREE);
4058 	  if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
4059 	    {
4060 	      if (dump_enabled_p ())
4061 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4062 				 "vector mask arguments are not supported\n");
4063 	      return false;
4064 	    }
4065 	}
4066 
4067       /* For linear arguments, the analyze phase should have saved
4068 	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
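      /* (Layout, as recorded during the analysis phase further below: slot 0
	 holds the selected clone's decl and each constant-step linear
	 argument I uses slots 3*I+1 .. 3*I+3 for its base op, linear step
	 and simd-lane-linear flag.)  */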
4069       if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
4070 	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
4071 	{
4072 	  gcc_assert (vec_stmt);
4073 	  thisarginfo.linear_step
4074 	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
4075 	  thisarginfo.op
4076 	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
4077 	  thisarginfo.simd_lane_linear
4078 	    = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
4079 	       == boolean_true_node);
4080 	  /* If loop has been peeled for alignment, we need to adjust it.  */
4081 	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4082 	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4083 	  if (n1 != n2 && !thisarginfo.simd_lane_linear)
4084 	    {
4085 	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4086 	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4087 	      tree opt = TREE_TYPE (thisarginfo.op);
4088 	      bias = fold_convert (TREE_TYPE (step), bias);
4089 	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4090 	      thisarginfo.op
4091 		= fold_build2 (POINTER_TYPE_P (opt)
4092 			       ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4093 			       thisarginfo.op, bias);
4094 	    }
4095 	}
4096       else if (!vec_stmt
4097 	       && thisarginfo.dt != vect_constant_def
4098 	       && thisarginfo.dt != vect_external_def
4099 	       && loop_vinfo
4100 	       && TREE_CODE (op) == SSA_NAME
4101 	       && simple_iv (loop, loop_containing_stmt (stmt), op,
4102 			     &iv, false)
4103 	       && tree_fits_shwi_p (iv.step))
4104 	{
4105 	  thisarginfo.linear_step = tree_to_shwi (iv.step);
4106 	  thisarginfo.op = iv.base;
4107 	}
4108       else if ((thisarginfo.dt == vect_constant_def
4109 		|| thisarginfo.dt == vect_external_def)
4110 	       && POINTER_TYPE_P (TREE_TYPE (op)))
4111 	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4112       /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4113 	 linear too.  */
4114       if (POINTER_TYPE_P (TREE_TYPE (op))
4115 	  && !thisarginfo.linear_step
4116 	  && !vec_stmt
4117 	  && thisarginfo.dt != vect_constant_def
4118 	  && thisarginfo.dt != vect_external_def
4119 	  && loop_vinfo
4120 	  && !slp_node
4121 	  && TREE_CODE (op) == SSA_NAME)
4122 	vect_simd_lane_linear (op, loop, &thisarginfo);
4123 
4124       arginfo.quick_push (thisarginfo);
4125     }
4126 
4127   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4128   if (!vf.is_constant ())
4129     {
4130       if (dump_enabled_p ())
4131 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4132 			 "not considering SIMD clones; not yet supported"
4133 			 " for variable-width vectors.\n");
4134       return false;
4135     }
4136 
4137   unsigned int badness = 0;
4138   struct cgraph_node *bestn = NULL;
4139   if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4140     bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4141   else
4142     for (struct cgraph_node *n = node->simd_clones; n != NULL;
4143 	 n = n->simdclone->next_clone)
4144       {
4145 	unsigned int this_badness = 0;
4146 	unsigned int num_calls;
4147 	if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
4148 	    || n->simdclone->nargs != nargs)
4149 	  continue;
4150 	if (num_calls != 1)
4151 	  this_badness += exact_log2 (num_calls) * 4096;
4152 	if (n->simdclone->inbranch)
4153 	  this_badness += 8192;
4154 	int target_badness = targetm.simd_clone.usable (n);
4155 	if (target_badness < 0)
4156 	  continue;
4157 	this_badness += target_badness * 512;
4158 	/* FORNOW: Have to add code to add the mask argument.  */
4159 	if (n->simdclone->inbranch)
4160 	  continue;
4161 	for (i = 0; i < nargs; i++)
4162 	  {
4163 	    switch (n->simdclone->args[i].arg_type)
4164 	      {
4165 	      case SIMD_CLONE_ARG_TYPE_VECTOR:
4166 		if (!useless_type_conversion_p
4167 			(n->simdclone->args[i].orig_type,
4168 			 TREE_TYPE (gimple_call_arg (stmt, i))))
4169 		  i = -1;
4170 		else if (arginfo[i].dt == vect_constant_def
4171 			 || arginfo[i].dt == vect_external_def
4172 			 || arginfo[i].linear_step)
4173 		  this_badness += 64;
4174 		break;
4175 	      case SIMD_CLONE_ARG_TYPE_UNIFORM:
4176 		if (arginfo[i].dt != vect_constant_def
4177 		    && arginfo[i].dt != vect_external_def)
4178 		  i = -1;
4179 		break;
4180 	      case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4181 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4182 		if (arginfo[i].dt == vect_constant_def
4183 		    || arginfo[i].dt == vect_external_def
4184 		    || (arginfo[i].linear_step
4185 			!= n->simdclone->args[i].linear_step))
4186 		  i = -1;
4187 		break;
4188 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4189 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4190 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4191 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4192 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4193 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4194 		/* FORNOW */
4195 		i = -1;
4196 		break;
4197 	      case SIMD_CLONE_ARG_TYPE_MASK:
4198 		gcc_unreachable ();
4199 	      }
4200 	    if (i == (size_t) -1)
4201 	      break;
4202 	    if (n->simdclone->args[i].alignment > arginfo[i].align)
4203 	      {
4204 		i = -1;
4205 		break;
4206 	      }
4207 	    if (arginfo[i].align)
4208 	      this_badness += (exact_log2 (arginfo[i].align)
4209 			       - exact_log2 (n->simdclone->args[i].alignment));
4210 	  }
4211 	if (i == (size_t) -1)
4212 	  continue;
4213 	if (bestn == NULL || this_badness < badness)
4214 	  {
4215 	    bestn = n;
4216 	    badness = this_badness;
4217 	  }
4218       }
4219 
4220   if (bestn == NULL)
4221     return false;
4222 
4223   for (i = 0; i < nargs; i++)
4224     if ((arginfo[i].dt == vect_constant_def
4225 	 || arginfo[i].dt == vect_external_def)
4226 	&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4227       {
4228 	tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4229 	arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4230 							  slp_node);
4231 	if (arginfo[i].vectype == NULL
4232 	    || !constant_multiple_p (bestn->simdclone->simdlen,
4233 				     simd_clone_subparts (arginfo[i].vectype)))
4234 	  return false;
4235       }
4236 
4237   fndecl = bestn->decl;
4238   nunits = bestn->simdclone->simdlen;
4239   ncopies = vector_unroll_factor (vf, nunits);
4240 
4241   /* If the function isn't const, only allow it in simd loops where the user
4242      has asserted that at least nunits consecutive iterations can be
4243      performed using SIMD instructions.  */
4244   if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4245       && gimple_vuse (stmt))
4246     return false;
4247 
4248   /* Sanity check: make sure that at least one copy of the vectorized stmt
4249      needs to be generated.  */
4250   gcc_assert (ncopies >= 1);
4251 
4252   if (!vec_stmt) /* transformation not required.  */
4253     {
4254       STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4255       for (i = 0; i < nargs; i++)
4256 	if ((bestn->simdclone->args[i].arg_type
4257 	     == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4258 	    || (bestn->simdclone->args[i].arg_type
4259 		== SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4260 	  {
4261 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4262 									+ 1,
4263 								      true);
4264 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4265 	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4266 		       ? size_type_node : TREE_TYPE (arginfo[i].op);
4267 	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
4268 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4269 	    tree sll = arginfo[i].simd_lane_linear
4270 		       ? boolean_true_node : boolean_false_node;
4271 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4272 	  }
4273       STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4274       DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4275 /*      vect_model_simple_cost (vinfo, stmt_info, ncopies,
4276 				dt, slp_node, cost_vec); */
4277       return true;
4278     }
4279 
4280   /* Transform.  */
4281 
4282   if (dump_enabled_p ())
4283     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4284 
4285   /* Handle def.  */
4286   scalar_dest = gimple_call_lhs (stmt);
4287   vec_dest = NULL_TREE;
4288   rtype = NULL_TREE;
4289   ratype = NULL_TREE;
4290   if (scalar_dest)
4291     {
4292       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4293       rtype = TREE_TYPE (TREE_TYPE (fndecl));
4294       if (TREE_CODE (rtype) == ARRAY_TYPE)
4295 	{
4296 	  ratype = rtype;
4297 	  rtype = TREE_TYPE (ratype);
4298 	}
4299     }
4300 
4301   auto_vec<vec<tree> > vec_oprnds;
4302   auto_vec<unsigned> vec_oprnds_i;
4303   vec_oprnds.safe_grow_cleared (nargs, true);
4304   vec_oprnds_i.safe_grow_cleared (nargs, true);
4305   for (j = 0; j < ncopies; ++j)
4306     {
4307       /* Build argument list for the vectorized call.  */
4308       if (j == 0)
4309 	vargs.create (nargs);
4310       else
4311 	vargs.truncate (0);
4312 
4313       for (i = 0; i < nargs; i++)
4314 	{
4315 	  unsigned int k, l, m, o;
4316 	  tree atype;
4317 	  op = gimple_call_arg (stmt, i);
4318 	  switch (bestn->simdclone->args[i].arg_type)
4319 	    {
4320 	    case SIMD_CLONE_ARG_TYPE_VECTOR:
4321 	      atype = bestn->simdclone->args[i].vector_type;
4322 	      o = vector_unroll_factor (nunits,
4323 					simd_clone_subparts (atype));
4324 	      for (m = j * o; m < (j + 1) * o; m++)
4325 		{
4326 		  if (simd_clone_subparts (atype)
4327 		      < simd_clone_subparts (arginfo[i].vectype))
4328 		    {
4329 		      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4330 		      k = (simd_clone_subparts (arginfo[i].vectype)
4331 			   / simd_clone_subparts (atype));
4332 		      gcc_assert ((k & (k - 1)) == 0);
4333 		      if (m == 0)
4334 			{
4335 			  vect_get_vec_defs_for_operand (vinfo, stmt_info,
4336 							 ncopies * o / k, op,
4337 							 &vec_oprnds[i]);
4338 			  vec_oprnds_i[i] = 0;
4339 			  vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4340 			}
4341 		      else
4342 			{
4343 			  vec_oprnd0 = arginfo[i].op;
4344 			  if ((m & (k - 1)) == 0)
4345 			    vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4346 			}
4347 		      arginfo[i].op = vec_oprnd0;
4348 		      vec_oprnd0
4349 			= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4350 				  bitsize_int (prec),
4351 				  bitsize_int ((m & (k - 1)) * prec));
4352 		      gassign *new_stmt
4353 			= gimple_build_assign (make_ssa_name (atype),
4354 					       vec_oprnd0);
4355 		      vect_finish_stmt_generation (vinfo, stmt_info,
4356 						   new_stmt, gsi);
4357 		      vargs.safe_push (gimple_assign_lhs (new_stmt));
4358 		    }
4359 		  else
4360 		    {
4361 		      k = (simd_clone_subparts (atype)
4362 			   / simd_clone_subparts (arginfo[i].vectype));
4363 		      gcc_assert ((k & (k - 1)) == 0);
4364 		      vec<constructor_elt, va_gc> *ctor_elts;
4365 		      if (k != 1)
4366 			vec_alloc (ctor_elts, k);
4367 		      else
4368 			ctor_elts = NULL;
4369 		      for (l = 0; l < k; l++)
4370 			{
4371 			  if (m == 0 && l == 0)
4372 			    {
4373 			      vect_get_vec_defs_for_operand (vinfo, stmt_info,
4374 							     k * o * ncopies,
4375 							     op,
4376 							     &vec_oprnds[i]);
4377 			      vec_oprnds_i[i] = 0;
4378 			      vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4379 			    }
4380 			  else
4381 			    vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4382 			  arginfo[i].op = vec_oprnd0;
4383 			  if (k == 1)
4384 			    break;
4385 			  CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4386 						  vec_oprnd0);
4387 			}
4388 		      if (k == 1)
4389 			if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4390 						       atype))
4391 			  {
4392 			    vec_oprnd0
4393 			      = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4394 			    gassign *new_stmt
4395 			      = gimple_build_assign (make_ssa_name (atype),
4396 						     vec_oprnd0);
4397 			    vect_finish_stmt_generation (vinfo, stmt_info,
4398 							 new_stmt, gsi);
4399 			    vargs.safe_push (gimple_assign_lhs (new_stmt));
4400 			  }
4401 			else
4402 			  vargs.safe_push (vec_oprnd0);
4403 		      else
4404 			{
4405 			  vec_oprnd0 = build_constructor (atype, ctor_elts);
4406 			  gassign *new_stmt
4407 			    = gimple_build_assign (make_ssa_name (atype),
4408 						   vec_oprnd0);
4409 			  vect_finish_stmt_generation (vinfo, stmt_info,
4410 						       new_stmt, gsi);
4411 			  vargs.safe_push (gimple_assign_lhs (new_stmt));
4412 			}
4413 		    }
4414 		}
4415 	      break;
4416 	    case SIMD_CLONE_ARG_TYPE_UNIFORM:
4417 	      vargs.safe_push (op);
4418 	      break;
4419 	    case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4420 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4421 	      if (j == 0)
4422 		{
4423 		  gimple_seq stmts;
4424 		  arginfo[i].op
4425 		    = force_gimple_operand (unshare_expr (arginfo[i].op),
4426 					    &stmts, true, NULL_TREE);
4427 		  if (stmts != NULL)
4428 		    {
4429 		      basic_block new_bb;
4430 		      edge pe = loop_preheader_edge (loop);
4431 		      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4432 		      gcc_assert (!new_bb);
4433 		    }
4434 		  if (arginfo[i].simd_lane_linear)
4435 		    {
4436 		      vargs.safe_push (arginfo[i].op);
4437 		      break;
4438 		    }
4439 		  tree phi_res = copy_ssa_name (op);
4440 		  gphi *new_phi = create_phi_node (phi_res, loop->header);
4441 		  add_phi_arg (new_phi, arginfo[i].op,
4442 			       loop_preheader_edge (loop), UNKNOWN_LOCATION);
4443 		  enum tree_code code
4444 		    = POINTER_TYPE_P (TREE_TYPE (op))
4445 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
4446 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
4447 			      ? sizetype : TREE_TYPE (op);
4448 		  poly_widest_int cst
4449 		    = wi::mul (bestn->simdclone->args[i].linear_step,
4450 			       ncopies * nunits);
4451 		  tree tcst = wide_int_to_tree (type, cst);
4452 		  tree phi_arg = copy_ssa_name (op);
4453 		  gassign *new_stmt
4454 		    = gimple_build_assign (phi_arg, code, phi_res, tcst);
4455 		  gimple_stmt_iterator si = gsi_after_labels (loop->header);
4456 		  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4457 		  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4458 			       UNKNOWN_LOCATION);
4459 		  arginfo[i].op = phi_res;
4460 		  vargs.safe_push (phi_res);
4461 		}
4462 	      else
4463 		{
4464 		  enum tree_code code
4465 		    = POINTER_TYPE_P (TREE_TYPE (op))
4466 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
4467 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
4468 			      ? sizetype : TREE_TYPE (op);
4469 		  poly_widest_int cst
4470 		    = wi::mul (bestn->simdclone->args[i].linear_step,
4471 			       j * nunits);
4472 		  tree tcst = wide_int_to_tree (type, cst);
4473 		  new_temp = make_ssa_name (TREE_TYPE (op));
4474 		  gassign *new_stmt
4475 		    = gimple_build_assign (new_temp, code,
4476 					   arginfo[i].op, tcst);
4477 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4478 		  vargs.safe_push (new_temp);
4479 		}
4480 	      break;
4481 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4482 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4483 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4484 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4485 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4486 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4487 	    default:
4488 	      gcc_unreachable ();
4489 	    }
4490 	}
4491 
4492       gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4493       if (vec_dest)
4494 	{
4495 	  gcc_assert (ratype
4496 		      || known_eq (simd_clone_subparts (rtype), nunits));
4497 	  if (ratype)
4498 	    new_temp = create_tmp_var (ratype);
4499 	  else if (useless_type_conversion_p (vectype, rtype))
4500 	    new_temp = make_ssa_name (vec_dest, new_call);
4501 	  else
4502 	    new_temp = make_ssa_name (rtype, new_call);
4503 	  gimple_call_set_lhs (new_call, new_temp);
4504 	}
4505       vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4506       gimple *new_stmt = new_call;
4507 
4508       if (vec_dest)
4509 	{
4510 	  if (!multiple_p (simd_clone_subparts (vectype), nunits))
4511 	    {
4512 	      unsigned int k, l;
4513 	      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4514 	      poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4515 	      k = vector_unroll_factor (nunits,
4516 					simd_clone_subparts (vectype));
4517 	      gcc_assert ((k & (k - 1)) == 0);
4518 	      for (l = 0; l < k; l++)
4519 		{
4520 		  tree t;
4521 		  if (ratype)
4522 		    {
4523 		      t = build_fold_addr_expr (new_temp);
4524 		      t = build2 (MEM_REF, vectype, t,
4525 				  build_int_cst (TREE_TYPE (t), l * bytes));
4526 		    }
4527 		  else
4528 		    t = build3 (BIT_FIELD_REF, vectype, new_temp,
4529 				bitsize_int (prec), bitsize_int (l * prec));
4530 		  new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4531 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4532 
4533 		  if (j == 0 && l == 0)
4534 		    *vec_stmt = new_stmt;
4535 		  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4536 		}
4537 
4538 	      if (ratype)
4539 		vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4540 	      continue;
4541 	    }
4542 	  else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4543 	    {
4544 	      unsigned int k = (simd_clone_subparts (vectype)
4545 				/ simd_clone_subparts (rtype));
4546 	      gcc_assert ((k & (k - 1)) == 0);
4547 	      if ((j & (k - 1)) == 0)
4548 		vec_alloc (ret_ctor_elts, k);
4549 	      if (ratype)
4550 		{
4551 		  unsigned int m, o;
4552 		  o = vector_unroll_factor (nunits,
4553 					    simd_clone_subparts (rtype));
4554 		  for (m = 0; m < o; m++)
4555 		    {
4556 		      tree tem = build4 (ARRAY_REF, rtype, new_temp,
4557 					 size_int (m), NULL_TREE, NULL_TREE);
4558 		      new_stmt = gimple_build_assign (make_ssa_name (rtype),
4559 						      tem);
4560 		      vect_finish_stmt_generation (vinfo, stmt_info,
4561 						   new_stmt, gsi);
4562 		      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4563 					      gimple_assign_lhs (new_stmt));
4564 		    }
4565 		  vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4566 		}
4567 	      else
4568 		CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4569 	      if ((j & (k - 1)) != k - 1)
4570 		continue;
4571 	      vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4572 	      new_stmt
4573 		= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4574 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4575 
4576 	      if ((unsigned) j == k - 1)
4577 		*vec_stmt = new_stmt;
4578 	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4579 	      continue;
4580 	    }
4581 	  else if (ratype)
4582 	    {
4583 	      tree t = build_fold_addr_expr (new_temp);
4584 	      t = build2 (MEM_REF, vectype, t,
4585 			  build_int_cst (TREE_TYPE (t), 0));
4586 	      new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4587 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4588 	      vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4589 	    }
4590 	  else if (!useless_type_conversion_p (vectype, rtype))
4591 	    {
4592 	      vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4593 	      new_stmt
4594 		= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4595 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4596 	    }
4597 	}
4598 
4599       if (j == 0)
4600 	*vec_stmt = new_stmt;
4601       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4602     }
4603 
4604   for (i = 0; i < nargs; ++i)
4605     {
4606       vec<tree> oprndsi = vec_oprnds[i];
4607       oprndsi.release ();
4608     }
4609   vargs.release ();
4610 
4611   /* The call in STMT might prevent it from being removed in dce.
4612      We however cannot remove it here, due to the way the ssa name
4613      it defines is mapped to the new definition.  So just replace the
4614      rhs of the statement with something harmless.  */
4615 
4616   if (slp_node)
4617     return true;
4618 
4619   gimple *new_stmt;
4620   if (scalar_dest)
4621     {
4622       type = TREE_TYPE (scalar_dest);
4623       lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4624       new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4625     }
4626   else
4627     new_stmt = gimple_build_nop ();
4628   vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4629   unlink_stmt_vdef (stmt);
4630 
4631   return true;
4632 }
4633 
4634 
4635 /* Function vect_gen_widened_results_half
4636 
4637    Create a vector stmt whose code, number of operands, and result
4638    variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
4639    VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
4640    In the case that CODE is a CALL_EXPR, this means that a call to DECL
4641    needs to be created (DECL is a function-decl of a target-builtin).
4642    STMT_INFO is the original scalar stmt that we are vectorizing.  */
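/* As a rough illustration (not literal output of this function), for a
   widening multiplication with V8HI operands and V4SI results the "lo" half
   generated here looks like

       vect_dest_lo_1 = VEC_WIDEN_MULT_LO_EXPR <vec_oprnd0, vec_oprnd1>;

   and a second call with the corresponding "hi" code produces the other
   half; the SSA name above is purely illustrative.  */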
4643 
4644 static gimple *
4645 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4646                                tree vec_oprnd0, tree vec_oprnd1, int op_type,
4647 			       tree vec_dest, gimple_stmt_iterator *gsi,
4648 			       stmt_vec_info stmt_info)
4649 {
4650   gimple *new_stmt;
4651   tree new_temp;
4652 
4653   /* Generate half of the widened result:  */
4654   gcc_assert (op_type == TREE_CODE_LENGTH (code));
4655   if (op_type != binary_op)
4656     vec_oprnd1 = NULL;
4657   new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4658   new_temp = make_ssa_name (vec_dest, new_stmt);
4659   gimple_assign_set_lhs (new_stmt, new_temp);
4660   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4661 
4662   return new_stmt;
4663 }
4664 
4665 
4666 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4667    For multi-step conversions store the resulting vectors and call the function
4668    recursively.  */
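/* For example (illustrative GIMPLE only), one demotion step that packs two
   V4SI operands into a single V8HI result is emitted as

       vect_dest_1 = VEC_PACK_TRUNC_EXPR <vop0, vop1>;

   and for a multi-step demotion such as int -> char the intermediate
   results are packed again by the recursive call below.  */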
4669 
4670 static void
4671 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4672 				       int multi_step_cvt,
4673 				       stmt_vec_info stmt_info,
4674 				       vec<tree> &vec_dsts,
4675 				       gimple_stmt_iterator *gsi,
4676 				       slp_tree slp_node, enum tree_code code)
4677 {
4678   unsigned int i;
4679   tree vop0, vop1, new_tmp, vec_dest;
4680 
4681   vec_dest = vec_dsts.pop ();
4682 
4683   for (i = 0; i < vec_oprnds->length (); i += 2)
4684     {
4685       /* Create demotion operation.  */
4686       vop0 = (*vec_oprnds)[i];
4687       vop1 = (*vec_oprnds)[i + 1];
4688       gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4689       new_tmp = make_ssa_name (vec_dest, new_stmt);
4690       gimple_assign_set_lhs (new_stmt, new_tmp);
4691       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4692 
4693       if (multi_step_cvt)
4694 	/* Store the resulting vector for next recursive call.  */
4695 	(*vec_oprnds)[i/2] = new_tmp;
4696       else
4697 	{
4698 	  /* This is the last step of the conversion sequence. Store the
4699 	     vectors in SLP_NODE or in vector info of the scalar statement
4700 	     (or in STMT_VINFO_RELATED_STMT chain).  */
4701 	  if (slp_node)
4702 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4703 	  else
4704 	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4705 	}
4706     }
4707 
4708   /* For multi-step demotion operations we first generate demotion operations
4709      from the source type to the intermediate types, and then combine the
4710      results (stored in VEC_OPRNDS) in a demotion operation to the destination
4711      type.  */
4712   if (multi_step_cvt)
4713     {
4714       /* At each level of recursion we have half of the operands we had at the
4715 	 previous level.  */
4716       vec_oprnds->truncate ((i+1)/2);
4717       vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4718 					     multi_step_cvt - 1,
4719 					     stmt_info, vec_dsts, gsi,
4720 					     slp_node, VEC_PACK_TRUNC_EXPR);
4721     }
4722 
4723   vec_dsts.quick_push (vec_dest);
4724 }
4725 
4726 
4727 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4728    and VEC_OPRNDS1, for a binary operation associated with scalar statement
4729    STMT_INFO.  For multi-step conversions store the resulting vectors and
4730    call the function recursively.  */
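/* Roughly, each input vector VOP0 (and VOP1 for a binary operation) yields
   two widened results, e.g. for a unary short -> int promotion
   (illustration only):

       new_tmp1_1 = VEC_UNPACK_LO_EXPR <vop0>;
       new_tmp2_1 = VEC_UNPACK_HI_EXPR <vop0>;

   Both halves are collected back into VEC_OPRNDS0 for a possible further
   step.  */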
4731 
4732 static void
4733 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4734 					vec<tree> *vec_oprnds0,
4735 					vec<tree> *vec_oprnds1,
4736 					stmt_vec_info stmt_info, tree vec_dest,
4737 					gimple_stmt_iterator *gsi,
4738 					enum tree_code code1,
4739 					enum tree_code code2, int op_type)
4740 {
4741   int i;
4742   tree vop0, vop1, new_tmp1, new_tmp2;
4743   gimple *new_stmt1, *new_stmt2;
4744   vec<tree> vec_tmp = vNULL;
4745 
4746   vec_tmp.create (vec_oprnds0->length () * 2);
4747   FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4748     {
4749       if (op_type == binary_op)
4750 	vop1 = (*vec_oprnds1)[i];
4751       else
4752 	vop1 = NULL_TREE;
4753 
4754       /* Generate the two halves of promotion operation.  */
4755       new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4756 						 op_type, vec_dest, gsi,
4757 						 stmt_info);
4758       new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4759 						 op_type, vec_dest, gsi,
4760 						 stmt_info);
4761       if (is_gimple_call (new_stmt1))
4762 	{
4763 	  new_tmp1 = gimple_call_lhs (new_stmt1);
4764 	  new_tmp2 = gimple_call_lhs (new_stmt2);
4765 	}
4766       else
4767 	{
4768 	  new_tmp1 = gimple_assign_lhs (new_stmt1);
4769 	  new_tmp2 = gimple_assign_lhs (new_stmt2);
4770 	}
4771 
4772       /* Store the results for the next step.  */
4773       vec_tmp.quick_push (new_tmp1);
4774       vec_tmp.quick_push (new_tmp2);
4775     }
4776 
4777   vec_oprnds0->release ();
4778   *vec_oprnds0 = vec_tmp;
4779 }
4780 
4781 /* Create vectorized promotion stmts for widening stmts using only half the
4782    potential vector size for input.  */
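/* For instance (a sketch, not literal output), a WIDEN_PLUS_EXPR with V4HI
   inputs and a V4SI destination is emitted as

       new_tmp1_1 = (V4SI) vop0;
       new_tmp2_1 = (V4SI) vop1;
       vect_dest_1 = new_tmp1_1 + new_tmp2_1;

   i.e. the inputs are widened with NOP_EXPRs and the operation is then
   carried out in the wider type.  */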
4783 static void
4784 vect_create_half_widening_stmts (vec_info *vinfo,
4785 					vec<tree> *vec_oprnds0,
4786 					vec<tree> *vec_oprnds1,
4787 					stmt_vec_info stmt_info, tree vec_dest,
4788 					gimple_stmt_iterator *gsi,
4789 					enum tree_code code1,
4790 					int op_type)
4791 {
4792   int i;
4793   tree vop0, vop1;
4794   gimple *new_stmt1;
4795   gimple *new_stmt2;
4796   gimple *new_stmt3;
4797   vec<tree> vec_tmp = vNULL;
4798 
4799   vec_tmp.create (vec_oprnds0->length ());
4800   FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4801     {
4802       tree new_tmp1, new_tmp2, new_tmp3, out_type;
4803 
4804       gcc_assert (op_type == binary_op);
4805       vop1 = (*vec_oprnds1)[i];
4806 
4807       /* Widen the first vector input.  */
4808       out_type = TREE_TYPE (vec_dest);
4809       new_tmp1 = make_ssa_name (out_type);
4810       new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4811       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4812       if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4813 	{
4814 	  /* Widen the second vector input.  */
4815 	  new_tmp2 = make_ssa_name (out_type);
4816 	  new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4817 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4818 	  /* Perform the operation, with both vector inputs widened.  */
4819 	  new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
4820 	}
4821       else
4822 	{
4823 	  /* Perform the operation, with the single vector input widened.  */
4824 	  new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
4825 	}
4826 
4827       new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4828       gimple_assign_set_lhs (new_stmt3, new_tmp3);
4829       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4830 
4831       /* Store the results for the next step.  */
4832       vec_tmp.quick_push (new_tmp3);
4833     }
4834 
4835   vec_oprnds0->release ();
4836   *vec_oprnds0 = vec_tmp;
4837 }
4838 
4839 
4840 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4841    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4842    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4843    Return true if STMT_INFO is vectorizable in this way.  */
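/* For example, a scalar statement such as

       short_var = (short) int_var;

   is handled below as a NARROW conversion, while

       double_var = (double) float_var;

   is a WIDEN one; multi-step cases such as char -> int go through the
   intermediate types computed below.  (Examples are illustrative only.)  */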
4844 
4845 static bool
4846 vectorizable_conversion (vec_info *vinfo,
4847 			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4848 			 gimple **vec_stmt, slp_tree slp_node,
4849 			 stmt_vector_for_cost *cost_vec)
4850 {
4851   tree vec_dest;
4852   tree scalar_dest;
4853   tree op0, op1 = NULL_TREE;
4854   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4855   enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4856   enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4857   tree new_temp;
4858   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4859   int ndts = 2;
4860   poly_uint64 nunits_in;
4861   poly_uint64 nunits_out;
4862   tree vectype_out, vectype_in;
4863   int ncopies, i;
4864   tree lhs_type, rhs_type;
4865   enum { NARROW, NONE, WIDEN } modifier;
4866   vec<tree> vec_oprnds0 = vNULL;
4867   vec<tree> vec_oprnds1 = vNULL;
4868   tree vop0;
4869   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4870   int multi_step_cvt = 0;
4871   vec<tree> interm_types = vNULL;
4872   tree intermediate_type, cvt_type = NULL_TREE;
4873   int op_type;
4874   unsigned short fltsz;
4875 
4876   /* Is STMT a vectorizable conversion?   */
4877 
4878   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4879     return false;
4880 
4881   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4882       && ! vec_stmt)
4883     return false;
4884 
4885   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4886   if (!stmt)
4887     return false;
4888 
4889   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4890     return false;
4891 
4892   code = gimple_assign_rhs_code (stmt);
4893   if (!CONVERT_EXPR_CODE_P (code)
4894       && code != FIX_TRUNC_EXPR
4895       && code != FLOAT_EXPR
4896       && code != WIDEN_PLUS_EXPR
4897       && code != WIDEN_MINUS_EXPR
4898       && code != WIDEN_MULT_EXPR
4899       && code != WIDEN_LSHIFT_EXPR)
4900     return false;
4901 
4902   bool widen_arith = (code == WIDEN_PLUS_EXPR
4903 		      || code == WIDEN_MINUS_EXPR
4904 		      || code == WIDEN_MULT_EXPR
4905 		      || code == WIDEN_LSHIFT_EXPR);
4906   op_type = TREE_CODE_LENGTH (code);
4907 
4908   /* Check types of lhs and rhs.  */
4909   scalar_dest = gimple_assign_lhs (stmt);
4910   lhs_type = TREE_TYPE (scalar_dest);
4911   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4912 
4913   /* Check the operands of the operation.  */
4914   slp_tree slp_op0, slp_op1 = NULL;
4915   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4916 			   0, &op0, &slp_op0, &dt[0], &vectype_in))
4917     {
4918       if (dump_enabled_p ())
4919 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4920 			 "use not simple.\n");
4921       return false;
4922     }
4923 
4924   rhs_type = TREE_TYPE (op0);
4925   if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4926       && !((INTEGRAL_TYPE_P (lhs_type)
4927 	    && INTEGRAL_TYPE_P (rhs_type))
4928 	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
4929 	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
4930     return false;
4931 
4932   if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4933       && ((INTEGRAL_TYPE_P (lhs_type)
4934 	   && !type_has_mode_precision_p (lhs_type))
4935 	  || (INTEGRAL_TYPE_P (rhs_type)
4936 	      && !type_has_mode_precision_p (rhs_type))))
4937     {
4938       if (dump_enabled_p ())
4939 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4940                          "type conversion to/from bit-precision unsupported."
4941                          "\n");
4942       return false;
4943     }
4944 
4945   if (op_type == binary_op)
4946     {
4947       gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
4948 		  || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
4949 
4950       op1 = gimple_assign_rhs2 (stmt);
4951       tree vectype1_in;
4952       if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4953 			       &op1, &slp_op1, &dt[1], &vectype1_in))
4954 	{
4955           if (dump_enabled_p ())
4956             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4957                              "use not simple.\n");
4958 	  return false;
4959 	}
4960       /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4961 	 OP1.  */
4962       if (!vectype_in)
4963 	vectype_in = vectype1_in;
4964     }
4965 
4966   /* If op0 is an external or constant def, infer the vector type
4967      from the scalar type.  */
4968   if (!vectype_in)
4969     vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4970   if (vec_stmt)
4971     gcc_assert (vectype_in);
4972   if (!vectype_in)
4973     {
4974       if (dump_enabled_p ())
4975 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4976 			 "no vectype for scalar type %T\n", rhs_type);
4977 
4978       return false;
4979     }
4980 
4981   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4982       && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4983     {
4984       if (dump_enabled_p ())
4985 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4986 			 "can't convert between boolean and non "
4987 			 "boolean vectors %T\n", rhs_type);
4988 
4989       return false;
4990     }
4991 
4992   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4993   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4994   if (known_eq (nunits_out, nunits_in))
4995     if (widen_arith)
4996       modifier = WIDEN;
4997     else
4998       modifier = NONE;
4999   else if (multiple_p (nunits_out, nunits_in))
5000     modifier = NARROW;
5001   else
5002     {
5003       gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5004       modifier = WIDEN;
5005     }
5006 
5007   /* Multiple types in SLP are handled by creating the appropriate number of
5008      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5009      case of SLP.  */
5010   if (slp_node)
5011     ncopies = 1;
5012   else if (modifier == NARROW)
5013     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5014   else
5015     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5016 
5017   /* Sanity check: make sure that at least one copy of the vectorized stmt
5018      needs to be generated.  */
5019   gcc_assert (ncopies >= 1);
5020 
5021   bool found_mode = false;
5022   scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5023   scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5024   opt_scalar_mode rhs_mode_iter;
5025 
5026   /* Supportable by target?  */
5027   switch (modifier)
5028     {
5029     case NONE:
5030       if (code != FIX_TRUNC_EXPR
5031 	  && code != FLOAT_EXPR
5032 	  && !CONVERT_EXPR_CODE_P (code))
5033 	return false;
5034       if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
5035 	break;
5036       /* FALLTHRU */
5037     unsupported:
5038       if (dump_enabled_p ())
5039 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5040                          "conversion not supported by target.\n");
5041       return false;
5042 
5043     case WIDEN:
5044       if (known_eq (nunits_in, nunits_out))
5045 	{
5046 	  if (!supportable_half_widening_operation (code, vectype_out,
5047 						   vectype_in, &code1))
5048 	    goto unsupported;
5049 	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
5050 	  break;
5051 	}
5052       if (supportable_widening_operation (vinfo, code, stmt_info,
5053 					       vectype_out, vectype_in, &code1,
5054 					       &code2, &multi_step_cvt,
5055 					       &interm_types))
5056 	{
5057 	  /* Binary widening operation can only be supported directly by the
5058 	     architecture.  */
5059 	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
5060 	  break;
5061 	}
5062 
5063       if (code != FLOAT_EXPR
5064 	  || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5065 	goto unsupported;
5066 
5067       fltsz = GET_MODE_SIZE (lhs_mode);
5068       FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5069 	{
5070 	  rhs_mode = rhs_mode_iter.require ();
5071 	  if (GET_MODE_SIZE (rhs_mode) > fltsz)
5072 	    break;
5073 
5074 	  cvt_type
5075 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5076 	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5077 	  if (cvt_type == NULL_TREE)
5078 	    goto unsupported;
5079 
5080 	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
5081 	    {
5082 	      if (!supportable_convert_operation (code, vectype_out,
5083 						  cvt_type, &codecvt1))
5084 		goto unsupported;
5085 	    }
5086 	  else if (!supportable_widening_operation (vinfo, code, stmt_info,
5087 						    vectype_out, cvt_type,
5088 						    &codecvt1, &codecvt2,
5089 						    &multi_step_cvt,
5090 						    &interm_types))
5091 	    continue;
5092 	  else
5093 	    gcc_assert (multi_step_cvt == 0);
5094 
5095 	  if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5096 					      cvt_type,
5097 					      vectype_in, &code1, &code2,
5098 					      &multi_step_cvt, &interm_types))
5099 	    {
5100 	      found_mode = true;
5101 	      break;
5102 	    }
5103 	}
5104 
5105       if (!found_mode)
5106 	goto unsupported;
5107 
5108       if (GET_MODE_SIZE (rhs_mode) == fltsz)
5109 	codecvt2 = ERROR_MARK;
5110       else
5111 	{
5112 	  multi_step_cvt++;
5113 	  interm_types.safe_push (cvt_type);
5114 	  cvt_type = NULL_TREE;
5115 	}
5116       break;
5117 
5118     case NARROW:
5119       gcc_assert (op_type == unary_op);
5120       if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5121 					   &code1, &multi_step_cvt,
5122 					   &interm_types))
5123 	break;
5124 
5125       if (code != FIX_TRUNC_EXPR
5126 	  || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5127 	goto unsupported;
5128 
5129       cvt_type
5130 	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5131       cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5132       if (cvt_type == NULL_TREE)
5133 	goto unsupported;
5134       if (!supportable_convert_operation (code, cvt_type, vectype_in,
5135 					  &codecvt1))
5136 	goto unsupported;
5137       if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5138 					   &code1, &multi_step_cvt,
5139 					   &interm_types))
5140 	break;
5141       goto unsupported;
5142 
5143     default:
5144       gcc_unreachable ();
5145     }
5146 
5147   if (!vec_stmt)		/* transformation not required.  */
5148     {
5149       if (slp_node
5150 	  && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5151 	      || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5152 	{
5153 	  if (dump_enabled_p ())
5154 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5155 			     "incompatible vector types for invariants\n");
5156 	  return false;
5157 	}
5158       DUMP_VECT_SCOPE ("vectorizable_conversion");
5159       if (modifier == NONE)
5160         {
5161 	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5162 	  vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5163 				  cost_vec);
5164 	}
5165       else if (modifier == NARROW)
5166 	{
5167 	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5168 	  /* The final packing step produces one vector result per copy.  */
5169 	  unsigned int nvectors
5170 	    = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5171 	  vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5172 					      multi_step_cvt, cost_vec,
5173 					      widen_arith);
5174 	}
5175       else
5176 	{
5177 	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5178 	  /* The initial unpacking step produces two vector results
5179 	     per copy.  MULTI_STEP_CVT is 0 for a single conversion,
5180 	     so >> MULTI_STEP_CVT divides by 2^(number of steps - 1).  */
5181 	  unsigned int nvectors
5182 	    = (slp_node
5183 	       ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5184 	       : ncopies * 2);
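	  /* For instance, with ncopies == 2 and no SLP this counts
	     2 * 2 == 4 vector results, while an SLP node with 8 vector
	     stmts and a two-step conversion (MULTI_STEP_CVT == 1) counts
	     8 >> 1 == 4.  (Worked example only.)  */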
5185 	  vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5186 					      multi_step_cvt, cost_vec,
5187 					      widen_arith);
5188 	}
5189       interm_types.release ();
5190       return true;
5191     }
5192 
5193   /* Transform.  */
5194   if (dump_enabled_p ())
5195     dump_printf_loc (MSG_NOTE, vect_location,
5196                      "transform conversion. ncopies = %d.\n", ncopies);
5197 
5198   if (op_type == binary_op)
5199     {
5200       if (CONSTANT_CLASS_P (op0))
5201 	op0 = fold_convert (TREE_TYPE (op1), op0);
5202       else if (CONSTANT_CLASS_P (op1))
5203 	op1 = fold_convert (TREE_TYPE (op0), op1);
5204     }
5205 
5206   /* In case of multi-step conversion, we first generate conversion operations
5207      to the intermediate types, and then from those types to the final one.
5208      We create vector destinations for the intermediate type (TYPES) received
5209      from supportable_*_operation, and store them in the correct order
5210      for future use in vect_create_vectorized_*_stmts ().  */
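  /* For instance, for an int -> char demotion through an intermediate short
     vector type (MULTI_STEP_CVT == 1), VEC_DSTS is filled with the char
     vector destination first and the short vector destination last, so the
     first pop in vect_create_vectorized_demotion_stmts yields the short
     destination for the first packing step.  (Illustration only.)  */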
5211   auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5212   vec_dest = vect_create_destination_var (scalar_dest,
5213 					  (cvt_type && modifier == WIDEN)
5214 					  ? cvt_type : vectype_out);
5215   vec_dsts.quick_push (vec_dest);
5216 
5217   if (multi_step_cvt)
5218     {
5219       for (i = interm_types.length () - 1;
5220 	   interm_types.iterate (i, &intermediate_type); i--)
5221 	{
5222 	  vec_dest = vect_create_destination_var (scalar_dest,
5223 						  intermediate_type);
5224 	  vec_dsts.quick_push (vec_dest);
5225 	}
5226     }
5227 
5228   if (cvt_type)
5229     vec_dest = vect_create_destination_var (scalar_dest,
5230 					    modifier == WIDEN
5231 					    ? vectype_out : cvt_type);
5232 
5233   int ninputs = 1;
5234   if (!slp_node)
5235     {
5236       if (modifier == WIDEN)
5237 	;
5238       else if (modifier == NARROW)
5239 	{
5240 	  if (multi_step_cvt)
5241 	    ninputs = vect_pow2 (multi_step_cvt);
5242 	  ninputs *= 2;
5243 	}
5244     }
5245 
5246   switch (modifier)
5247     {
5248     case NONE:
5249       vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5250 			 op0, &vec_oprnds0);
5251       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5252 	{
5253 	  /* Arguments are ready, create the new vector stmt.  */
5254 	  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5255 	  gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5256 	  new_temp = make_ssa_name (vec_dest, new_stmt);
5257 	  gimple_assign_set_lhs (new_stmt, new_temp);
5258 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5259 
5260 	  if (slp_node)
5261 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5262 	  else
5263 	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5264 	}
5265       break;
5266 
5267     case WIDEN:
5268       /* In case the vectorization factor (VF) is bigger than the number
5269 	 of elements that we can fit in a vectype (nunits), we have to
5270 	 generate more than one vector stmt - i.e. - we need to "unroll"
5271 	 the vector stmt by a factor VF/nunits.  */
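      /* For example, with VF == 8 and 4-element input vectors this means
	 ncopies == 2 copies of the widening statement.  (Illustration
	 only.)  */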
5272       vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5273 			 op0, &vec_oprnds0,
5274 			 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5275 			 &vec_oprnds1);
5276       if (code == WIDEN_LSHIFT_EXPR)
5277 	{
5278 	  int oprnds_size = vec_oprnds0.length ();
5279 	  vec_oprnds1.create (oprnds_size);
5280 	  for (i = 0; i < oprnds_size; ++i)
5281 	    vec_oprnds1.quick_push (op1);
5282 	}
5283       /* Arguments are ready.  Create the new vector stmts.  */
5284       for (i = multi_step_cvt; i >= 0; i--)
5285 	{
5286 	  tree this_dest = vec_dsts[i];
5287 	  enum tree_code c1 = code1, c2 = code2;
5288 	  if (i == 0 && codecvt2 != ERROR_MARK)
5289 	    {
5290 	      c1 = codecvt1;
5291 	      c2 = codecvt2;
5292 	    }
5293 	  if (known_eq (nunits_out, nunits_in))
5294 	    vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
5295 						    &vec_oprnds1, stmt_info,
5296 						    this_dest, gsi,
5297 						    c1, op_type);
5298 	  else
5299 	    vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5300 						    &vec_oprnds1, stmt_info,
5301 						    this_dest, gsi,
5302 						    c1, c2, op_type);
5303 	}
5304 
5305       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5306 	{
5307 	  gimple *new_stmt;
5308 	  if (cvt_type)
5309 	    {
5310 	      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5311 	      new_temp = make_ssa_name (vec_dest);
5312 	      new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
5313 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5314 	    }
5315 	  else
5316 	    new_stmt = SSA_NAME_DEF_STMT (vop0);
5317 
5318 	  if (slp_node)
5319 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5320 	  else
5321 	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5322 	}
5323       break;
5324 
5325     case NARROW:
5326       /* In case the vectorization factor (VF) is bigger than the number
5327 	 of elements that we can fit in a vectype (nunits), we have to
5328 	 generate more than one vector stmt - i.e. - we need to "unroll"
5329 	 the vector stmt by a factor VF/nunits.  */
5330       vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5331 			 op0, &vec_oprnds0);
5332       /* Arguments are ready.  Create the new vector stmts.  */
5333       if (cvt_type)
5334 	FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5335 	  {
5336 	    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5337 	    new_temp = make_ssa_name (vec_dest);
5338 	    gassign *new_stmt
5339 	      = gimple_build_assign (new_temp, codecvt1, vop0);
5340 	    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5341 	    vec_oprnds0[i] = new_temp;
5342 	  }
5343 
5344       vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5345 					     multi_step_cvt,
5346 					     stmt_info, vec_dsts, gsi,
5347 					     slp_node, code1);
5348       break;
5349     }
5350   if (!slp_node)
5351     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5352 
5353   vec_oprnds0.release ();
5354   vec_oprnds1.release ();
5355   interm_types.release ();
5356 
5357   return true;
5358 }
5359 
5360 /* Return true if we can assume from the scalar form of STMT_INFO that
5361    neither the scalar nor the vector forms will generate code.  STMT_INFO
5362    is known not to involve a data reference.  */
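/* For instance (illustrative only), a statement such as

       _2 = (unsigned int) _1;

   where _1 has 32-bit int type is a nop conversion: neither the scalar nor
   the vector form needs any instruction, so it can be treated as free.  */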
5363 
5364 bool
5365 vect_nop_conversion_p (stmt_vec_info stmt_info)
5366 {
5367   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5368   if (!stmt)
5369     return false;
5370 
5371   tree lhs = gimple_assign_lhs (stmt);
5372   tree_code code = gimple_assign_rhs_code (stmt);
5373   tree rhs = gimple_assign_rhs1 (stmt);
5374 
5375   if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5376     return true;
5377 
5378   if (CONVERT_EXPR_CODE_P (code))
5379     return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5380 
5381   return false;
5382 }
5383 
5384 /* Function vectorizable_assignment.
5385 
5386    Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5387    If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5388    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5389    Return true if STMT_INFO is vectorizable in this way.  */
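/* For example, plain copies and no-op conversions such as

       _3 = _2;
       _4 = (unsigned int) _3;

   are vectorized here into a single vector copy, possibly wrapped in a
   VIEW_CONVERT_EXPR.  (Examples are illustrative only.)  */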
5390 
5391 static bool
5392 vectorizable_assignment (vec_info *vinfo,
5393 			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5394 			 gimple **vec_stmt, slp_tree slp_node,
5395 			 stmt_vector_for_cost *cost_vec)
5396 {
5397   tree vec_dest;
5398   tree scalar_dest;
5399   tree op;
5400   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5401   tree new_temp;
5402   enum vect_def_type dt[1] = {vect_unknown_def_type};
5403   int ndts = 1;
5404   int ncopies;
5405   int i;
5406   vec<tree> vec_oprnds = vNULL;
5407   tree vop;
5408   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5409   enum tree_code code;
5410   tree vectype_in;
5411 
5412   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5413     return false;
5414 
5415   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5416       && ! vec_stmt)
5417     return false;
5418 
5419   /* Is vectorizable assignment?  */
5420   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5421   if (!stmt)
5422     return false;
5423 
5424   scalar_dest = gimple_assign_lhs (stmt);
5425   if (TREE_CODE (scalar_dest) != SSA_NAME)
5426     return false;
5427 
5428   if (STMT_VINFO_DATA_REF (stmt_info))
5429     return false;
5430 
5431   code = gimple_assign_rhs_code (stmt);
5432   if (!(gimple_assign_single_p (stmt)
5433 	|| code == PAREN_EXPR
5434 	|| CONVERT_EXPR_CODE_P (code)))
5435     return false;
5436 
5437   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5438   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5439 
5440   /* Multiple types in SLP are handled by creating the appropriate number of
5441      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5442      case of SLP.  */
5443   if (slp_node)
5444     ncopies = 1;
5445   else
5446     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5447 
5448   gcc_assert (ncopies >= 1);
5449 
5450   slp_tree slp_op;
5451   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5452 			   &dt[0], &vectype_in))
5453     {
5454       if (dump_enabled_p ())
5455         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5456                          "use not simple.\n");
5457       return false;
5458     }
5459   if (!vectype_in)
5460     vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5461 
5462   /* We can handle NOP_EXPR conversions that do not change the number
5463      of elements or the vector size.  */
5464   if ((CONVERT_EXPR_CODE_P (code)
5465        || code == VIEW_CONVERT_EXPR)
5466       && (!vectype_in
5467 	  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5468 	  || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5469 		       GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5470     return false;
5471 
5472   if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
5473     {
5474       if (dump_enabled_p ())
5475 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5476 			 "can't convert between boolean and non "
5477 			 "boolean vectors %T\n", TREE_TYPE (op));
5478 
5479       return false;
5480     }
5481 
5482   /* We do not handle bit-precision changes.  */
5483   if ((CONVERT_EXPR_CODE_P (code)
5484        || code == VIEW_CONVERT_EXPR)
5485       && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5486       && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5487 	  || !type_has_mode_precision_p (TREE_TYPE (op)))
5488       /* But a conversion that does not change the bit-pattern is ok.  */
5489       && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5490 	    > TYPE_PRECISION (TREE_TYPE (op)))
5491 	   && TYPE_UNSIGNED (TREE_TYPE (op))))
5492     {
5493       if (dump_enabled_p ())
5494         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5495                          "type conversion to/from bit-precision "
5496                          "unsupported.\n");
5497       return false;
5498     }
5499 
5500   if (!vec_stmt) /* transformation not required.  */
5501     {
5502       if (slp_node
5503 	  && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5504 	{
5505 	  if (dump_enabled_p ())
5506 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5507 			     "incompatible vector types for invariants\n");
5508 	  return false;
5509 	}
5510       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5511       DUMP_VECT_SCOPE ("vectorizable_assignment");
5512       if (!vect_nop_conversion_p (stmt_info))
5513 	vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5514 				cost_vec);
5515       return true;
5516     }
5517 
5518   /* Transform.  */
5519   if (dump_enabled_p ())
5520     dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5521 
5522   /* Handle def.  */
5523   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5524 
5525   /* Handle use.  */
5526   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5527 
5528   /* Arguments are ready.  Create the new vector stmt.  */
5529   FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5530     {
5531       if (CONVERT_EXPR_CODE_P (code)
5532 	  || code == VIEW_CONVERT_EXPR)
5533 	vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5534       gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5535       new_temp = make_ssa_name (vec_dest, new_stmt);
5536       gimple_assign_set_lhs (new_stmt, new_temp);
5537       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5538       if (slp_node)
5539 	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5540       else
5541 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5542     }
5543   if (!slp_node)
5544     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5545 
5546   vec_oprnds.release ();
5547   return true;
5548 }
5549 
5550 
5551 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5552    either as shift by a scalar or by a vector.  */
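/* For instance, a caller can check whether shifting 32-bit ints left is
   vectorizable at all with

       vect_supportable_shift (vinfo, LSHIFT_EXPR, integer_type_node);

   which returns true if the target has either a vector-shift-by-scalar or a
   vector-shift-by-vector pattern for the chosen vector mode.  (Usage sketch
   only.)  */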
5553 
5554 bool
5555 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5556 {
5557 
5558   machine_mode vec_mode;
5559   optab optab;
5560   int icode;
5561   tree vectype;
5562 
5563   vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5564   if (!vectype)
5565     return false;
5566 
5567   optab = optab_for_tree_code (code, vectype, optab_scalar);
5568   if (!optab
5569       || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5570     {
5571       optab = optab_for_tree_code (code, vectype, optab_vector);
5572       if (!optab
5573           || (optab_handler (optab, TYPE_MODE (vectype))
5574                       == CODE_FOR_nothing))
5575         return false;
5576     }
5577 
5578   vec_mode = TYPE_MODE (vectype);
5579   icode = (int) optab_handler (optab, vec_mode);
5580   if (icode == CODE_FOR_nothing)
5581     return false;
5582 
5583   return true;
5584 }
5585 
5586 
5587 /* Function vectorizable_shift.
5588 
5589    Check if STMT_INFO performs a shift operation that can be vectorized.
5590    If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5591    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5592    Return true if STMT_INFO is vectorizable in this way.  */
5593 
5594 static bool
5595 vectorizable_shift (vec_info *vinfo,
5596 		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5597 		    gimple **vec_stmt, slp_tree slp_node,
5598 		    stmt_vector_for_cost *cost_vec)
5599 {
5600   tree vec_dest;
5601   tree scalar_dest;
5602   tree op0, op1 = NULL;
5603   tree vec_oprnd1 = NULL_TREE;
5604   tree vectype;
5605   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5606   enum tree_code code;
5607   machine_mode vec_mode;
5608   tree new_temp;
5609   optab optab;
5610   int icode;
5611   machine_mode optab_op2_mode;
5612   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5613   int ndts = 2;
5614   poly_uint64 nunits_in;
5615   poly_uint64 nunits_out;
5616   tree vectype_out;
5617   tree op1_vectype;
5618   int ncopies;
5619   int i;
5620   vec<tree> vec_oprnds0 = vNULL;
5621   vec<tree> vec_oprnds1 = vNULL;
5622   tree vop0, vop1;
5623   unsigned int k;
5624   bool scalar_shift_arg = true;
5625   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5626   bool incompatible_op1_vectype_p = false;
5627 
5628   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5629     return false;
5630 
5631   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5632       && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5633       && ! vec_stmt)
5634     return false;
5635 
5636   /* Is STMT a vectorizable binary/unary operation?   */
5637   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5638   if (!stmt)
5639     return false;
5640 
5641   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5642     return false;
5643 
5644   code = gimple_assign_rhs_code (stmt);
5645 
5646   if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5647       || code == RROTATE_EXPR))
5648     return false;
5649 
5650   scalar_dest = gimple_assign_lhs (stmt);
5651   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5652   if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5653     {
5654       if (dump_enabled_p ())
5655         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5656                          "bit-precision shifts not supported.\n");
5657       return false;
5658     }
5659 
5660   slp_tree slp_op0;
5661   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5662 			   0, &op0, &slp_op0, &dt[0], &vectype))
5663     {
5664       if (dump_enabled_p ())
5665         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5666                          "use not simple.\n");
5667       return false;
5668     }
5669   /* If op0 is an external or constant def, infer the vector type
5670      from the scalar type.  */
5671   if (!vectype)
5672     vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5673   if (vec_stmt)
5674     gcc_assert (vectype);
5675   if (!vectype)
5676     {
5677       if (dump_enabled_p ())
5678         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5679                          "no vectype for scalar type\n");
5680       return false;
5681     }
5682 
5683   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5684   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5685   if (maybe_ne (nunits_out, nunits_in))
5686     return false;
5687 
5688   stmt_vec_info op1_def_stmt_info;
5689   slp_tree slp_op1;
5690   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5691 			   &dt[1], &op1_vectype, &op1_def_stmt_info))
5692     {
5693       if (dump_enabled_p ())
5694         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5695                          "use not simple.\n");
5696       return false;
5697     }
5698 
5699   /* Multiple types in SLP are handled by creating the appropriate number of
5700      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5701      case of SLP.  */
5702   if (slp_node)
5703     ncopies = 1;
5704   else
5705     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5706 
5707   gcc_assert (ncopies >= 1);
5708 
5709   /* Determine whether the shift amount is a vector, or scalar.  If the
5710      shift/rotate amount is a vector, use the vector/vector shift optabs.  */
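  /* For example, "a[i] << 3" or "a[i] << n" with loop-invariant n keeps
     SCALAR_SHIFT_ARG set and can use the vector/scalar optab, whereas
     "a[i] << b[i]" requires the vector/vector optab.  (Illustration
     only.)  */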
5711 
5712   if ((dt[1] == vect_internal_def
5713        || dt[1] == vect_induction_def
5714        || dt[1] == vect_nested_cycle)
5715       && !slp_node)
5716     scalar_shift_arg = false;
5717   else if (dt[1] == vect_constant_def
5718 	   || dt[1] == vect_external_def
5719 	   || dt[1] == vect_internal_def)
5720     {
5721       /* In SLP, we need to check whether the shift count is the same
5722 	 for all statements; in loops, if it is a constant or invariant,
5723 	 it is always a scalar shift.  */
5724       if (slp_node)
5725 	{
5726 	  vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5727 	  stmt_vec_info slpstmt_info;
5728 
5729 	  FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5730 	    {
5731 	      gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5732 	      if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5733 		scalar_shift_arg = false;
5734 	    }
5735 
5736 	  /* For internal SLP defs we have to make sure we see scalar stmts
5737 	     for all vector elements.
5738 	     ???  For different vectors we could resort to a different
5739 	     scalar shift operand but code-generation below simply always
5740 	     takes the first.  */
5741 	  if (dt[1] == vect_internal_def
5742 	      && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5743 			   stmts.length ()))
5744 	    scalar_shift_arg = false;
5745 	}
5746 
5747       /* If the shift amount is computed by a pattern stmt we cannot
5748 	 use the scalar amount directly, so give up and use a vector
5749 	 shift.  */
5750       if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5751 	scalar_shift_arg = false;
5752     }
5753   else
5754     {
5755       if (dump_enabled_p ())
5756         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5757                          "operand mode requires invariant argument.\n");
5758       return false;
5759     }
5760 
5761   /* Vector shifted by vector.  */
5762   bool was_scalar_shift_arg = scalar_shift_arg;
5763   if (!scalar_shift_arg)
5764     {
5765       optab = optab_for_tree_code (code, vectype, optab_vector);
5766       if (dump_enabled_p ())
5767         dump_printf_loc (MSG_NOTE, vect_location,
5768                          "vector/vector shift/rotate found.\n");
5769 
5770       if (!op1_vectype)
5771 	op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5772 						   slp_op1);
5773       incompatible_op1_vectype_p
5774 	= (op1_vectype == NULL_TREE
5775 	   || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5776 			TYPE_VECTOR_SUBPARTS (vectype))
5777 	   || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5778       if (incompatible_op1_vectype_p
5779 	  && (!slp_node
5780 	      || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5781 	      || slp_op1->refcnt != 1))
5782 	{
5783 	  if (dump_enabled_p ())
5784 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5785                              "unusable type for last operand in"
5786                              " vector/vector shift/rotate.\n");
5787 	  return false;
5788 	}
5789     }
5790   /* See if the machine has a vector shifted by scalar insn and if not
5791      then see if it has a vector shifted by vector insn.  */
5792   else
5793     {
5794       optab = optab_for_tree_code (code, vectype, optab_scalar);
5795       if (optab
5796           && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5797         {
5798           if (dump_enabled_p ())
5799             dump_printf_loc (MSG_NOTE, vect_location,
5800                              "vector/scalar shift/rotate found.\n");
5801         }
5802       else
5803         {
5804           optab = optab_for_tree_code (code, vectype, optab_vector);
5805           if (optab
5806                && (optab_handler (optab, TYPE_MODE (vectype))
5807                       != CODE_FOR_nothing))
5808             {
5809 	      scalar_shift_arg = false;
5810 
5811               if (dump_enabled_p ())
5812                 dump_printf_loc (MSG_NOTE, vect_location,
5813                                  "vector/vector shift/rotate found.\n");
5814 
5815 	      if (!op1_vectype)
5816 		op1_vectype = get_vectype_for_scalar_type (vinfo,
5817 							   TREE_TYPE (op1),
5818 							   slp_op1);
5819 
5820               /* Unlike the other binary operators, shifts/rotates have
5821                  an int rhs rather than one of the same type as the lhs,
5822                  so make sure the scalar is the right type if we are
5823 		 dealing with vectors of long long/long/short/char.  */
5824 	      incompatible_op1_vectype_p
5825 		= (!op1_vectype
5826 		   || !tree_nop_conversion_p (TREE_TYPE (vectype),
5827 					      TREE_TYPE (op1)));
5828 	      if (incompatible_op1_vectype_p
5829 		  && dt[1] == vect_internal_def)
5830 		{
5831 		  if (dump_enabled_p ())
5832 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5833 				     "unusable type for last operand in"
5834 				     " vector/vector shift/rotate.\n");
5835 		  return false;
5836 		}
5837             }
5838         }
5839     }
5840 
5841   /* Supportable by target?  */
5842   if (!optab)
5843     {
5844       if (dump_enabled_p ())
5845         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5846                          "no optab.\n");
5847       return false;
5848     }
5849   vec_mode = TYPE_MODE (vectype);
5850   icode = (int) optab_handler (optab, vec_mode);
5851   if (icode == CODE_FOR_nothing)
5852     {
5853       if (dump_enabled_p ())
5854         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5855                          "op not supported by target.\n");
5856       return false;
5857     }
5858   /* Vector lowering cannot optimize vector shifts using word arithmetic.  */
5859   if (vect_emulated_vector_p (vectype))
5860     return false;
5861 
5862   if (!vec_stmt) /* transformation not required.  */
5863     {
5864       if (slp_node
5865 	  && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5866 	      || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5867 		  && (!incompatible_op1_vectype_p
5868 		      || dt[1] == vect_constant_def)
5869 		  && !vect_maybe_update_slp_op_vectype
5870 			(slp_op1,
5871 			 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5872 	{
5873 	  if (dump_enabled_p ())
5874 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5875 			     "incompatible vector types for invariants\n");
5876 	  return false;
5877 	}
5878       /* Now adjust the constant shift amount in place.  */
5879       if (slp_node
5880 	  && incompatible_op1_vectype_p
5881 	  && dt[1] == vect_constant_def)
5882 	{
5883 	  for (unsigned i = 0;
5884 	       i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5885 	    {
5886 	      SLP_TREE_SCALAR_OPS (slp_op1)[i]
5887 		= fold_convert (TREE_TYPE (vectype),
5888 				SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5889 	      gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5890 			   == INTEGER_CST));
5891 	    }
5892 	}
5893       STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5894       DUMP_VECT_SCOPE ("vectorizable_shift");
5895       vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5896 			      scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5897       return true;
5898     }
5899 
5900   /* Transform.  */
5901 
5902   if (dump_enabled_p ())
5903     dump_printf_loc (MSG_NOTE, vect_location,
5904                      "transform binary/unary operation.\n");
5905 
5906   if (incompatible_op1_vectype_p && !slp_node)
5907     {
5908       gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5909       op1 = fold_convert (TREE_TYPE (vectype), op1);
5910       if (dt[1] != vect_constant_def)
5911 	op1 = vect_init_vector (vinfo, stmt_info, op1,
5912 				TREE_TYPE (vectype), NULL);
5913     }
5914 
5915   /* Handle def.  */
5916   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5917 
5918   if (scalar_shift_arg && dt[1] != vect_internal_def)
5919     {
5920       /* Vector shl and shr insn patterns can be defined with scalar
5921 	 operand 2 (shift operand).  In this case, use constant or loop
5922 	 invariant op1 directly, without extending it to vector mode
5923 	 first.  */
5924       optab_op2_mode = insn_data[icode].operand[2].mode;
5925       if (!VECTOR_MODE_P (optab_op2_mode))
5926 	{
5927 	  if (dump_enabled_p ())
5928 	    dump_printf_loc (MSG_NOTE, vect_location,
5929 			     "operand 1 using scalar mode.\n");
5930 	  vec_oprnd1 = op1;
5931 	  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5932 	  vec_oprnds1.quick_push (vec_oprnd1);
5933 	      /* Store vec_oprnd1 for every vector stmt to be created.
5934 		 We check during the analysis that all the shift arguments
5935 		 are the same.
5936 		 TODO: Allow different constants for different vector
5937 		 stmts generated for an SLP instance.  */
5938 	  for (k = 0;
5939 	       k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5940 	    vec_oprnds1.quick_push (vec_oprnd1);
5941 	}
5942     }
5943   else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5944     {
5945       if (was_scalar_shift_arg)
5946 	{
5947 	  /* If the argument was the same in all lanes create
5948 	     the correctly typed vector shift amount directly.  */
5949 	  op1 = fold_convert (TREE_TYPE (vectype), op1);
5950 	  op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5951 				  !loop_vinfo ? gsi : NULL);
5952 	  vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5953 					 !loop_vinfo ? gsi : NULL);
5954 	  vec_oprnds1.create (slp_node->vec_stmts_size);
5955 	  for (k = 0; k < slp_node->vec_stmts_size; k++)
5956 	    vec_oprnds1.quick_push (vec_oprnd1);
5957 	}
5958       else if (dt[1] == vect_constant_def)
5959 	/* The constant shift amount has been adjusted in place.  */
5960 	;
5961       else
5962 	gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5963     }
5964 
5965   /* vec_oprnd1 is available if operand 1 should be of a scalar type
5966      (a special case for certain kinds of vector shifts); otherwise,
5967      operand 1 should be of a vector type (the usual case).  */
5968   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5969 		     op0, &vec_oprnds0,
5970 		     vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5971 
5972   /* Arguments are ready.  Create the new vector stmt.  */
5973   FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5974     {
5975       /* For internal defs where we need to use a scalar shift arg
5976 	 extract the first lane.  */
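      /* An illustrative sketch (not target-specific): with a shift-amount
	 vector such as { 3, 3, 3, 3 } the code below builds
	   _t = BIT_FIELD_REF <vop1, <element size in bits>, 0>;
	 and uses the scalar _t for every generated vector shift; the
	 analysis above checked that all scalar shift arguments are the
	 same, so taking lane 0 is safe.  */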
5977       if (scalar_shift_arg && dt[1] == vect_internal_def)
5978 	{
5979 	  vop1 = vec_oprnds1[0];
5980 	  new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5981 	  gassign *new_stmt
5982 	    = gimple_build_assign (new_temp,
5983 				   build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5984 					   vop1,
5985 					   TYPE_SIZE (TREE_TYPE (new_temp)),
5986 					   bitsize_zero_node));
5987 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5988 	  vop1 = new_temp;
5989 	}
5990       else
5991 	vop1 = vec_oprnds1[i];
5992       gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5993       new_temp = make_ssa_name (vec_dest, new_stmt);
5994       gimple_assign_set_lhs (new_stmt, new_temp);
5995       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5996       if (slp_node)
5997 	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5998       else
5999 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6000     }
6001 
6002   if (!slp_node)
6003     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6004 
6005   vec_oprnds0.release ();
6006   vec_oprnds1.release ();
6007 
6008   return true;
6009 }
6010 
6011 
6012 /* Function vectorizable_operation.
6013 
6014    Check if STMT_INFO performs a binary, unary or ternary operation that can
6015    be vectorized.
6016    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6017    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6018    Return true if STMT_INFO is vectorizable in this way.  */
6019 
6020 static bool
6021 vectorizable_operation (vec_info *vinfo,
6022 			stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6023 			gimple **vec_stmt, slp_tree slp_node,
6024 			stmt_vector_for_cost *cost_vec)
6025 {
6026   tree vec_dest;
6027   tree scalar_dest;
6028   tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6029   tree vectype;
6030   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6031   enum tree_code code, orig_code;
6032   machine_mode vec_mode;
6033   tree new_temp;
6034   int op_type;
6035   optab optab;
6036   bool target_support_p;
6037   enum vect_def_type dt[3]
6038     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6039   int ndts = 3;
6040   poly_uint64 nunits_in;
6041   poly_uint64 nunits_out;
6042   tree vectype_out;
6043   int ncopies, vec_num;
6044   int i;
6045   vec<tree> vec_oprnds0 = vNULL;
6046   vec<tree> vec_oprnds1 = vNULL;
6047   vec<tree> vec_oprnds2 = vNULL;
6048   tree vop0, vop1, vop2;
6049   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6050 
6051   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6052     return false;
6053 
6054   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6055       && ! vec_stmt)
6056     return false;
6057 
6058   /* Is STMT a vectorizable binary/unary operation?   */
6059   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6060   if (!stmt)
6061     return false;
6062 
6063   /* Loads and stores are handled in vectorizable_{load,store}.  */
6064   if (STMT_VINFO_DATA_REF (stmt_info))
6065     return false;
6066 
6067   orig_code = code = gimple_assign_rhs_code (stmt);
6068 
6069   /* Shifts are handled in vectorizable_shift.  */
6070   if (code == LSHIFT_EXPR
6071       || code == RSHIFT_EXPR
6072       || code == LROTATE_EXPR
6073       || code == RROTATE_EXPR)
6074    return false;
6075 
6076   /* Comparisons are handled in vectorizable_comparison.  */
6077   if (TREE_CODE_CLASS (code) == tcc_comparison)
6078     return false;
6079 
6080   /* Conditions are handled in vectorizable_condition.  */
6081   if (code == COND_EXPR)
6082     return false;
6083 
6084   /* For pointer addition and subtraction, we should use the normal
6085      plus and minus for the vector operation.  */
6086   if (code == POINTER_PLUS_EXPR)
6087     code = PLUS_EXPR;
6088   if (code == POINTER_DIFF_EXPR)
6089     code = MINUS_EXPR;
6090 
6091   /* Support only unary, binary or ternary operations.  */
6092   op_type = TREE_CODE_LENGTH (code);
6093   if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6094     {
6095       if (dump_enabled_p ())
6096         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6097                          "num. args = %d (not unary/binary/ternary op).\n",
6098                          op_type);
6099       return false;
6100     }
6101 
6102   scalar_dest = gimple_assign_lhs (stmt);
6103   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6104 
6105   /* Most operations cannot handle bit-precision types without extra
6106      truncations.  */
6107   bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6108   if (!mask_op_p
6109       && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6110       /* Exceptions are bitwise binary operations.  */
6111       && code != BIT_IOR_EXPR
6112       && code != BIT_XOR_EXPR
6113       && code != BIT_AND_EXPR)
6114     {
6115       if (dump_enabled_p ())
6116         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6117                          "bit-precision arithmetic not supported.\n");
6118       return false;
6119     }
6120 
6121   slp_tree slp_op0;
6122   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6123 			   0, &op0, &slp_op0, &dt[0], &vectype))
6124     {
6125       if (dump_enabled_p ())
6126         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6127                          "use not simple.\n");
6128       return false;
6129     }
6130   bool is_invariant = (dt[0] == vect_external_def
6131 		       || dt[0] == vect_constant_def);
6132   /* If op0 is an external or constant def, infer the vector type
6133      from the scalar type.  */
6134   if (!vectype)
6135     {
6136       /* For a boolean type we cannot determine the vectype from an
6137 	 invariant value (we don't know whether it is a vector
6138 	 of booleans or a vector of integers).  We use the output
6139 	 vectype because operations on booleans don't change
6140 	 the type.  */
6141       if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6142 	{
6143 	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6144 	    {
6145 	      if (dump_enabled_p ())
6146 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6147 				 "not supported operation on bool value.\n");
6148 	      return false;
6149 	    }
6150 	  vectype = vectype_out;
6151 	}
6152       else
6153 	vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6154 					       slp_node);
6155     }
6156   if (vec_stmt)
6157     gcc_assert (vectype);
6158   if (!vectype)
6159     {
6160       if (dump_enabled_p ())
6161 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6162 			 "no vectype for scalar type %T\n",
6163 			 TREE_TYPE (op0));
6164 
6165       return false;
6166     }
6167 
6168   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6169   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6170   if (maybe_ne (nunits_out, nunits_in))
6171     return false;
6172 
6173   tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6174   slp_tree slp_op1 = NULL, slp_op2 = NULL;
6175   if (op_type == binary_op || op_type == ternary_op)
6176     {
6177       if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6178 			       1, &op1, &slp_op1, &dt[1], &vectype2))
6179 	{
6180 	  if (dump_enabled_p ())
6181 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6182                              "use not simple.\n");
6183 	  return false;
6184 	}
6185       is_invariant &= (dt[1] == vect_external_def
6186 		       || dt[1] == vect_constant_def);
6187       if (vectype2
6188 	  && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6189 	return false;
6190     }
6191   if (op_type == ternary_op)
6192     {
6193       if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6194 			       2, &op2, &slp_op2, &dt[2], &vectype3))
6195 	{
6196 	  if (dump_enabled_p ())
6197 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6198                              "use not simple.\n");
6199 	  return false;
6200 	}
6201       is_invariant &= (dt[2] == vect_external_def
6202 		       || dt[2] == vect_constant_def);
6203       if (vectype3
6204 	  && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6205 	return false;
6206     }
6207 
6208   /* Multiple types in SLP are handled by creating the appropriate number of
6209      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
6210      case of SLP.  */
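  /* A worked example (illustrative): with a loop VF of 8 and V4SI
     vectors, a non-SLP statement gets ncopies = 8 / 4 = 2 below,
     whereas an SLP node covering 8 scalar lanes gets ncopies = 1 and
     vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS = 2.  */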
6211   if (slp_node)
6212     {
6213       ncopies = 1;
6214       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6215     }
6216   else
6217     {
6218       ncopies = vect_get_num_copies (loop_vinfo, vectype);
6219       vec_num = 1;
6220     }
6221 
6222   gcc_assert (ncopies >= 1);
6223 
6224   /* Reject attempts to combine mask types with nonmask types, e.g. if
6225      we have an AND between a (nonmask) boolean loaded from memory and
6226      a (mask) boolean result of a comparison.
6227 
6228      TODO: We could easily fix these cases up using pattern statements.  */
6229   if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6230       || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6231       || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6232     {
6233       if (dump_enabled_p ())
6234 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6235 			 "mixed mask and nonmask vector types\n");
6236       return false;
6237     }
6238 
6239   /* Supportable by target?  */
6240 
6241   vec_mode = TYPE_MODE (vectype);
6242   if (code == MULT_HIGHPART_EXPR)
6243     target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6244   else
6245     {
6246       optab = optab_for_tree_code (code, vectype, optab_default);
6247       if (!optab)
6248 	{
6249           if (dump_enabled_p ())
6250             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6251                              "no optab.\n");
6252 	  return false;
6253 	}
6254       target_support_p = (optab_handler (optab, vec_mode)
6255 			  != CODE_FOR_nothing);
6256     }
6257 
6258   bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6259   if (!target_support_p)
6260     {
6261       if (dump_enabled_p ())
6262 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6263                          "op not supported by target.\n");
6264       /* Check only during analysis.  */
6265       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6266 	  || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6267         return false;
6268       if (dump_enabled_p ())
6269 	dump_printf_loc (MSG_NOTE, vect_location,
6270                          "proceeding using word mode.\n");
6271       using_emulated_vectors_p = true;
6272     }
6273 
6274   if (using_emulated_vectors_p
6275       && !vect_can_vectorize_without_simd_p (code))
6276     {
6277       if (dump_enabled_p ())
6278 	dump_printf (MSG_NOTE, "using word mode not possible.\n");
6279       return false;
6280     }
6281 
6282   /* ???  We should instead expand the operations here, instead of
6283      relying on vector lowering, which has this hard cap on the number
6284      of vector elements below which it performs elementwise operations.  */
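  /* For example (illustrative): for V2SI emulated in a 64-bit word,
     BITS_PER_WORD / vector_element_bits (vectype) is 64 / 32 == 2 < 4
     (and nunits_out is 2 < 4 as well), so the emulated
     PLUS/MINUS/NEGATE path is rejected by the check below.  */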
6285   if (using_emulated_vectors_p
6286       && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6287       && ((BITS_PER_WORD / vector_element_bits (vectype)) < 4
6288 	  || maybe_lt (nunits_out, 4U)))
6289     {
6290       if (dump_enabled_p ())
6291 	dump_printf (MSG_NOTE, "not using word mode for +- and less than "
6292 		     "four vector elements\n");
6293       return false;
6294     }
6295 
6296   int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6297   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6298   internal_fn cond_fn = get_conditional_internal_fn (code);
6299 
6300   /* If operating on inactive elements could generate spurious traps,
6301      we need to restrict the operation to active lanes.  Note that this
6302      specifically doesn't apply to unhoisted invariants, since they
6303      operate on the same value for every lane.
6304 
6305      Similarly, if this operation is part of a reduction, a fully-masked
6306      loop should only change the active lanes of the reduction chain,
6307      keeping the inactive lanes as-is.  */
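  /* As an illustrative sketch: for a vectorized division in a
     fully-masked loop the transform code below emits something like
       _res = .COND_DIV (loop_mask, vop0, vop1, else_value);
     so that inactive lanes take ELSE_VALUE (or the reduction chain
     input when REDUC_IDX >= 0) instead of possibly trapping.  */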
6308   bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6309 			    || reduc_idx >= 0);
6310 
6311   if (!vec_stmt) /* transformation not required.  */
6312     {
6313       if (loop_vinfo
6314 	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6315 	  && mask_out_inactive)
6316 	{
6317 	  if (cond_fn == IFN_LAST
6318 	      || !direct_internal_fn_supported_p (cond_fn, vectype,
6319 						  OPTIMIZE_FOR_SPEED))
6320 	    {
6321 	      if (dump_enabled_p ())
6322 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6323 				 "can't use a fully-masked loop because no"
6324 				 " conditional operation is available.\n");
6325 	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6326 	    }
6327 	  else
6328 	    vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6329 				   vectype, NULL);
6330 	}
6331 
6332       /* Put types on constant and invariant SLP children.  */
6333       if (slp_node
6334 	  && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6335 	      || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6336 	      || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6337 	{
6338 	  if (dump_enabled_p ())
6339 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6340 			     "incompatible vector types for invariants\n");
6341 	  return false;
6342 	}
6343 
6344       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6345       DUMP_VECT_SCOPE ("vectorizable_operation");
6346       vect_model_simple_cost (vinfo, stmt_info,
6347 			      ncopies, dt, ndts, slp_node, cost_vec);
6348       if (using_emulated_vectors_p)
6349 	{
6350 	  /* The above vect_model_simple_cost call handles constants
6351 	     in the prologue and (mis-)costs one of the stmts as
6352 	     vector stmt.  See tree-vect-generic.cc:do_plus_minus/do_negate
6353 	     for the actual lowering that will be applied.  */
6354 	  unsigned n
6355 	    = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6356 	  switch (code)
6357 	    {
6358 	    case PLUS_EXPR:
6359 	      n *= 5;
6360 	      break;
6361 	    case MINUS_EXPR:
6362 	      n *= 6;
6363 	      break;
6364 	    case NEGATE_EXPR:
6365 	      n *= 4;
6366 	      break;
6367 	    default:;
6368 	    }
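	  /* E.g. two copies of an emulated PLUS_EXPR (ncopies == 2,
	     no SLP) record n = 2 * 5 = 10 scalar stmts here, roughly
	     matching what the lowering will emit.  */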
6369 	  record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body);
6370 	}
6371       return true;
6372     }
6373 
6374   /* Transform.  */
6375 
6376   if (dump_enabled_p ())
6377     dump_printf_loc (MSG_NOTE, vect_location,
6378                      "transform binary/unary operation.\n");
6379 
6380   bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6381 
6382   /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6383      vectors with unsigned elements, but the result is signed.  So, we
6384      need to compute the MINUS_EXPR into vectype temporary and
6385      VIEW_CONVERT_EXPR it into the final vectype_out result.  */
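  /* An illustrative sketch of the IL generated below (element types
     depend on the target):
       vect_tmp = vop0 - vop1;                        <- in VECTYPE
       vect_res = VIEW_CONVERT_EXPR <VECTYPE_OUT> (vect_tmp);  */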
6386   tree vec_cvt_dest = NULL_TREE;
6387   if (orig_code == POINTER_DIFF_EXPR)
6388     {
6389       vec_dest = vect_create_destination_var (scalar_dest, vectype);
6390       vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6391     }
6392   /* Handle def.  */
6393   else
6394     vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6395 
6396   /* In case the vectorization factor (VF) is bigger than the number
6397      of elements that we can fit in a vectype (nunits), we have to generate
6398      more than one vector stmt - i.e., we need to "unroll" the
6399      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
6400      from one copy of the vector stmt to the next, in the field
6401      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
6402      stages to find the correct vector defs to be used when vectorizing
6403      stmts that use the defs of the current stmt.  The example below
6404      illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6405      we need to create 4 vectorized stmts):
6406 
6407      before vectorization:
6408                                 RELATED_STMT    VEC_STMT
6409         S1:     x = memref      -               -
6410         S2:     z = x + 1       -               -
6411 
6412      step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6413              there):
6414                                 RELATED_STMT    VEC_STMT
6415         VS1_0:  vx0 = memref0   VS1_1           -
6416         VS1_1:  vx1 = memref1   VS1_2           -
6417         VS1_2:  vx2 = memref2   VS1_3           -
6418         VS1_3:  vx3 = memref3   -               -
6419         S1:     x = load        -               VS1_0
6420         S2:     z = x + 1       -               -
6421 
6422      step2: vectorize stmt S2 (done here):
6423         To vectorize stmt S2 we first need to find the relevant vector
6424         def for the first operand 'x'.  This is, as usual, obtained from
6425         the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6426         that defines 'x' (S1).  This way we find the stmt VS1_0, and the
6427         relevant vector def 'vx0'.  Having found 'vx0' we can generate
6428         the vector stmt VS2_0, and as usual, record it in the
6429         STMT_VINFO_VEC_STMT of stmt S2.
6430         When creating the second copy (VS2_1), we obtain the relevant vector
6431         def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6432         stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
6433         vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
6434         pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6435         Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
6436         chain of stmts and pointers:
6437                                 RELATED_STMT    VEC_STMT
6438         VS1_0:  vx0 = memref0   VS1_1           -
6439         VS1_1:  vx1 = memref1   VS1_2           -
6440         VS1_2:  vx2 = memref2   VS1_3           -
6441         VS1_3:  vx3 = memref3   -               -
6442         S1:     x = load        -               VS1_0
6443         VS2_0:  vz0 = vx0 + v1  VS2_1           -
6444         VS2_1:  vz1 = vx1 + v1  VS2_2           -
6445         VS2_2:  vz2 = vx2 + v1  VS2_3           -
6446         VS2_3:  vz3 = vx3 + v1  -               -
6447         S2:     z = x + 1       -               VS2_0  */
6448 
6449   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6450 		     op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6451   /* Arguments are ready.  Create the new vector stmt.  */
6452   FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6453     {
6454       gimple *new_stmt = NULL;
6455       vop1 = ((op_type == binary_op || op_type == ternary_op)
6456 	      ? vec_oprnds1[i] : NULL_TREE);
6457       vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6458       if (masked_loop_p && mask_out_inactive)
6459 	{
6460 	  tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6461 					  vectype, i);
6462 	  auto_vec<tree> vops (5);
6463 	  vops.quick_push (mask);
6464 	  vops.quick_push (vop0);
6465 	  if (vop1)
6466 	    vops.quick_push (vop1);
6467 	  if (vop2)
6468 	    vops.quick_push (vop2);
6469 	  if (reduc_idx >= 0)
6470 	    {
6471 	      /* Perform the operation on active elements only and take
6472 		 inactive elements from the reduction chain input.  */
6473 	      gcc_assert (!vop2);
6474 	      vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
6475 	    }
6476 	  else
6477 	    {
6478 	      auto else_value = targetm.preferred_else_value
6479 		(cond_fn, vectype, vops.length () - 1, &vops[1]);
6480 	      vops.quick_push (else_value);
6481 	    }
6482 	  gcall *call = gimple_build_call_internal_vec (cond_fn, vops);
6483 	  new_temp = make_ssa_name (vec_dest, call);
6484 	  gimple_call_set_lhs (call, new_temp);
6485 	  gimple_call_set_nothrow (call, true);
6486 	  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6487 	  new_stmt = call;
6488 	}
6489       else
6490 	{
6491 	  tree mask = NULL_TREE;
6492 	  /* When combining two masks, check whether either of them is elsewhere
6493 	     combined with a loop mask; if so, we can mark the new combined mask
6494 	     as not needing to be combined with a loop mask again.  */
6495 	  if (masked_loop_p
6496 	      && code == BIT_AND_EXPR
6497 	      && VECTOR_BOOLEAN_TYPE_P (vectype))
6498 	    {
6499 	      if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
6500 								 ncopies}))
6501 		{
6502 		  mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6503 					     vectype, i);
6504 
6505 		  vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6506 					   vop0, gsi);
6507 		}
6508 
6509 	      if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
6510 								 ncopies }))
6511 		{
6512 		  mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6513 					     vectype, i);
6514 
6515 		  vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6516 					   vop1, gsi);
6517 		}
6518 	    }
6519 
6520 	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6521 	  new_temp = make_ssa_name (vec_dest, new_stmt);
6522 	  gimple_assign_set_lhs (new_stmt, new_temp);
6523 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6524 	  if (using_emulated_vectors_p)
6525 	    suppress_warning (new_stmt, OPT_Wvector_operation_performance);
6526 
6527 	  /* Enter the combined value into the vector cond hash so we don't
6528 	     AND it with a loop mask again.  */
6529 	  if (mask)
6530 	    loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
6531 
6532 	  if (vec_cvt_dest)
6533 	    {
6534 	      new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6535 	      new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6536 					      new_temp);
6537 	      new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6538 	      gimple_assign_set_lhs (new_stmt, new_temp);
6539 	      vect_finish_stmt_generation (vinfo, stmt_info,
6540 					   new_stmt, gsi);
6541 	    }
6542 	}
6543       if (slp_node)
6544 	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6545       else
6546 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6547     }
6548 
6549   if (!slp_node)
6550     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6551 
6552   vec_oprnds0.release ();
6553   vec_oprnds1.release ();
6554   vec_oprnds2.release ();
6555 
6556   return true;
6557 }
6558 
6559 /* A helper function to ensure data reference DR_INFO's base alignment.  */
6560 
6561 static void
6562 ensure_base_align (dr_vec_info *dr_info)
6563 {
6564   /* Alignment is only analyzed for the first element of a DR group;
6565      use that to determine the base alignment we need to enforce.  */
6566   if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
6567     dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
6568 
6569   gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
6570 
6571   if (dr_info->base_misaligned)
6572     {
6573       tree base_decl = dr_info->base_decl;
6574 
6575       // We should only be able to increase the alignment of a base object if
6576       // we know what its new alignment should be at compile time.
6577       unsigned HOST_WIDE_INT align_base_to =
6578 	DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6579 
6580       if (decl_in_symtab_p (base_decl))
6581 	symtab_node::get (base_decl)->increase_alignment (align_base_to);
6582       else if (DECL_ALIGN (base_decl) < align_base_to)
6583 	{
6584 	  SET_DECL_ALIGN (base_decl, align_base_to);
6585           DECL_USER_ALIGN (base_decl) = 1;
6586 	}
6587       dr_info->base_misaligned = false;
6588     }
6589 }
6590 
6591 
6592 /* Function get_group_alias_ptr_type.
6593 
6594    Return the alias type for the group starting at FIRST_STMT_INFO.  */
6595 
6596 static tree
6597 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6598 {
6599   struct data_reference *first_dr, *next_dr;
6600 
6601   first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6602   stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6603   while (next_stmt_info)
6604     {
6605       next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6606       if (get_alias_set (DR_REF (first_dr))
6607 	  != get_alias_set (DR_REF (next_dr)))
6608 	{
6609 	  if (dump_enabled_p ())
6610 	    dump_printf_loc (MSG_NOTE, vect_location,
6611 			     "conflicting alias set types.\n");
6612 	  return ptr_type_node;
6613 	}
6614       next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6615     }
6616   return reference_alias_ptr_type (DR_REF (first_dr));
6617 }
6618 
6619 
6620 /* Function scan_operand_equal_p.
6621 
6622    Helper function for check_scan_store.  Compare two references
6623    with .GOMP_SIMD_LANE bases.  */
6624 
6625 static bool
6626 scan_operand_equal_p (tree ref1, tree ref2)
6627 {
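  /* For instance (an illustrative case): two references to D.2042[_25],
     one written as an ARRAY_REF and one through a pointer computed as
     &D.2042 + _25 * 4, both decompose below into base D.2042 with
     offset _25 and step 4 and hence compare equal.  */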
6628   tree ref[2] = { ref1, ref2 };
6629   poly_int64 bitsize[2], bitpos[2];
6630   tree offset[2], base[2];
6631   for (int i = 0; i < 2; ++i)
6632     {
6633       machine_mode mode;
6634       int unsignedp, reversep, volatilep = 0;
6635       base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6636       				     &offset[i], &mode, &unsignedp,
6637       				     &reversep, &volatilep);
6638       if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6639 	return false;
6640       if (TREE_CODE (base[i]) == MEM_REF
6641 	  && offset[i] == NULL_TREE
6642 	  && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6643 	{
6644 	  gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6645 	  if (is_gimple_assign (def_stmt)
6646 	      && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6647 	      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6648 	      && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6649 	    {
6650 	      if (maybe_ne (mem_ref_offset (base[i]), 0))
6651 		return false;
6652 	      base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6653 	      offset[i] = gimple_assign_rhs2 (def_stmt);
6654 	    }
6655 	}
6656     }
6657 
6658   if (!operand_equal_p (base[0], base[1], 0))
6659     return false;
6660   if (maybe_ne (bitsize[0], bitsize[1]))
6661     return false;
6662   if (offset[0] != offset[1])
6663     {
6664       if (!offset[0] || !offset[1])
6665 	return false;
6666       if (!operand_equal_p (offset[0], offset[1], 0))
6667 	{
6668 	  tree step[2];
6669 	  for (int i = 0; i < 2; ++i)
6670 	    {
6671 	      step[i] = integer_one_node;
6672 	      if (TREE_CODE (offset[i]) == SSA_NAME)
6673 		{
6674 		  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6675 		  if (is_gimple_assign (def_stmt)
6676 		      && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6677 		      && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6678 			  == INTEGER_CST))
6679 		    {
6680 		      step[i] = gimple_assign_rhs2 (def_stmt);
6681 		      offset[i] = gimple_assign_rhs1 (def_stmt);
6682 		    }
6683 		}
6684 	      else if (TREE_CODE (offset[i]) == MULT_EXPR)
6685 		{
6686 		  step[i] = TREE_OPERAND (offset[i], 1);
6687 		  offset[i] = TREE_OPERAND (offset[i], 0);
6688 		}
6689 	      tree rhs1 = NULL_TREE;
6690 	      if (TREE_CODE (offset[i]) == SSA_NAME)
6691 		{
6692 		  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6693 		  if (gimple_assign_cast_p (def_stmt))
6694 		    rhs1 = gimple_assign_rhs1 (def_stmt);
6695 		}
6696 	      else if (CONVERT_EXPR_P (offset[i]))
6697 		rhs1 = TREE_OPERAND (offset[i], 0);
6698 	      if (rhs1
6699 		  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6700 		  && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6701 		  && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6702 		      >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6703 		offset[i] = rhs1;
6704 	    }
6705 	  if (!operand_equal_p (offset[0], offset[1], 0)
6706 	      || !operand_equal_p (step[0], step[1], 0))
6707 	    return false;
6708 	}
6709     }
6710   return true;
6711 }
6712 
6713 
6714 enum scan_store_kind {
6715   /* Normal permutation.  */
6716   scan_store_kind_perm,
6717 
6718   /* Whole vector left shift permutation with zero init.  */
6719   scan_store_kind_lshift_zero,
6720 
6721   /* Whole vector left shift permutation and VEC_COND_EXPR.  */
6722   scan_store_kind_lshift_cond
6723 };
6724 
6725 /* Function scan_store_can_perm_p.
6726 
6727    Verify if we can perform the needed permutations or whole vector shifts.
6728    Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6729    If USE_WHOLE_VECTOR is nonnull, fill it with the enum scan_store_kind
6730    operation to perform at each step.  */
6731 
6732 static int
6733 scan_store_can_perm_p (tree vectype, tree init,
6734 		       vec<enum scan_store_kind> *use_whole_vector = NULL)
6735 {
6736   enum machine_mode vec_mode = TYPE_MODE (vectype);
6737   unsigned HOST_WIDE_INT nunits;
6738   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6739     return -1;
6740   int units_log2 = exact_log2 (nunits);
6741   if (units_log2 <= 0)
6742     return -1;
6743 
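  /* As an illustration of the selectors built below, for nunits == 8:
       i == 0: { 0, 8, 9, 10, 11, 12, 13, 14 }
       i == 1: { 0, 1, 8, 9, 10, 11, 12, 13 }
       i == 2: { 0, 1, 2, 3, 8, 9, 10, 11 }
       i == 3: { 7, 7, 7, 7, 7, 7, 7, 7 }
     matching the permutations used in the scan expansion documented
     in check_scan_store.  */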
6744   int i;
6745   enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6746   for (i = 0; i <= units_log2; ++i)
6747     {
6748       unsigned HOST_WIDE_INT j, k;
6749       enum scan_store_kind kind = scan_store_kind_perm;
6750       vec_perm_builder sel (nunits, nunits, 1);
6751       sel.quick_grow (nunits);
6752       if (i == units_log2)
6753 	{
6754 	  for (j = 0; j < nunits; ++j)
6755 	    sel[j] = nunits - 1;
6756 	}
6757       else
6758 	{
6759 	  for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6760 	    sel[j] = j;
6761 	  for (k = 0; j < nunits; ++j, ++k)
6762 	    sel[j] = nunits + k;
6763 	}
6764       vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6765       if (!can_vec_perm_const_p (vec_mode, indices))
6766 	{
6767 	  if (i == units_log2)
6768 	    return -1;
6769 
6770 	  if (whole_vector_shift_kind == scan_store_kind_perm)
6771 	    {
6772 	      if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6773 		return -1;
6774 	      whole_vector_shift_kind = scan_store_kind_lshift_zero;
6775 	      /* Whole vector shifts shift in zeros, so if init is all zero
6776 		 constant, there is no need to do anything further.  */
6777 	      if ((TREE_CODE (init) != INTEGER_CST
6778 		   && TREE_CODE (init) != REAL_CST)
6779 		  || !initializer_zerop (init))
6780 		{
6781 		  tree masktype = truth_type_for (vectype);
6782 		  if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6783 		    return -1;
6784 		  whole_vector_shift_kind = scan_store_kind_lshift_cond;
6785 		}
6786 	    }
6787 	  kind = whole_vector_shift_kind;
6788 	}
6789       if (use_whole_vector)
6790 	{
6791 	  if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6792 	    use_whole_vector->safe_grow_cleared (i, true);
6793 	  if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6794 	    use_whole_vector->safe_push (kind);
6795 	}
6796     }
6797 
6798   return units_log2;
6799 }
6800 
6801 
6802 /* Function check_scan_store.
6803 
6804    Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */
6805 
6806 static bool
6807 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6808 		  enum vect_def_type rhs_dt, bool slp, tree mask,
6809 		  vect_memory_access_type memory_access_type)
6810 {
6811   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6812   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6813   tree ref_type;
6814 
6815   gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6816   if (slp
6817       || mask
6818       || memory_access_type != VMAT_CONTIGUOUS
6819       || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6820       || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6821       || loop_vinfo == NULL
6822       || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6823       || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6824       || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6825       || !integer_zerop (DR_INIT (dr_info->dr))
6826       || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6827       || !alias_sets_conflict_p (get_alias_set (vectype),
6828 				 get_alias_set (TREE_TYPE (ref_type))))
6829     {
6830       if (dump_enabled_p ())
6831 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6832 			 "unsupported OpenMP scan store.\n");
6833       return false;
6834     }
6835 
6836   /* We need to pattern match code built by OpenMP lowering and simplified
6837      by subsequent optimizations into something we can handle.
6838      #pragma omp simd reduction(inscan,+:r)
6839      for (...)
6840        {
6841 	 r += something ();
6842 	 #pragma omp scan inclusive (r)
6843 	 use (r);
6844        }
6845      shall have body with:
6846        // Initialization for input phase, store the reduction initializer:
6847        _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6848        _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6849        D.2042[_21] = 0;
6850        // Actual input phase:
6851        ...
6852        r.0_5 = D.2042[_20];
6853        _6 = _4 + r.0_5;
6854        D.2042[_20] = _6;
6855        // Initialization for scan phase:
6856        _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6857        _26 = D.2043[_25];
6858        _27 = D.2042[_25];
6859        _28 = _26 + _27;
6860        D.2043[_25] = _28;
6861        D.2042[_25] = _28;
6862        // Actual scan phase:
6863        ...
6864        r.1_8 = D.2042[_20];
6865        ...
6866      The "omp simd array" variable D.2042 holds the privatized copy used
6867      inside of the loop and D.2043 is another one that holds copies of
6868      the current original list item.  The separate GOMP_SIMD_LANE ifn
6869      kinds are there in order to allow optimizing the initializer store
6870      and combiner sequence, e.g. if it is originally some C++ish user
6871      defined reduction, but allow the vectorizer to pattern recognize it
6872      and turn it into the appropriate vectorized scan.
6873 
6874      For exclusive scan, this is slightly different:
6875      #pragma omp simd reduction(inscan,+:r)
6876      for (...)
6877        {
6878 	 use (r);
6879 	 #pragma omp scan exclusive (r)
6880 	 r += something ();
6881        }
6882      shall have body with:
6883        // Initialization for input phase, store the reduction initializer:
6884        _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6885        _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6886        D.2042[_21] = 0;
6887        // Actual input phase:
6888        ...
6889        r.0_5 = D.2042[_20];
6890        _6 = _4 + r.0_5;
6891        D.2042[_20] = _6;
6892        // Initialization for scan phase:
6893        _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6894        _26 = D.2043[_25];
6895        D.2044[_25] = _26;
6896        _27 = D.2042[_25];
6897        _28 = _26 + _27;
6898        D.2043[_25] = _28;
6899        // Actual scan phase:
6900        ...
6901        r.1_8 = D.2044[_20];
6902        ...  */
6903 
6904   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6905     {
6906       /* Match the D.2042[_21] = 0; store above.  Just require that
6907 	 it is a constant or external definition store.  */
6908       if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6909 	{
6910 	 fail_init:
6911 	  if (dump_enabled_p ())
6912 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6913 			     "unsupported OpenMP scan initializer store.\n");
6914 	  return false;
6915 	}
6916 
6917       if (! loop_vinfo->scan_map)
6918 	loop_vinfo->scan_map = new hash_map<tree, tree>;
6919       tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6920       tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6921       if (cached)
6922 	goto fail_init;
6923       cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6924 
6925       /* These stores can be vectorized normally.  */
6926       return true;
6927     }
6928 
6929   if (rhs_dt != vect_internal_def)
6930     {
6931      fail:
6932       if (dump_enabled_p ())
6933 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6934 			 "unsupported OpenMP scan combiner pattern.\n");
6935       return false;
6936     }
6937 
6938   gimple *stmt = STMT_VINFO_STMT (stmt_info);
6939   tree rhs = gimple_assign_rhs1 (stmt);
6940   if (TREE_CODE (rhs) != SSA_NAME)
6941     goto fail;
6942 
6943   gimple *other_store_stmt = NULL;
6944   tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6945   bool inscan_var_store
6946     = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6947 
6948   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6949     {
6950       if (!inscan_var_store)
6951 	{
6952 	  use_operand_p use_p;
6953 	  imm_use_iterator iter;
6954 	  FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6955 	    {
6956 	      gimple *use_stmt = USE_STMT (use_p);
6957 	      if (use_stmt == stmt || is_gimple_debug (use_stmt))
6958 		continue;
6959 	      if (gimple_bb (use_stmt) != gimple_bb (stmt)
6960 		  || !is_gimple_assign (use_stmt)
6961 		  || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6962 		  || other_store_stmt
6963 		  || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6964 		goto fail;
6965 	      other_store_stmt = use_stmt;
6966 	    }
6967 	  if (other_store_stmt == NULL)
6968 	    goto fail;
6969 	  rhs = gimple_assign_lhs (other_store_stmt);
6970 	  if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6971 	    goto fail;
6972 	}
6973     }
6974   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6975     {
6976       use_operand_p use_p;
6977       imm_use_iterator iter;
6978       FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6979 	{
6980 	  gimple *use_stmt = USE_STMT (use_p);
6981 	  if (use_stmt == stmt || is_gimple_debug (use_stmt))
6982 	    continue;
6983 	  if (other_store_stmt)
6984 	    goto fail;
6985 	  other_store_stmt = use_stmt;
6986 	}
6987     }
6988   else
6989     goto fail;
6990 
6991   gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6992   if (gimple_bb (def_stmt) != gimple_bb (stmt)
6993       || !is_gimple_assign (def_stmt)
6994       || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6995     goto fail;
6996 
6997   enum tree_code code = gimple_assign_rhs_code (def_stmt);
6998   /* For pointer addition, we should use the normal plus for the vector
6999      operation.  */
7000   switch (code)
7001     {
7002     case POINTER_PLUS_EXPR:
7003       code = PLUS_EXPR;
7004       break;
7005     case MULT_HIGHPART_EXPR:
7006       goto fail;
7007     default:
7008       break;
7009     }
7010   if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7011     goto fail;
7012 
7013   tree rhs1 = gimple_assign_rhs1 (def_stmt);
7014   tree rhs2 = gimple_assign_rhs2 (def_stmt);
7015   if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7016     goto fail;
7017 
7018   gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7019   gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7020   if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7021       || !gimple_assign_load_p (load1_stmt)
7022       || gimple_bb (load2_stmt) != gimple_bb (stmt)
7023       || !gimple_assign_load_p (load2_stmt))
7024     goto fail;
7025 
7026   stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7027   stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7028   if (load1_stmt_info == NULL
7029       || load2_stmt_info == NULL
7030       || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7031 	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7032       || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7033 	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7034     goto fail;
7035 
7036   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7037     {
7038       dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7039       if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7040 	  || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7041 	goto fail;
7042       tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7043       tree lrhs;
7044       if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7045 	lrhs = rhs1;
7046       else
7047 	lrhs = rhs2;
7048       use_operand_p use_p;
7049       imm_use_iterator iter;
7050       FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7051 	{
7052 	  gimple *use_stmt = USE_STMT (use_p);
7053 	  if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7054 	    continue;
7055 	  if (other_store_stmt)
7056 	    goto fail;
7057 	  other_store_stmt = use_stmt;
7058 	}
7059     }
7060 
7061   if (other_store_stmt == NULL)
7062     goto fail;
7063   if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7064       || !gimple_store_p (other_store_stmt))
7065     goto fail;
7066 
7067   stmt_vec_info other_store_stmt_info
7068     = loop_vinfo->lookup_stmt (other_store_stmt);
7069   if (other_store_stmt_info == NULL
7070       || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7071 	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7072     goto fail;
7073 
7074   gimple *stmt1 = stmt;
7075   gimple *stmt2 = other_store_stmt;
7076   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7077     std::swap (stmt1, stmt2);
7078   if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7079 			    gimple_assign_rhs1 (load2_stmt)))
7080     {
7081       std::swap (rhs1, rhs2);
7082       std::swap (load1_stmt, load2_stmt);
7083       std::swap (load1_stmt_info, load2_stmt_info);
7084     }
7085   if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7086 			     gimple_assign_rhs1 (load1_stmt)))
7087     goto fail;
7088 
7089   tree var3 = NULL_TREE;
7090   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7091       && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7092 				gimple_assign_rhs1 (load2_stmt)))
7093     goto fail;
7094   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7095     {
7096       dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7097       if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7098 	  || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7099 	goto fail;
7100       var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7101       if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7102 	  || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7103 	  || lookup_attribute ("omp simd inscan exclusive",
7104 			       DECL_ATTRIBUTES (var3)))
7105 	goto fail;
7106     }
7107 
7108   dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7109   if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7110       || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7111     goto fail;
7112 
7113   tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7114   tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7115   if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7116       || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7117       || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7118 	 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7119     goto fail;
7120 
7121   if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7122     std::swap (var1, var2);
7123 
7124   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7125     {
7126       if (!lookup_attribute ("omp simd inscan exclusive",
7127 			     DECL_ATTRIBUTES (var1)))
7128 	goto fail;
7129       var1 = var3;
7130     }
7131 
7132   if (loop_vinfo->scan_map == NULL)
7133     goto fail;
7134   tree *init = loop_vinfo->scan_map->get (var1);
7135   if (init == NULL)
7136     goto fail;
7137 
7138   /* The IL is as expected, now check if we can actually vectorize it.
7139      Inclusive scan:
7140        _26 = D.2043[_25];
7141        _27 = D.2042[_25];
7142        _28 = _26 + _27;
7143        D.2043[_25] = _28;
7144        D.2042[_25] = _28;
7145      should be vectorized as (where _40 is the vectorized rhs
7146      from the D.2042[_21] = 0; store):
7147        _30 = MEM <vector(8) int> [(int *)&D.2043];
7148        _31 = MEM <vector(8) int> [(int *)&D.2042];
7149        _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7150        _33 = _31 + _32;
7151        // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7152        _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7153        _35 = _33 + _34;
7154        // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7155        //         _31[1]+.._31[4], ... _31[4]+.._31[7] };
7156        _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7157        _37 = _35 + _36;
7158        // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7159        //         _31[0]+.._31[4], ... _31[0]+.._31[7] };
7160        _38 = _30 + _37;
7161        _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7162        MEM <vector(8) int> [(int *)&D.2043] = _39;
7163        MEM <vector(8) int> [(int *)&D.2042] = _38;
7164      Exclusive scan:
7165        _26 = D.2043[_25];
7166        D.2044[_25] = _26;
7167        _27 = D.2042[_25];
7168        _28 = _26 + _27;
7169        D.2043[_25] = _28;
7170      should be vectorized as (where _40 is the vectorized rhs
7171      from the D.2042[_21] = 0; store):
7172        _30 = MEM <vector(8) int> [(int *)&D.2043];
7173        _31 = MEM <vector(8) int> [(int *)&D.2042];
7174        _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7175        _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7176        _34 = _32 + _33;
7177        // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7178        //         _31[3]+_31[4], ... _31[5]+.._31[6] };
7179        _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7180        _36 = _34 + _35;
7181        // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7182        //         _31[1]+.._31[4], ... _31[3]+.._31[6] };
7183        _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7184        _38 = _36 + _37;
7185        // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7186        //         _31[0]+.._31[4], ... _31[0]+.._31[6] };
7187        _39 = _30 + _38;
7188        _50 = _31 + _39;
7189        _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7190        MEM <vector(8) int> [(int *)&D.2044] = _39;
7191        MEM <vector(8) int> [(int *)&D.2042] = _51;  */
7192   enum machine_mode vec_mode = TYPE_MODE (vectype);
7193   optab optab = optab_for_tree_code (code, vectype, optab_default);
7194   if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7195     goto fail;
7196 
7197   int units_log2 = scan_store_can_perm_p (vectype, *init);
7198   if (units_log2 == -1)
7199     goto fail;
7200 
7201   return true;
7202 }
7203 
7204 
7205 /* Function vectorizable_scan_store.
7206 
7207    Helper of vectorizable_store; arguments are like those of vectorizable_store.
7208    Handle only the transformation; checking is done in check_scan_store.  */
7209 
7210 static bool
7211 vectorizable_scan_store (vec_info *vinfo,
7212 			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7213 			 gimple **vec_stmt, int ncopies)
7214 {
7215   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7216   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7217   tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7218   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7219 
7220   if (dump_enabled_p ())
7221     dump_printf_loc (MSG_NOTE, vect_location,
7222 		     "transform scan store. ncopies = %d\n", ncopies);
7223 
7224   gimple *stmt = STMT_VINFO_STMT (stmt_info);
7225   tree rhs = gimple_assign_rhs1 (stmt);
7226   gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7227 
7228   tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7229   bool inscan_var_store
7230     = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7231 
7232   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7233     {
7234       use_operand_p use_p;
7235       imm_use_iterator iter;
7236       FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7237 	{
7238 	  gimple *use_stmt = USE_STMT (use_p);
7239 	  if (use_stmt == stmt || is_gimple_debug (use_stmt))
7240 	    continue;
7241 	  rhs = gimple_assign_lhs (use_stmt);
7242 	  break;
7243 	}
7244     }
7245 
7246   gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7247   enum tree_code code = gimple_assign_rhs_code (def_stmt);
7248   if (code == POINTER_PLUS_EXPR)
7249     code = PLUS_EXPR;
7250   gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7251 	      && commutative_tree_code (code));
7252   tree rhs1 = gimple_assign_rhs1 (def_stmt);
7253   tree rhs2 = gimple_assign_rhs2 (def_stmt);
7254   gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7255   gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7256   gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7257   stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7258   stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7259   dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7260   dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7261   tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7262   tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7263 
7264   if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7265     {
7266       std::swap (rhs1, rhs2);
7267       std::swap (var1, var2);
7268       std::swap (load1_dr_info, load2_dr_info);
7269     }
7270 
7271   tree *init = loop_vinfo->scan_map->get (var1);
7272   gcc_assert (init);
7273 
7274   unsigned HOST_WIDE_INT nunits;
7275   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7276     gcc_unreachable ();
7277   auto_vec<enum scan_store_kind, 16> use_whole_vector;
7278   int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7279   gcc_assert (units_log2 > 0);
7280   auto_vec<tree, 16> perms;
7281   perms.quick_grow (units_log2 + 1);
7282   tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7283   for (int i = 0; i <= units_log2; ++i)
7284     {
7285       unsigned HOST_WIDE_INT j, k;
7286       vec_perm_builder sel (nunits, nunits, 1);
7287       sel.quick_grow (nunits);
7288       if (i == units_log2)
7289 	for (j = 0; j < nunits; ++j)
7290 	  sel[j] = nunits - 1;
7291       else
7292 	{
7293 	  for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7294 	    sel[j] = j;
7295 	  for (k = 0; j < nunits; ++j, ++k)
7296 	    sel[j] = nunits + k;
7297 	}
7298       vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7299       if (!use_whole_vector.is_empty ()
7300 	  && use_whole_vector[i] != scan_store_kind_perm)
7301 	{
7302 	  if (zero_vec == NULL_TREE)
7303 	    zero_vec = build_zero_cst (vectype);
7304 	  if (masktype == NULL_TREE
7305 	      && use_whole_vector[i] == scan_store_kind_lshift_cond)
7306 	    masktype = truth_type_for (vectype);
7307 	  perms[i] = vect_gen_perm_mask_any (vectype, indices);
7308 	}
7309       else
7310 	perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7311     }
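
  /* Illustration (consistent with the GIMPLE example in check_scan_store
     above): for nunits == 8 the loop just built
       perms[0] = { 0, 8, 9, 10, 11, 12, 13, 14 }
       perms[1] = { 0, 1, 8, 9, 10, 11, 12, 13 }
       perms[2] = { 0, 1, 2, 3, 8, 9, 10, 11 }
       perms[3] = { 7, 7, 7, 7, 7, 7, 7, 7 }
     i.e. shifts of the running vector by 1, 2 and 4 lanes, with the low
     lanes filled from the first VEC_PERM_EXPR operand (the init vector),
     plus a final broadcast of the last lane.  */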
7312 
7313   tree vec_oprnd1 = NULL_TREE;
7314   tree vec_oprnd2 = NULL_TREE;
7315   tree vec_oprnd3 = NULL_TREE;
7316   tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7317   tree dataref_offset = build_int_cst (ref_type, 0);
7318   tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7319 					   vectype, VMAT_CONTIGUOUS);
7320   tree ldataref_ptr = NULL_TREE;
7321   tree orig = NULL_TREE;
7322   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7323     ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7324   auto_vec<tree> vec_oprnds1;
7325   auto_vec<tree> vec_oprnds2;
7326   auto_vec<tree> vec_oprnds3;
7327   vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7328 		     *init, &vec_oprnds1,
7329 		     ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7330 		     rhs2, &vec_oprnds3);
7331   for (int j = 0; j < ncopies; j++)
7332     {
7333       vec_oprnd1 = vec_oprnds1[j];
7334       if (ldataref_ptr == NULL)
7335 	vec_oprnd2 = vec_oprnds2[j];
7336       vec_oprnd3 = vec_oprnds3[j];
7337       if (j == 0)
7338 	orig = vec_oprnd3;
7339       else if (!inscan_var_store)
7340 	dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7341 
7342       if (ldataref_ptr)
7343 	{
7344 	  vec_oprnd2 = make_ssa_name (vectype);
7345 	  tree data_ref = fold_build2 (MEM_REF, vectype,
7346 				       unshare_expr (ldataref_ptr),
7347 				       dataref_offset);
7348 	  vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7349 	  gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7350 	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7351 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7352 	  *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7353 	}
7354 
7355       tree v = vec_oprnd2;
7356       for (int i = 0; i < units_log2; ++i)
7357 	{
7358 	  tree new_temp = make_ssa_name (vectype);
7359 	  gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7360 					   (zero_vec
7361 					    && (use_whole_vector[i]
7362 						!= scan_store_kind_perm))
7363 					   ? zero_vec : vec_oprnd1, v,
7364 					   perms[i]);
7365 	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7366 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7367 	  *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7368 
7369 	  if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7370 	    {
7371 	      /* The whole-vector shift shifted in zero elements, but if
7372 		 *init is not initializer_zerop, we need to replace those
7373 		 elements with elements from vec_oprnd1.  */
7374 	      tree_vector_builder vb (masktype, nunits, 1);
7375 	      for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7376 		vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7377 			       ? boolean_false_node : boolean_true_node);
7378 
7379 	      tree new_temp2 = make_ssa_name (vectype);
7380 	      g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7381 				       new_temp, vec_oprnd1);
7382 	      vect_finish_stmt_generation (vinfo, stmt_info,
7383 					    g, gsi);
7384 	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7385 	      new_temp = new_temp2;
7386 	    }
7387 
7388 	  /* For exclusive scan, perform the perms[i] permutation once
7389 	     more.  */
7390 	  if (i == 0
7391 	      && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7392 	      && v == vec_oprnd2)
7393 	    {
7394 	      v = new_temp;
7395 	      --i;
7396 	      continue;
7397 	    }
7398 
7399 	  tree new_temp2 = make_ssa_name (vectype);
7400 	  g = gimple_build_assign (new_temp2, code, v, new_temp);
7401 	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7402 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7403 
7404 	  v = new_temp2;
7405 	}
7406 
7407       tree new_temp = make_ssa_name (vectype);
7408       gimple *g = gimple_build_assign (new_temp, code, orig, v);
7409       vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7410       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7411 
7412       tree last_perm_arg = new_temp;
7413       /* For exclusive scan, new_temp computed above is the exclusive scan
7414 	 prefix sum.  Turn it into inclusive prefix sum for the broadcast
7415 	 of the last element into orig.  */
7416       if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7417 	{
7418 	  last_perm_arg = make_ssa_name (vectype);
7419 	  g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7420 	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7421 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7422 	}
7423 
7424       orig = make_ssa_name (vectype);
7425       g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7426 			       last_perm_arg, perms[units_log2]);
7427       vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7428       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7429 
7430       if (!inscan_var_store)
7431 	{
7432 	  tree data_ref = fold_build2 (MEM_REF, vectype,
7433 				       unshare_expr (dataref_ptr),
7434 				       dataref_offset);
7435 	  vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7436 	  g = gimple_build_assign (data_ref, new_temp);
7437 	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7438 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7439 	}
7440     }
7441 
7442   if (inscan_var_store)
7443     for (int j = 0; j < ncopies; j++)
7444       {
7445 	if (j != 0)
7446 	  dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7447 
7448 	tree data_ref = fold_build2 (MEM_REF, vectype,
7449 				     unshare_expr (dataref_ptr),
7450 				     dataref_offset);
7451 	vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7452 	gimple *g = gimple_build_assign (data_ref, orig);
7453 	vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7454 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7455       }
7456   return true;
7457 }
7458 
7459 
7460 /* Function vectorizable_store.
7461 
7462    Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7463    that can be vectorized.
7464    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7465    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7466    Return true if STMT_INFO is vectorizable in this way.  */
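
/* A hedged illustration (simplified, contiguous unmasked case; the SSA
   names are made up): a scalar store in the loop body such as

     a[i_20] = x_5;

   becomes one vector store per copy, e.g. for V4SI

     MEM <vector(4) int> [(int *)vectp_a.7_12] = vect_x_5.6_11;

   with the data pointer bumped by one vector between copies.  The
   grouped, strided, masked and scatter paths below emit different code
   but follow the same analysis/transform split.  */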
7467 
7468 static bool
7469 vectorizable_store (vec_info *vinfo,
7470 		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7471 		    gimple **vec_stmt, slp_tree slp_node,
7472 		    stmt_vector_for_cost *cost_vec)
7473 {
7474   tree data_ref;
7475   tree op;
7476   tree vec_oprnd = NULL_TREE;
7477   tree elem_type;
7478   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7479   class loop *loop = NULL;
7480   machine_mode vec_mode;
7481   tree dummy;
7482   enum vect_def_type rhs_dt = vect_unknown_def_type;
7483   enum vect_def_type mask_dt = vect_unknown_def_type;
7484   tree dataref_ptr = NULL_TREE;
7485   tree dataref_offset = NULL_TREE;
7486   gimple *ptr_incr = NULL;
7487   int ncopies;
7488   int j;
7489   stmt_vec_info first_stmt_info;
7490   bool grouped_store;
7491   unsigned int group_size, i;
7492   vec<tree> oprnds = vNULL;
7493   vec<tree> result_chain = vNULL;
7494   vec<tree> vec_oprnds = vNULL;
7495   bool slp = (slp_node != NULL);
7496   unsigned int vec_num;
7497   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7498   tree aggr_type;
7499   gather_scatter_info gs_info;
7500   poly_uint64 vf;
7501   vec_load_store_type vls_type;
7502   tree ref_type;
7503 
7504   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7505     return false;
7506 
7507   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7508       && ! vec_stmt)
7509     return false;
7510 
7511   /* Is vectorizable store? */
7512 
7513   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7514   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7515     {
7516       tree scalar_dest = gimple_assign_lhs (assign);
7517       if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7518 	  && is_pattern_stmt_p (stmt_info))
7519 	scalar_dest = TREE_OPERAND (scalar_dest, 0);
7520       if (TREE_CODE (scalar_dest) != ARRAY_REF
7521 	  && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7522 	  && TREE_CODE (scalar_dest) != INDIRECT_REF
7523 	  && TREE_CODE (scalar_dest) != COMPONENT_REF
7524 	  && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7525 	  && TREE_CODE (scalar_dest) != REALPART_EXPR
7526 	  && TREE_CODE (scalar_dest) != MEM_REF)
7527 	return false;
7528     }
7529   else
7530     {
7531       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7532       if (!call || !gimple_call_internal_p (call))
7533 	return false;
7534 
7535       internal_fn ifn = gimple_call_internal_fn (call);
7536       if (!internal_store_fn_p (ifn))
7537 	return false;
7538 
7539       if (slp_node != NULL)
7540 	{
7541 	  if (dump_enabled_p ())
7542 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7543 			     "SLP of masked stores not supported.\n");
7544 	  return false;
7545 	}
7546 
7547       int mask_index = internal_fn_mask_index (ifn);
7548       if (mask_index >= 0
7549 	  && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
7550 				      &mask, NULL, &mask_dt, &mask_vectype))
7551 	return false;
7552     }
7553 
7554   op = vect_get_store_rhs (stmt_info);
7555 
7556   /* Cannot have hybrid store SLP -- that would mean storing to the
7557      same location twice.  */
7558   gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7559 
7560   tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7561   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7562 
7563   if (loop_vinfo)
7564     {
7565       loop = LOOP_VINFO_LOOP (loop_vinfo);
7566       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7567     }
7568   else
7569     vf = 1;
7570 
7571   /* Multiple types in SLP are handled by creating the appropriate number of
7572      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
7573      case of SLP.  */
7574   if (slp)
7575     ncopies = 1;
7576   else
7577     ncopies = vect_get_num_copies (loop_vinfo, vectype);
7578 
7579   gcc_assert (ncopies >= 1);
7580 
7581   /* FORNOW.  This restriction should be relaxed.  */
7582   if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7583     {
7584       if (dump_enabled_p ())
7585 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7586 			 "multiple types in nested loop.\n");
7587       return false;
7588     }
7589 
7590   if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7591 			     op, &rhs_dt, &rhs_vectype, &vls_type))
7592     return false;
7593 
7594   elem_type = TREE_TYPE (vectype);
7595   vec_mode = TYPE_MODE (vectype);
7596 
7597   if (!STMT_VINFO_DATA_REF (stmt_info))
7598     return false;
7599 
7600   vect_memory_access_type memory_access_type;
7601   enum dr_alignment_support alignment_support_scheme;
7602   int misalignment;
7603   poly_int64 poffset;
7604   if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7605 			    ncopies, &memory_access_type, &poffset,
7606 			    &alignment_support_scheme, &misalignment, &gs_info))
7607     return false;
7608 
7609   if (mask)
7610     {
7611       if (memory_access_type == VMAT_CONTIGUOUS)
7612 	{
7613 	  if (!VECTOR_MODE_P (vec_mode)
7614 	      || !can_vec_mask_load_store_p (vec_mode,
7615 					     TYPE_MODE (mask_vectype), false))
7616 	    return false;
7617 	}
7618       else if (memory_access_type != VMAT_LOAD_STORE_LANES
7619 	       && (memory_access_type != VMAT_GATHER_SCATTER
7620 		   || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7621 	{
7622 	  if (dump_enabled_p ())
7623 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7624 			     "unsupported access type for masked store.\n");
7625 	  return false;
7626 	}
7627     }
7628   else
7629     {
7630       /* FORNOW.  In some cases we can vectorize even if the data type is
7631 	 not supported (e.g. array initialization with 0).  */
7632       if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7633 	return false;
7634     }
7635 
7636   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7637   grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7638 		   && memory_access_type != VMAT_GATHER_SCATTER
7639 		   && (slp || memory_access_type != VMAT_CONTIGUOUS));
7640   if (grouped_store)
7641     {
7642       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7643       first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7644       group_size = DR_GROUP_SIZE (first_stmt_info);
7645     }
7646   else
7647     {
7648       first_stmt_info = stmt_info;
7649       first_dr_info = dr_info;
7650       group_size = vec_num = 1;
7651     }
7652 
7653   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7654     {
7655       if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7656 			     memory_access_type))
7657 	return false;
7658     }
7659 
7660   if (!vec_stmt) /* transformation not required.  */
7661     {
7662       STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7663 
7664       if (loop_vinfo
7665 	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7666 	check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
7667 					      vls_type, group_size,
7668 					      memory_access_type, &gs_info,
7669 					      mask);
7670 
7671       if (slp_node
7672 	  && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7673 						vectype))
7674 	{
7675 	  if (dump_enabled_p ())
7676 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7677 			     "incompatible vector types for invariants\n");
7678 	  return false;
7679 	}
7680 
7681       if (dump_enabled_p ()
7682 	  && memory_access_type != VMAT_ELEMENTWISE
7683 	  && memory_access_type != VMAT_GATHER_SCATTER
7684 	  && alignment_support_scheme != dr_aligned)
7685 	dump_printf_loc (MSG_NOTE, vect_location,
7686 			 "Vectorizing an unaligned access.\n");
7687 
7688       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7689       vect_model_store_cost (vinfo, stmt_info, ncopies,
7690 			     memory_access_type, alignment_support_scheme,
7691 			     misalignment, vls_type, slp_node, cost_vec);
7692       return true;
7693     }
7694   gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7695 
7696   /* Transform.  */
7697 
7698   ensure_base_align (dr_info);
7699 
7700   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7701     {
7702       tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7703       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7704       tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7705       tree ptr, var, scale, vec_mask;
7706       tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7707       tree mask_halfvectype = mask_vectype;
7708       edge pe = loop_preheader_edge (loop);
7709       gimple_seq seq;
7710       basic_block new_bb;
7711       enum { NARROW, NONE, WIDEN } modifier;
7712       poly_uint64 scatter_off_nunits
7713 	= TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7714 
7715       if (known_eq (nunits, scatter_off_nunits))
7716 	modifier = NONE;
7717       else if (known_eq (nunits * 2, scatter_off_nunits))
7718 	{
7719 	  modifier = WIDEN;
7720 
7721 	  /* Currently gathers and scatters are only supported for
7722 	     fixed-length vectors.  */
7723 	  unsigned int count = scatter_off_nunits.to_constant ();
7724 	  vec_perm_builder sel (count, count, 1);
7725 	  for (i = 0; i < (unsigned int) count; ++i)
7726 	    sel.quick_push (i | (count / 2));
7727 
7728 	  vec_perm_indices indices (sel, 1, count);
7729 	  perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7730 						  indices);
7731 	  gcc_assert (perm_mask != NULL_TREE);
7732 	}
7733       else if (known_eq (nunits, scatter_off_nunits * 2))
7734 	{
7735 	  modifier = NARROW;
7736 
7737 	  /* Currently gathers and scatters are only supported for
7738 	     fixed-length vectors.  */
7739 	  unsigned int count = nunits.to_constant ();
7740 	  vec_perm_builder sel (count, count, 1);
7741 	  for (i = 0; i < (unsigned int) count; ++i)
7742 	    sel.quick_push (i | (count / 2));
7743 
7744 	  vec_perm_indices indices (sel, 2, count);
7745 	  perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7746 	  gcc_assert (perm_mask != NULL_TREE);
7747 	  ncopies *= 2;
7748 
7749 	  if (mask)
7750 	    mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7751 	}
7752       else
7753 	gcc_unreachable ();
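
      /* Hedged example (hypothetical modes): with V8SI data and a V4DI
	 offset vector, scatter_off_nunits == nunits / 2, so modifier is
	 NARROW, ncopies is doubled and odd copies first permute the high
	 half of the data vector into the low lanes; in the opposite case
	 (offset vector with twice as many elements as the data vector)
	 modifier is WIDEN and odd copies use the high half of the offset
	 vector instead.  */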
7754 
7755       rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7756       ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7757       masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7758       idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7759       srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7760       scaletype = TREE_VALUE (arglist);
7761 
7762       gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7763 			   && TREE_CODE (rettype) == VOID_TYPE);
7764 
7765       ptr = fold_convert (ptrtype, gs_info.base);
7766       if (!is_gimple_min_invariant (ptr))
7767 	{
7768 	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7769 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7770 	  gcc_assert (!new_bb);
7771 	}
7772 
7773       if (mask == NULL_TREE)
7774 	{
7775 	  mask_arg = build_int_cst (masktype, -1);
7776 	  mask_arg = vect_init_vector (vinfo, stmt_info,
7777 				       mask_arg, masktype, NULL);
7778 	}
7779 
7780       scale = build_int_cst (scaletype, gs_info.scale);
7781 
7782       auto_vec<tree> vec_oprnds0;
7783       auto_vec<tree> vec_oprnds1;
7784       auto_vec<tree> vec_masks;
7785       if (mask)
7786 	{
7787 	  tree mask_vectype = truth_type_for (vectype);
7788 	  vect_get_vec_defs_for_operand (vinfo, stmt_info,
7789 					 modifier == NARROW
7790 					 ? ncopies / 2 : ncopies,
7791 					 mask, &vec_masks, mask_vectype);
7792 	}
7793       vect_get_vec_defs_for_operand (vinfo, stmt_info,
7794 				     modifier == WIDEN
7795 				     ? ncopies / 2 : ncopies,
7796 				     gs_info.offset, &vec_oprnds0);
7797       vect_get_vec_defs_for_operand (vinfo, stmt_info,
7798 				     modifier == NARROW
7799 				     ? ncopies / 2 : ncopies,
7800 				     op, &vec_oprnds1);
7801       for (j = 0; j < ncopies; ++j)
7802 	{
7803 	  if (modifier == WIDEN)
7804 	    {
7805 	      if (j & 1)
7806 		op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7807 					   perm_mask, stmt_info, gsi);
7808 	      else
7809 		op = vec_oprnd0 = vec_oprnds0[j / 2];
7810 	      src = vec_oprnd1 = vec_oprnds1[j];
7811 	      if (mask)
7812 		mask_op = vec_mask = vec_masks[j];
7813 	    }
7814 	  else if (modifier == NARROW)
7815 	    {
7816 	      if (j & 1)
7817 		src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7818 					    perm_mask, stmt_info, gsi);
7819 	      else
7820 		src = vec_oprnd1 = vec_oprnds1[j / 2];
7821 	      op = vec_oprnd0 = vec_oprnds0[j];
7822 	      if (mask)
7823 		mask_op = vec_mask = vec_masks[j / 2];
7824 	    }
7825 	  else
7826 	    {
7827 	      op = vec_oprnd0 = vec_oprnds0[j];
7828 	      src = vec_oprnd1 = vec_oprnds1[j];
7829 	      if (mask)
7830 		mask_op = vec_mask = vec_masks[j];
7831 	    }
7832 
7833 	  if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7834 	    {
7835 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7836 				    TYPE_VECTOR_SUBPARTS (srctype)));
7837 	      var = vect_get_new_ssa_name (srctype, vect_simple_var);
7838 	      src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7839 	      gassign *new_stmt
7840 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7841 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7842 	      src = var;
7843 	    }
7844 
7845 	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7846 	    {
7847 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7848 				    TYPE_VECTOR_SUBPARTS (idxtype)));
7849 	      var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7850 	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7851 	      gassign *new_stmt
7852 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7853 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7854 	      op = var;
7855 	    }
7856 
7857 	  if (mask)
7858 	    {
7859 	      tree utype;
7860 	      mask_arg = mask_op;
7861 	      if (modifier == NARROW)
7862 		{
7863 		  var = vect_get_new_ssa_name (mask_halfvectype,
7864 					       vect_simple_var);
7865 		  gassign *new_stmt
7866 		    = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7867 							: VEC_UNPACK_LO_EXPR,
7868 					   mask_op);
7869 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7870 		  mask_arg = var;
7871 		}
7872 	      tree optype = TREE_TYPE (mask_arg);
7873 	      if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7874 		utype = masktype;
7875 	      else
7876 		utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7877 	      var = vect_get_new_ssa_name (utype, vect_scalar_var);
7878 	      mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7879 	      gassign *new_stmt
7880 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7881 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7882 	      mask_arg = var;
7883 	      if (!useless_type_conversion_p (masktype, utype))
7884 		{
7885 		  gcc_assert (TYPE_PRECISION (utype)
7886 			      <= TYPE_PRECISION (masktype));
7887 		  var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7888 		  new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7889 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7890 		  mask_arg = var;
7891 		}
7892 	    }
7893 
7894 	  gcall *new_stmt
7895 	    = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7896 	   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7897 
7898 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7899 	}
7900       *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7901       return true;
7902     }
7903   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7904     return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7905 
7906   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7907     DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7908 
7909   if (grouped_store)
7910     {
7911       /* FORNOW */
7912       gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7913 
7914       /* We vectorize all the stmts of the interleaving group when we
7915 	 reach the last stmt in the group.  */
7916       if (DR_GROUP_STORE_COUNT (first_stmt_info)
7917 	  < DR_GROUP_SIZE (first_stmt_info)
7918 	  && !slp)
7919 	{
7920 	  *vec_stmt = NULL;
7921 	  return true;
7922 	}
7923 
7924       if (slp)
7925         {
7926           grouped_store = false;
7927           /* VEC_NUM is the number of vect stmts to be created for this
7928              group.  */
7929           vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7930 	  first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7931 	  gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7932 		      == first_stmt_info);
7933 	  first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7934 	  op = vect_get_store_rhs (first_stmt_info);
7935         }
7936       else
7937         /* VEC_NUM is the number of vect stmts to be created for this
7938            group.  */
7939 	vec_num = group_size;
7940 
7941       ref_type = get_group_alias_ptr_type (first_stmt_info);
7942     }
7943   else
7944     ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7945 
7946   if (dump_enabled_p ())
7947     dump_printf_loc (MSG_NOTE, vect_location,
7948                      "transform store. ncopies = %d\n", ncopies);
7949 
7950   if (memory_access_type == VMAT_ELEMENTWISE
7951       || memory_access_type == VMAT_STRIDED_SLP)
7952     {
7953       gimple_stmt_iterator incr_gsi;
7954       bool insert_after;
7955       gimple *incr;
7956       tree offvar;
7957       tree ivstep;
7958       tree running_off;
7959       tree stride_base, stride_step, alias_off;
7960       tree vec_oprnd;
7961       tree dr_offset;
7962       unsigned int g;
7963       /* Checked by get_load_store_type.  */
7964       unsigned int const_nunits = nunits.to_constant ();
7965 
7966       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7967       gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7968 
7969       dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7970       stride_base
7971 	= fold_build_pointer_plus
7972 	    (DR_BASE_ADDRESS (first_dr_info->dr),
7973 	     size_binop (PLUS_EXPR,
7974 			 convert_to_ptrofftype (dr_offset),
7975 			 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7976       stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7977 
7978       /* For a store with a loop-invariant stride that is not a power of 2
7979          (i.e. not a grouped access) like so:
7980 
7981 	   for (i = 0; i < n; i += stride)
7982 	     array[i] = ...;
7983 
7984 	 we generate a new induction variable and new stores from
7985 	 the components of the (vectorized) rhs:
7986 
7987 	   for (j = 0; ; j += VF*stride)
7988 	     vectemp = ...;
7989 	     tmp1 = vectemp[0];
7990 	     array[j] = tmp1;
7991 	     tmp2 = vectemp[1];
7992 	     array[j + stride] = tmp2;
7993 	     ...
7994          */
7995 
7996       unsigned nstores = const_nunits;
7997       unsigned lnel = 1;
7998       tree ltype = elem_type;
7999       tree lvectype = vectype;
8000       if (slp)
8001 	{
8002 	  if (group_size < const_nunits
8003 	      && const_nunits % group_size == 0)
8004 	    {
8005 	      nstores = const_nunits / group_size;
8006 	      lnel = group_size;
8007 	      ltype = build_vector_type (elem_type, group_size);
8008 	      lvectype = vectype;
8009 
8010 	      /* First check whether the vec_extract optab can extract these
8011 		 GROUP_SIZE-element sub-vectors directly.  */
8012 	      scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8013 	      machine_mode vmode;
8014 	      if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8015 		  || !related_vector_mode (TYPE_MODE (vectype), elmode,
8016 					   group_size).exists (&vmode)
8017 		  || (convert_optab_handler (vec_extract_optab,
8018 					     TYPE_MODE (vectype), vmode)
8019 		      == CODE_FOR_nothing))
8020 		{
8021 		  /* Try to avoid emitting an extract of vector elements
8022 		     by performing the extracts using an integer type of the
8023 		     same size, extracting from a vector of those and then
8024 		     re-interpreting it as the original vector type if
8025 		     supported.  */
8026 		  unsigned lsize
8027 		    = group_size * GET_MODE_BITSIZE (elmode);
8028 		  unsigned int lnunits = const_nunits / group_size;
8029 		  /* If we can't construct such a vector fall back to
8030 		     element extracts from the original vector type and
8031 		     element size stores.  */
8032 		  if (int_mode_for_size (lsize, 0).exists (&elmode)
8033 		      && VECTOR_MODE_P (TYPE_MODE (vectype))
8034 		      && related_vector_mode (TYPE_MODE (vectype), elmode,
8035 					      lnunits).exists (&vmode)
8036 		      && (convert_optab_handler (vec_extract_optab,
8037 						 vmode, elmode)
8038 			  != CODE_FOR_nothing))
8039 		    {
8040 		      nstores = lnunits;
8041 		      lnel = group_size;
8042 		      ltype = build_nonstandard_integer_type (lsize, 1);
8043 		      lvectype = build_vector_type (ltype, nstores);
8044 		    }
8045 		  /* Else fall back to vector extraction anyway.
8046 		     Fewer stores are more important than avoiding spilling
8047 		     of the vector we extract from.  Compared to the
8048 		     construction case in vectorizable_load no store-forwarding
8049 		     issue exists here for reasonable archs.  */
8050 		}
8051 	    }
8052 	  else if (group_size >= const_nunits
8053 		   && group_size % const_nunits == 0)
8054 	    {
8055 	      nstores = 1;
8056 	      lnel = const_nunits;
8057 	      ltype = vectype;
8058 	      lvectype = vectype;
8059 	    }
8060 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8061 	  ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8062 	}
8063 
8064       ivstep = stride_step;
8065       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8066 			    build_int_cst (TREE_TYPE (ivstep), vf));
8067 
8068       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8069 
8070       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8071       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8072       create_iv (stride_base, ivstep, NULL,
8073 		 loop, &incr_gsi, insert_after,
8074 		 &offvar, NULL);
8075       incr = gsi_stmt (incr_gsi);
8076 
8077       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8078 
8079       alias_off = build_int_cst (ref_type, 0);
8080       stmt_vec_info next_stmt_info = first_stmt_info;
8081       for (g = 0; g < group_size; g++)
8082 	{
8083 	  running_off = offvar;
8084 	  if (g)
8085 	    {
8086 	      tree size = TYPE_SIZE_UNIT (ltype);
8087 	      tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
8088 				      size);
8089 	      tree newoff = copy_ssa_name (running_off, NULL);
8090 	      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8091 					  running_off, pos);
8092 	      vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8093 	      running_off = newoff;
8094 	    }
8095 	  if (!slp)
8096 	    op = vect_get_store_rhs (next_stmt_info);
8097 	  vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
8098 			     op, &vec_oprnds);
8099 	  unsigned int group_el = 0;
8100 	  unsigned HOST_WIDE_INT
8101 	    elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8102 	  for (j = 0; j < ncopies; j++)
8103 	    {
8104 	      vec_oprnd = vec_oprnds[j];
8105 	      /* Pun the vector to extract from if necessary.  */
8106 	      if (lvectype != vectype)
8107 		{
8108 		  tree tem = make_ssa_name (lvectype);
8109 		  gimple *pun
8110 		    = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
8111 							lvectype, vec_oprnd));
8112 		  vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8113 		  vec_oprnd = tem;
8114 		}
8115 	      for (i = 0; i < nstores; i++)
8116 		{
8117 		  tree newref, newoff;
8118 		  gimple *incr, *assign;
8119 		  tree size = TYPE_SIZE (ltype);
8120 		  /* Extract the i'th component.  */
8121 		  tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8122 					  bitsize_int (i), size);
8123 		  tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8124 					   size, pos);
8125 
8126 		  elem = force_gimple_operand_gsi (gsi, elem, true,
8127 						   NULL_TREE, true,
8128 						   GSI_SAME_STMT);
8129 
8130 		  tree this_off = build_int_cst (TREE_TYPE (alias_off),
8131 						 group_el * elsz);
8132 		  newref = build2 (MEM_REF, ltype,
8133 				   running_off, this_off);
8134 		  vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8135 
8136 		  /* And store it to *running_off.  */
8137 		  assign = gimple_build_assign (newref, elem);
8138 		  vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8139 
8140 		  group_el += lnel;
8141 		  if (! slp
8142 		      || group_el == group_size)
8143 		    {
8144 		      newoff = copy_ssa_name (running_off, NULL);
8145 		      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8146 						  running_off, stride_step);
8147 		      vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8148 
8149 		      running_off = newoff;
8150 		      group_el = 0;
8151 		    }
8152 		  if (g == group_size - 1
8153 		      && !slp)
8154 		    {
8155 		      if (j == 0 && i == 0)
8156 			*vec_stmt = assign;
8157 		      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8158 		    }
8159 		}
8160 	    }
8161 	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8162 	  vec_oprnds.release ();
8163 	  if (slp)
8164 	    break;
8165 	}
8166 
8167       return true;
8168     }
8169 
8170   auto_vec<tree> dr_chain (group_size);
8171   oprnds.create (group_size);
8172 
8173   gcc_assert (alignment_support_scheme);
8174   vec_loop_masks *loop_masks
8175     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8176        ? &LOOP_VINFO_MASKS (loop_vinfo)
8177        : NULL);
8178   vec_loop_lens *loop_lens
8179     = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8180        ? &LOOP_VINFO_LENS (loop_vinfo)
8181        : NULL);
8182 
8183   /* Shouldn't use a length-based approach if fully masked.  */
8184   gcc_assert (!loop_lens || !loop_masks);
8185 
8186   /* Targets with store-lane instructions must not require explicit
8187      realignment.  vect_supportable_dr_alignment always returns either
8188      dr_aligned or dr_unaligned_supported for masked operations.  */
8189   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8190 	       && !mask
8191 	       && !loop_masks)
8192 	      || alignment_support_scheme == dr_aligned
8193 	      || alignment_support_scheme == dr_unaligned_supported);
8194 
8195   tree offset = NULL_TREE;
8196   if (!known_eq (poffset, 0))
8197     offset = size_int (poffset);
8198 
8199   tree bump;
8200   tree vec_offset = NULL_TREE;
8201   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8202     {
8203       aggr_type = NULL_TREE;
8204       bump = NULL_TREE;
8205     }
8206   else if (memory_access_type == VMAT_GATHER_SCATTER)
8207     {
8208       aggr_type = elem_type;
8209       vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8210 				       &bump, &vec_offset);
8211     }
8212   else
8213     {
8214       if (memory_access_type == VMAT_LOAD_STORE_LANES)
8215 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8216       else
8217 	aggr_type = vectype;
8218       bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
8219 					  memory_access_type);
8220     }
8221 
8222   if (mask)
8223     LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8224 
8225   /* In case the vectorization factor (VF) is bigger than the number
8226      of elements that we can fit in a vectype (nunits), we have to generate
8227      more than one vector stmt, i.e. we need to "unroll" the
8228      vector stmt by a factor of VF/nunits.  */
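
  /* E.g. (hedged illustration) with VF == 8 and V4SI vectors we get
     ncopies == 2, so each scalar store below gives rise to two vector
     stores, the second at an address bumped by one vector.  */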
8229 
8230   /* In case of interleaving (non-unit grouped access):
8231 
8232         S1:  &base + 2 = x2
8233         S2:  &base = x0
8234         S3:  &base + 1 = x1
8235         S4:  &base + 3 = x3
8236 
8237      We create vectorized stores starting from base address (the access of the
8238      first stmt in the chain (S2 in the above example), when the last store stmt
8239      of the chain (S4) is reached:
8240 
8241         VS1: &base = vx2
8242 	VS2: &base + vec_size*1 = vx0
8243 	VS3: &base + vec_size*2 = vx1
8244 	VS4: &base + vec_size*3 = vx3
8245 
8246      Then permutation statements are generated:
8247 
8248 	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8249 	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8250 	...
8251 
8252      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8253      (the order of the data-refs in the output of vect_permute_store_chain
8254      corresponds to the order of scalar stmts in the interleaving chain - see
8255      the documentation of vect_permute_store_chain()).
8256 
8257      In case of both multiple types and interleaving, above vector stores and
8258      permutation stmts are created for every copy.  The result vector stmts are
8259      put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8260      STMT_VINFO_RELATED_STMT for the next copies.
8261   */
8262 
8263   auto_vec<tree> vec_masks;
8264   tree vec_mask = NULL;
8265   auto_vec<tree> vec_offsets;
8266   auto_vec<vec<tree> > gvec_oprnds;
8267   gvec_oprnds.safe_grow_cleared (group_size, true);
8268   for (j = 0; j < ncopies; j++)
8269     {
8270       gimple *new_stmt;
8271       if (j == 0)
8272 	{
8273           if (slp)
8274             {
8275 	      /* Get vectorized arguments for SLP_NODE.  */
8276 	      vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8277 				 op, &vec_oprnds);
8278               vec_oprnd = vec_oprnds[0];
8279             }
8280           else
8281             {
8282 	      /* For interleaved stores we collect vectorized defs for all the
8283 		 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8284 		 used as an input to vect_permute_store_chain().
8285 
8286 		 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8287 		 and OPRNDS are of size 1.  */
8288 	      stmt_vec_info next_stmt_info = first_stmt_info;
8289 	      for (i = 0; i < group_size; i++)
8290 		{
8291 		  /* Since gaps are not supported for interleaved stores,
8292 		     DR_GROUP_SIZE is the exact number of stmts in the chain.
8293 		     Therefore, NEXT_STMT_INFO can't be NULL.  In case
8294 		     that there is no interleaving, DR_GROUP_SIZE is 1,
8295 		     and only one iteration of the loop will be executed.  */
8296 		  op = vect_get_store_rhs (next_stmt_info);
8297 		  vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8298 						 ncopies, op, &gvec_oprnds[i]);
8299 		  vec_oprnd = gvec_oprnds[i][0];
8300 		  dr_chain.quick_push (gvec_oprnds[i][0]);
8301 		  oprnds.quick_push (gvec_oprnds[i][0]);
8302 		  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8303 		}
8304 	      if (mask)
8305 		{
8306 		  vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8307 						 mask, &vec_masks, mask_vectype);
8308 		  vec_mask = vec_masks[0];
8309 		}
8310 	    }
8311 
8312 	  /* We should have caught mismatched types earlier.  */
8313 	  gcc_assert (useless_type_conversion_p (vectype,
8314 						 TREE_TYPE (vec_oprnd)));
8315 	  bool simd_lane_access_p
8316 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8317 	  if (simd_lane_access_p
8318 	      && !loop_masks
8319 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8320 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8321 	      && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8322 	      && integer_zerop (DR_INIT (first_dr_info->dr))
8323 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
8324 					get_alias_set (TREE_TYPE (ref_type))))
8325 	    {
8326 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8327 	      dataref_offset = build_int_cst (ref_type, 0);
8328 	    }
8329 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8330 	    {
8331 	      vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8332 					   slp_node, &gs_info, &dataref_ptr,
8333 					   &vec_offsets);
8334 	      vec_offset = vec_offsets[0];
8335 	    }
8336 	  else
8337 	    dataref_ptr
8338 	      = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8339 					  simd_lane_access_p ? loop : NULL,
8340 					  offset, &dummy, gsi, &ptr_incr,
8341 					  simd_lane_access_p, bump);
8342 	}
8343       else
8344 	{
8345 	  /* For interleaved stores we created vectorized defs for all the
8346 	     defs stored in OPRNDS in the previous iteration (previous copy).
8347 	     DR_CHAIN is then used as an input to vect_permute_store_chain().
8348 	     If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8349 	     OPRNDS are of size 1.  */
8350 	  for (i = 0; i < group_size; i++)
8351 	    {
8352 	      vec_oprnd = gvec_oprnds[i][j];
8353 	      dr_chain[i] = gvec_oprnds[i][j];
8354 	      oprnds[i] = gvec_oprnds[i][j];
8355 	    }
8356 	  if (mask)
8357 	    vec_mask = vec_masks[j];
8358 	  if (dataref_offset)
8359 	    dataref_offset
8360 	      = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8361 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8362 	    vec_offset = vec_offsets[j];
8363 	  else
8364 	    dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8365 					   stmt_info, bump);
8366 	}
8367 
8368       if (memory_access_type == VMAT_LOAD_STORE_LANES)
8369 	{
8370 	  tree vec_array;
8371 
8372 	  /* Get an array into which we can store the individual vectors.  */
8373 	  vec_array = create_vector_array (vectype, vec_num);
8374 
8375 	  /* Invalidate the current contents of VEC_ARRAY.  This should
8376 	     become an RTL clobber too, which prevents the vector registers
8377 	     from being upward-exposed.  */
8378 	  vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8379 
8380 	  /* Store the individual vectors into the array.  */
8381 	  for (i = 0; i < vec_num; i++)
8382 	    {
8383 	      vec_oprnd = dr_chain[i];
8384 	      write_vector_array (vinfo, stmt_info,
8385 				  gsi, vec_oprnd, vec_array, i);
8386 	    }
8387 
8388 	  tree final_mask = NULL;
8389 	  if (loop_masks)
8390 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8391 					     vectype, j);
8392 	  if (vec_mask)
8393 	    final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8394 					   final_mask, vec_mask, gsi);
8395 
8396 	  gcall *call;
8397 	  if (final_mask)
8398 	    {
8399 	      /* Emit:
8400 		   MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8401 				     VEC_ARRAY).  */
8402 	      unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8403 	      tree alias_ptr = build_int_cst (ref_type, align);
8404 	      call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8405 						 dataref_ptr, alias_ptr,
8406 						 final_mask, vec_array);
8407 	    }
8408 	  else
8409 	    {
8410 	      /* Emit:
8411 		   MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
8412 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8413 	      call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8414 						 vec_array);
8415 	      gimple_call_set_lhs (call, data_ref);
8416 	    }
8417 	  gimple_call_set_nothrow (call, true);
8418 	  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8419 	  new_stmt = call;
8420 
8421 	  /* Record that VEC_ARRAY is now dead.  */
8422 	  vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8423 	}
8424       else
8425 	{
8426 	  new_stmt = NULL;
8427 	  if (grouped_store)
8428 	    {
8429 	      if (j == 0)
8430 		result_chain.create (group_size);
8431 	      /* Permute.  */
8432 	      vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8433 					gsi, &result_chain);
8434 	    }
8435 
8436 	  stmt_vec_info next_stmt_info = first_stmt_info;
8437 	  for (i = 0; i < vec_num; i++)
8438 	    {
8439 	      unsigned misalign;
8440 	      unsigned HOST_WIDE_INT align;
8441 
8442 	      tree final_mask = NULL_TREE;
8443 	      if (loop_masks)
8444 		final_mask = vect_get_loop_mask (gsi, loop_masks,
8445 						 vec_num * ncopies,
8446 						 vectype, vec_num * j + i);
8447 	      if (vec_mask)
8448 		final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8449 					       final_mask, vec_mask, gsi);
8450 
8451 	      if (memory_access_type == VMAT_GATHER_SCATTER)
8452 		{
8453 		  tree scale = size_int (gs_info.scale);
8454 		  gcall *call;
8455 		  if (final_mask)
8456 		    call = gimple_build_call_internal
8457 		      (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8458 		       scale, vec_oprnd, final_mask);
8459 		  else
8460 		    call = gimple_build_call_internal
8461 		      (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8462 		       scale, vec_oprnd);
8463 		  gimple_call_set_nothrow (call, true);
8464 		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8465 		  new_stmt = call;
8466 		  break;
8467 		}
8468 
8469 	      if (i > 0)
8470 		/* Bump the vector pointer.  */
8471 		dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8472 					       gsi, stmt_info, bump);
8473 
8474 	      if (slp)
8475 		vec_oprnd = vec_oprnds[i];
8476 	      else if (grouped_store)
8477 		/* For grouped stores vectorized defs are interleaved in
8478 		   vect_permute_store_chain().  */
8479 		vec_oprnd = result_chain[i];
8480 
8481 	      align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8482 	      if (alignment_support_scheme == dr_aligned)
8483 		misalign = 0;
8484 	      else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
8485 		{
8486 		  align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8487 		  misalign = 0;
8488 		}
8489 	      else
8490 		misalign = misalignment;
8491 	      if (dataref_offset == NULL_TREE
8492 		  && TREE_CODE (dataref_ptr) == SSA_NAME)
8493 		set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8494 					misalign);
8495 	      align = least_bit_hwi (misalign | align);
8496 
8497 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8498 		{
8499 		  tree perm_mask = perm_mask_for_reverse (vectype);
8500 		  tree perm_dest = vect_create_destination_var
8501 		    (vect_get_store_rhs (stmt_info), vectype);
8502 		  tree new_temp = make_ssa_name (perm_dest);
8503 
8504 		  /* Generate the permute statement.  */
8505 		  gimple *perm_stmt
8506 		    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8507 					   vec_oprnd, perm_mask);
8508 		  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8509 
8510 		  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8511 		  vec_oprnd = new_temp;
8512 		}
8513 
8514 	      /* Arguments are ready.  Create the new vector stmt.  */
8515 	      if (final_mask)
8516 		{
8517 		  tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8518 		  gcall *call
8519 		    = gimple_build_call_internal (IFN_MASK_STORE, 4,
8520 						  dataref_ptr, ptr,
8521 						  final_mask, vec_oprnd);
8522 		  gimple_call_set_nothrow (call, true);
8523 		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8524 		  new_stmt = call;
8525 		}
8526 	      else if (loop_lens)
8527 		{
8528 		  tree final_len
8529 		    = vect_get_loop_len (loop_vinfo, loop_lens,
8530 					 vec_num * ncopies, vec_num * j + i);
8531 		  tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8532 		  machine_mode vmode = TYPE_MODE (vectype);
8533 		  opt_machine_mode new_ovmode
8534 		    = get_len_load_store_mode (vmode, false);
8535 		  machine_mode new_vmode = new_ovmode.require ();
8536 		  /* Need conversion if it's wrapped with VnQI.  */
8537 		  if (vmode != new_vmode)
8538 		    {
8539 		      tree new_vtype
8540 			= build_vector_type_for_mode (unsigned_intQI_type_node,
8541 						      new_vmode);
8542 		      tree var
8543 			= vect_get_new_ssa_name (new_vtype, vect_simple_var);
8544 		      vec_oprnd
8545 			= build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8546 		      gassign *new_stmt
8547 			= gimple_build_assign (var, VIEW_CONVERT_EXPR,
8548 					       vec_oprnd);
8549 		      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8550 						   gsi);
8551 		      vec_oprnd = var;
8552 		    }
8553 
8554 		  signed char biasval =
8555 		    LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8556 
8557 		  tree bias = build_int_cst (intQI_type_node, biasval);
8558 		  gcall *call
8559 		    = gimple_build_call_internal (IFN_LEN_STORE, 5, dataref_ptr,
8560 						  ptr, final_len, vec_oprnd,
8561 						  bias);
8562 		  gimple_call_set_nothrow (call, true);
8563 		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8564 		  new_stmt = call;
8565 		}
8566 	      else
8567 		{
8568 		  data_ref = fold_build2 (MEM_REF, vectype,
8569 					  dataref_ptr,
8570 					  dataref_offset
8571 					  ? dataref_offset
8572 					  : build_int_cst (ref_type, 0));
8573 		  if (alignment_support_scheme == dr_aligned)
8574 		    ;
8575 		  else
8576 		    TREE_TYPE (data_ref)
8577 		      = build_aligned_type (TREE_TYPE (data_ref),
8578 					    align * BITS_PER_UNIT);
8579 		  vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8580 		  new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8581 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8582 		}
8583 
8584 	      if (slp)
8585 		continue;
8586 
8587 	      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8588 	      if (!next_stmt_info)
8589 		break;
8590 	    }
8591 	}
8592       if (!slp)
8593 	{
8594 	  if (j == 0)
8595 	    *vec_stmt = new_stmt;
8596 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8597 	}
8598     }
8599 
8600   for (i = 0; i < group_size; ++i)
8601     {
8602       vec<tree> oprndsi = gvec_oprnds[i];
8603       oprndsi.release ();
8604     }
8605   oprnds.release ();
8606   result_chain.release ();
8607   vec_oprnds.release ();
8608 
8609   return true;
8610 }
8611 
8612 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8613    VECTOR_CST mask.  No checks are made that the target platform supports the
8614    mask, so callers may wish to test can_vec_perm_const_p separately, or use
8615    vect_gen_perm_mask_checked.  */
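
/* A minimal usage sketch (hypothetical caller; fixed-length vectors
   assumed): to build a mask that reverses an NUNITS-element vector one
   could write

     vec_perm_builder sel (nunits, nunits, 1);
     for (unsigned int i = 0; i < nunits; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   which mirrors perm_mask_for_reverse; use the _any variant together
   with an explicit can_vec_perm_const_p test when target support is not
   already known.  */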
8616 
8617 tree
8618 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8619 {
8620   tree mask_type;
8621 
8622   poly_uint64 nunits = sel.length ();
8623   gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8624 
8625   mask_type = build_vector_type (ssizetype, nunits);
8626   return vec_perm_indices_to_tree (mask_type, sel);
8627 }
8628 
8629 /* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
8630    i.e. that the target supports the pattern _for arbitrary input vectors_.  */
8631 
8632 tree
8633 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8634 {
8635   gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8636   return vect_gen_perm_mask_any (vectype, sel);
8637 }
8638 
8639 /* Given vector variables X and Y that were generated for the scalar
8640    STMT_INFO, generate instructions to permute the vector elements of X
8641    and Y using permutation mask MASK_VEC, insert them at *GSI and return
8642    the permuted vector variable.  */
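
/* For instance (illustrative values only), with MASK_VEC = { 3, 2, 1, 0 }
   on V4SI operands this emits a single statement

     perm_dest_N = VEC_PERM_EXPR <x, y, { 3, 2, 1, 0 }>;

   and since all indices are below 4 they select only from X, yielding X
   reversed.  */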
8643 
8644 static tree
8645 permute_vec_elements (vec_info *vinfo,
8646 		      tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8647 		      gimple_stmt_iterator *gsi)
8648 {
8649   tree vectype = TREE_TYPE (x);
8650   tree perm_dest, data_ref;
8651   gimple *perm_stmt;
8652 
8653   tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8654   if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8655     perm_dest = vect_create_destination_var (scalar_dest, vectype);
8656   else
8657     perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8658   data_ref = make_ssa_name (perm_dest);
8659 
8660   /* Generate the permute statement.  */
8661   perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8662   vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8663 
8664   return data_ref;
8665 }
8666 
8667 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8668    inserting them on the loop's preheader edge.  Returns true if we were
8669    successful in doing so (and thus STMT_INFO can then be moved),
8670    otherwise returns false.  */
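
/* Hedged illustration (hypothetical GIMPLE): for an invariant load

     loop:
       _1 = &a[j_10];	<- j_10 is defined before the loop
       x_2 = *_1;	<- STMT_INFO

   the definition of _1 is moved to the preheader so that STMT_INFO itself
   can be hoisted afterwards; if a feeding definition is a PHI or uses
   something defined inside the loop, the function gives up and returns
   false.  */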
8671 
8672 static bool
8673 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8674 {
8675   ssa_op_iter i;
8676   tree op;
8677   bool any = false;
8678 
8679   FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8680     {
8681       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8682       if (!gimple_nop_p (def_stmt)
8683 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8684 	{
8685 	  /* Make sure we don't need to recurse.  While we could do
8686 	     so in simple cases when there are more complex use webs
8687 	     we don't have an easy way to preserve stmt order to fulfil
8688 	     dependencies within them.  */
8689 	  tree op2;
8690 	  ssa_op_iter i2;
8691 	  if (gimple_code (def_stmt) == GIMPLE_PHI)
8692 	    return false;
8693 	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8694 	    {
8695 	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8696 	      if (!gimple_nop_p (def_stmt2)
8697 		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8698 		return false;
8699 	    }
8700 	  any = true;
8701 	}
8702     }
8703 
8704   if (!any)
8705     return true;
8706 
8707   FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8708     {
8709       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8710       if (!gimple_nop_p (def_stmt)
8711 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8712 	{
8713 	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8714 	  gsi_remove (&gsi, false);
8715 	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8716 	}
8717     }
8718 
8719   return true;
8720 }
8721 
8722 /* vectorizable_load.
8723 
8724    Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8725    that can be vectorized.
8726    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8727    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8728    Return true if STMT_INFO is vectorizable in this way.  */
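
/* A hedged illustration (simplified, contiguous unmasked case; the SSA
   names are made up): a scalar load in the loop body such as

     x_5 = a[i_20];

   becomes one vector load per copy, e.g. for V4SI

     vect_x_5.8_14 = MEM <vector(4) int> [(int *)vectp_a.6_13];

   the grouped, strided, gather and masked variants handled below mainly
   differ in how the address, permutation and mask operands are formed.  */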
8729 
8730 static bool
8731 vectorizable_load (vec_info *vinfo,
8732 		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8733 		   gimple **vec_stmt, slp_tree slp_node,
8734 		   stmt_vector_for_cost *cost_vec)
8735 {
8736   tree scalar_dest;
8737   tree vec_dest = NULL;
8738   tree data_ref = NULL;
8739   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8740   class loop *loop = NULL;
8741   class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8742   bool nested_in_vect_loop = false;
8743   tree elem_type;
8744   tree new_temp;
8745   machine_mode mode;
8746   tree dummy;
8747   tree dataref_ptr = NULL_TREE;
8748   tree dataref_offset = NULL_TREE;
8749   gimple *ptr_incr = NULL;
8750   int ncopies;
8751   int i, j;
8752   unsigned int group_size;
8753   poly_uint64 group_gap_adj;
8754   tree msq = NULL_TREE, lsq;
8755   tree realignment_token = NULL_TREE;
8756   gphi *phi = NULL;
8757   vec<tree> dr_chain = vNULL;
8758   bool grouped_load = false;
8759   stmt_vec_info first_stmt_info;
8760   stmt_vec_info first_stmt_info_for_drptr = NULL;
8761   bool compute_in_loop = false;
8762   class loop *at_loop;
8763   int vec_num;
8764   bool slp = (slp_node != NULL);
8765   bool slp_perm = false;
8766   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8767   poly_uint64 vf;
8768   tree aggr_type;
8769   gather_scatter_info gs_info;
8770   tree ref_type;
8771   enum vect_def_type mask_dt = vect_unknown_def_type;
8772 
8773   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8774     return false;
8775 
8776   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8777       && ! vec_stmt)
8778     return false;
8779 
8780   if (!STMT_VINFO_DATA_REF (stmt_info))
8781     return false;
8782 
8783   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8784   int mask_index = -1;
8785   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8786     {
8787       scalar_dest = gimple_assign_lhs (assign);
8788       if (TREE_CODE (scalar_dest) != SSA_NAME)
8789 	return false;
8790 
8791       tree_code code = gimple_assign_rhs_code (assign);
8792       if (code != ARRAY_REF
8793 	  && code != BIT_FIELD_REF
8794 	  && code != INDIRECT_REF
8795 	  && code != COMPONENT_REF
8796 	  && code != IMAGPART_EXPR
8797 	  && code != REALPART_EXPR
8798 	  && code != MEM_REF
8799 	  && TREE_CODE_CLASS (code) != tcc_declaration)
8800 	return false;
8801     }
8802   else
8803     {
8804       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8805       if (!call || !gimple_call_internal_p (call))
8806 	return false;
8807 
8808       internal_fn ifn = gimple_call_internal_fn (call);
8809       if (!internal_load_fn_p (ifn))
8810 	return false;
8811 
8812       scalar_dest = gimple_call_lhs (call);
8813       if (!scalar_dest)
8814 	return false;
8815 
8816       mask_index = internal_fn_mask_index (ifn);
8817       /* ??? For SLP the mask operand is always last.  */
8818       if (mask_index >= 0 && slp_node)
8819 	mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
8820       if (mask_index >= 0
8821 	  && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8822 				      &mask, NULL, &mask_dt, &mask_vectype))
8823 	return false;
8824     }
8825 
8826   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8827   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8828 
8829   if (loop_vinfo)
8830     {
8831       loop = LOOP_VINFO_LOOP (loop_vinfo);
8832       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8833       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8834     }
8835   else
8836     vf = 1;
8837 
8838   /* Multiple types in SLP are handled by creating the appropriate number of
8839      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
8840      case of SLP.  */
8841   if (slp)
8842     ncopies = 1;
8843   else
8844     ncopies = vect_get_num_copies (loop_vinfo, vectype);
8845 
8846   gcc_assert (ncopies >= 1);
8847 
8848   /* FORNOW. This restriction should be relaxed.  */
8849   if (nested_in_vect_loop && ncopies > 1)
8850     {
8851       if (dump_enabled_p ())
8852         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8853                          "multiple types in nested loop.\n");
8854       return false;
8855     }
8856 
8857   /* Invalidate assumptions made by dependence analysis when vectorization
8858      on the unrolled body effectively re-orders stmts.  */
8859   if (ncopies > 1
8860       && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8861       && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8862 		   STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8863     {
8864       if (dump_enabled_p ())
8865 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8866 			 "cannot perform implicit CSE when unrolling "
8867 			 "with negative dependence distance\n");
8868       return false;
8869     }
8870 
8871   elem_type = TREE_TYPE (vectype);
8872   mode = TYPE_MODE (vectype);
8873 
8874   /* FORNOW.  In some cases we can vectorize even if the data-type is not
8875      supported (e.g. data copies).  */
8876   if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8877     {
8878       if (dump_enabled_p ())
8879         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8880                          "Aligned load, but unsupported type.\n");
8881       return false;
8882     }
8883 
8884   /* Check if the load is a part of an interleaving chain.  */
8885   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8886     {
8887       grouped_load = true;
8888       /* FORNOW */
8889       gcc_assert (!nested_in_vect_loop);
8890       gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8891 
8892       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8893       group_size = DR_GROUP_SIZE (first_stmt_info);
8894 
8895       /* Refuse non-SLP vectorization of SLP-only groups.  */
8896       if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8897 	{
8898 	  if (dump_enabled_p ())
8899 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8900 			     "cannot vectorize load in non-SLP mode.\n");
8901 	  return false;
8902 	}
8903 
8904       if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8905 	{
8906 	  slp_perm = true;
8907 
8908 	  if (!loop_vinfo)
8909 	    {
8910 	      /* In BB vectorization we may not actually use a loaded vector
8911 		 accessing elements in excess of DR_GROUP_SIZE.  */
8912 	      stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8913 	      group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8914 	      unsigned HOST_WIDE_INT nunits;
8915 	      unsigned j, k, maxk = 0;
8916 	      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8917 		if (k > maxk)
8918 		  maxk = k;
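	      /* A note on the check below: with the usual power-of-two
		 number of vector lanes, DR_GROUP_SIZE & ~(nunits - 1)
		 rounds the group size down to a whole number of vectors,
		 so any permutation index reaching into a trailing
		 partial vector is rejected.  */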
8919 	      tree vectype = SLP_TREE_VECTYPE (slp_node);
8920 	      if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8921 		  || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8922 		{
8923 		  if (dump_enabled_p ())
8924 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8925 				     "BB vectorization with gaps at the end of "
8926 				     "a load is not supported\n");
8927 		  return false;
8928 		}
8929 	    }
8930 
8931 	  auto_vec<tree> tem;
8932 	  unsigned n_perms;
8933 	  if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8934 					     true, &n_perms))
8935 	    {
8936 	      if (dump_enabled_p ())
8937 		dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8938 				 vect_location,
8939 				 "unsupported load permutation\n");
8940 	      return false;
8941 	    }
8942 	}
8943 
8944       /* Invalidate assumptions made by dependence analysis when vectorization
8945 	 on the unrolled body effectively re-orders stmts.  */
8946       if (!PURE_SLP_STMT (stmt_info)
8947 	  && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8948 	  && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8949 		       STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8950 	{
8951 	  if (dump_enabled_p ())
8952 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8953 			     "cannot perform implicit CSE when performing "
8954 			     "group loads with negative dependence distance\n");
8955 	  return false;
8956 	}
8957     }
8958   else
8959     group_size = 1;
8960 
8961   vect_memory_access_type memory_access_type;
8962   enum dr_alignment_support alignment_support_scheme;
8963   int misalignment;
8964   poly_int64 poffset;
8965   if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8966 			    ncopies, &memory_access_type, &poffset,
8967 			    &alignment_support_scheme, &misalignment, &gs_info))
8968     return false;
8969 
8970   if (mask)
8971     {
8972       if (memory_access_type == VMAT_CONTIGUOUS)
8973 	{
8974 	  machine_mode vec_mode = TYPE_MODE (vectype);
8975 	  if (!VECTOR_MODE_P (vec_mode)
8976 	      || !can_vec_mask_load_store_p (vec_mode,
8977 					     TYPE_MODE (mask_vectype), true))
8978 	    return false;
8979 	}
8980       else if (memory_access_type != VMAT_LOAD_STORE_LANES
8981 	       && memory_access_type != VMAT_GATHER_SCATTER)
8982 	{
8983 	  if (dump_enabled_p ())
8984 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8985 			     "unsupported access type for masked load.\n");
8986 	  return false;
8987 	}
8988       else if (memory_access_type == VMAT_GATHER_SCATTER
8989 	       && gs_info.ifn == IFN_LAST
8990 	       && !gs_info.decl)
8991 	{
8992 	  if (dump_enabled_p ())
8993 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8994 			     "unsupported masked emulated gather.\n");
8995 	  return false;
8996 	}
8997     }
8998 
8999   if (!vec_stmt) /* transformation not required.  */
9000     {
9001       if (slp_node
9002 	  && mask
9003 	  && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
9004 						mask_vectype))
9005 	{
9006 	  if (dump_enabled_p ())
9007 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9008 			     "incompatible vector types for invariants\n");
9009 	  return false;
9010 	}
9011 
9012       if (!slp)
9013 	STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
9014 
9015       if (loop_vinfo
9016 	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
9017 	check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
9018 					      VLS_LOAD, group_size,
9019 					      memory_access_type, &gs_info,
9020 					      mask);
9021 
9022       if (dump_enabled_p ()
9023 	  && memory_access_type != VMAT_ELEMENTWISE
9024 	  && memory_access_type != VMAT_GATHER_SCATTER
9025 	  && alignment_support_scheme != dr_aligned)
9026 	dump_printf_loc (MSG_NOTE, vect_location,
9027 			 "Vectorizing an unaligned access.\n");
9028 
9029       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
9030       vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
9031 			    alignment_support_scheme, misalignment,
9032 			    &gs_info, slp_node, cost_vec);
9033       return true;
9034     }
9035 
9036   if (!slp)
9037     gcc_assert (memory_access_type
9038 		== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
9039 
9040   if (dump_enabled_p ())
9041     dump_printf_loc (MSG_NOTE, vect_location,
9042                      "transform load. ncopies = %d\n", ncopies);
9043 
9044   /* Transform.  */
9045 
9046   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
9047   ensure_base_align (dr_info);
9048 
9049   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
9050     {
9051       vect_build_gather_load_calls (vinfo,
9052 				    stmt_info, gsi, vec_stmt, &gs_info, mask);
9053       return true;
9054     }
9055 
9056   if (memory_access_type == VMAT_INVARIANT)
9057     {
9058       gcc_assert (!grouped_load && !mask && !bb_vinfo);
9059       /* If we have versioned for aliasing or the loop doesn't
9060 	 have any data dependencies that would preclude this,
9061 	 then we are sure this is a loop invariant load and
9062 	 thus we can insert it on the preheader edge.  */
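      /* For instance (a sketch), a load of the loop-invariant *q_1 in

	   for (i = 0; i < n; i++)
	     a[i] = *q_1;

	 is re-materialized as a scalar load on the preheader edge and
	 then broadcast into a vector by vect_init_vector below.  */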
9063       bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
9064 		      && !nested_in_vect_loop
9065 		      && hoist_defs_of_uses (stmt_info, loop));
9066       if (hoist_p)
9067 	{
9068 	  gassign *stmt = as_a <gassign *> (stmt_info->stmt);
9069 	  if (dump_enabled_p ())
9070 	    dump_printf_loc (MSG_NOTE, vect_location,
9071 			     "hoisting out of the vectorized loop: %G", stmt);
9072 	  scalar_dest = copy_ssa_name (scalar_dest);
9073 	  tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
9074 	  gsi_insert_on_edge_immediate
9075 	    (loop_preheader_edge (loop),
9076 	     gimple_build_assign (scalar_dest, rhs));
9077 	}
9078       /* These copies are all equivalent, but currently the representation
9079 	 requires a separate STMT_VINFO_VEC_STMT for each one.  */
9080       gimple_stmt_iterator gsi2 = *gsi;
9081       gsi_next (&gsi2);
9082       for (j = 0; j < ncopies; j++)
9083 	{
9084 	  if (hoist_p)
9085 	    new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9086 					 vectype, NULL);
9087 	  else
9088 	    new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9089 					 vectype, &gsi2);
9090 	  gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
9091 	  if (slp)
9092 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9093 	  else
9094 	    {
9095 	      if (j == 0)
9096 		*vec_stmt = new_stmt;
9097 	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9098 	    }
9099 	}
9100       return true;
9101     }
9102 
9103   if (memory_access_type == VMAT_ELEMENTWISE
9104       || memory_access_type == VMAT_STRIDED_SLP)
9105     {
9106       gimple_stmt_iterator incr_gsi;
9107       bool insert_after;
9108       tree offvar;
9109       tree ivstep;
9110       tree running_off;
9111       vec<constructor_elt, va_gc> *v = NULL;
9112       tree stride_base, stride_step, alias_off;
9113       /* Checked by get_load_store_type.  */
9114       unsigned int const_nunits = nunits.to_constant ();
9115       unsigned HOST_WIDE_INT cst_offset = 0;
9116       tree dr_offset;
9117 
9118       gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
9119       gcc_assert (!nested_in_vect_loop);
9120 
9121       if (grouped_load)
9122 	{
9123 	  first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9124 	  first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9125 	}
9126       else
9127 	{
9128 	  first_stmt_info = stmt_info;
9129 	  first_dr_info = dr_info;
9130 	}
9131       if (slp && grouped_load)
9132 	{
9133 	  group_size = DR_GROUP_SIZE (first_stmt_info);
9134 	  ref_type = get_group_alias_ptr_type (first_stmt_info);
9135 	}
9136       else
9137 	{
9138 	  if (grouped_load)
9139 	    cst_offset
9140 	      = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
9141 		 * vect_get_place_in_interleaving_chain (stmt_info,
9142 							 first_stmt_info));
9143 	  group_size = 1;
9144 	  ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
9145 	}
9146 
9147       dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
9148       stride_base
9149 	= fold_build_pointer_plus
9150 	    (DR_BASE_ADDRESS (first_dr_info->dr),
9151 	     size_binop (PLUS_EXPR,
9152 			 convert_to_ptrofftype (dr_offset),
9153 			 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
9154       stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
9155 
9156       /* For a load with loop-invariant (but other than power-of-2)
9157          stride (i.e. not a grouped access) like so:
9158 
9159 	   for (i = 0; i < n; i += stride)
9160 	     ... = array[i];
9161 
9162 	 we generate a new induction variable and new accesses to
9163 	 form a new vector (or vectors, depending on ncopies):
9164 
9165 	   for (j = 0; ; j += VF*stride)
9166 	     tmp1 = array[j];
9167 	     tmp2 = array[j + stride];
9168 	     ...
9169 	     vectemp = {tmp1, tmp2, ...}
9170          */
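      /* As a concrete instance (illustrative numbers only), with VF = 4,
	 a 4-element vectype and stride = 3, the new IV steps by 12
	 elements per vector iteration and the element loads read
	 array[j], array[j + 3], array[j + 6] and array[j + 9], which
	 are then combined into a single vector.  */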
9171 
9172       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
9173 			    build_int_cst (TREE_TYPE (stride_step), vf));
9174 
9175       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
9176 
9177       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
9178       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
9179       create_iv (stride_base, ivstep, NULL,
9180 		 loop, &incr_gsi, insert_after,
9181 		 &offvar, NULL);
9182 
9183       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9184 
9185       running_off = offvar;
9186       alias_off = build_int_cst (ref_type, 0);
9187       int nloads = const_nunits;
9188       int lnel = 1;
9189       tree ltype = TREE_TYPE (vectype);
9190       tree lvectype = vectype;
9191       auto_vec<tree> dr_chain;
9192       if (memory_access_type == VMAT_STRIDED_SLP)
9193 	{
9194 	  if (group_size < const_nunits)
9195 	    {
9196 	      /* First check if vec_init optab supports construction from vector
9197 		 elts directly.  Otherwise avoid emitting a constructor of
9198 		 vector elements by performing the loads using an integer type
9199 		 of the same size, constructing a vector of those and then
9200 		 re-interpreting it as the original vector type.  This avoids a
9201 		 huge runtime penalty due to the general inability to perform
9202 		 store forwarding from smaller stores to a larger load.  */
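	      /* E.g. (an illustrative combination), for a group_size of 2
		 and a vector(8) short vectype each load below reads one
		 32-bit integer covering two shorts; four such loads build
		 a vector(4) int which is then view-converted back to
		 vector(8) short.  */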
9203 	      tree ptype;
9204 	      tree vtype
9205 		= vector_vector_composition_type (vectype,
9206 						  const_nunits / group_size,
9207 						  &ptype);
9208 	      if (vtype != NULL_TREE)
9209 		{
9210 		  nloads = const_nunits / group_size;
9211 		  lnel = group_size;
9212 		  lvectype = vtype;
9213 		  ltype = ptype;
9214 		}
9215 	    }
9216 	  else
9217 	    {
9218 	      nloads = 1;
9219 	      lnel = const_nunits;
9220 	      ltype = vectype;
9221 	    }
9222 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9223 	}
9224       /* With a single-element vectype (vector(1) scalar_type), load it whole.  */
9225       else if (nloads == 1)
9226 	ltype = vectype;
9227 
9228       if (slp)
9229 	{
9230 	  /* For SLP permutation support we need to load the whole group,
9231 	     not only the number of vector stmts the permutation result
9232 	     fits in.  */
9233 	  if (slp_perm)
9234 	    {
9235 	      /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9236 		 variable VF.  */
9237 	      unsigned int const_vf = vf.to_constant ();
9238 	      ncopies = CEIL (group_size * const_vf, const_nunits);
9239 	      dr_chain.create (ncopies);
9240 	    }
9241 	  else
9242 	    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9243 	}
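      /* E.g. (illustrative numbers) for the permuted-SLP case above, a
	 group of 3 scalars with VF = 4 and 4-element vectors needs
	 CEIL (3 * 4, 4) = 3 vector loads to cover the whole group across
	 the unrolled iterations.  */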
9244       unsigned int group_el = 0;
9245       unsigned HOST_WIDE_INT
9246 	elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9247       unsigned int n_groups = 0;
9248       for (j = 0; j < ncopies; j++)
9249 	{
9250 	  if (nloads > 1)
9251 	    vec_alloc (v, nloads);
9252 	  gimple *new_stmt = NULL;
9253 	  for (i = 0; i < nloads; i++)
9254 	    {
9255 	      tree this_off = build_int_cst (TREE_TYPE (alias_off),
9256 					     group_el * elsz + cst_offset);
9257 	      tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9258 	      vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9259 	      new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
9260 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9261 	      if (nloads > 1)
9262 		CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9263 					gimple_assign_lhs (new_stmt));
9264 
9265 	      group_el += lnel;
9266 	      if (! slp
9267 		  || group_el == group_size)
9268 		{
9269 		  n_groups++;
9270 		  /* When doing SLP make sure to not load elements from
9271 		     the next vector iteration; those will not be accessed,
9272 		     so just use the last element again.  See PR107451.  */
9273 		  if (!slp || known_lt (n_groups, vf))
9274 		    {
9275 		      tree newoff = copy_ssa_name (running_off);
9276 		      gimple *incr
9277 			= gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9278 					       running_off, stride_step);
9279 		      vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9280 		      running_off = newoff;
9281 		    }
9282 		  group_el = 0;
9283 		}
9284 	    }
9285 	  if (nloads > 1)
9286 	    {
9287 	      tree vec_inv = build_constructor (lvectype, v);
9288 	      new_temp = vect_init_vector (vinfo, stmt_info,
9289 					   vec_inv, lvectype, gsi);
9290 	      new_stmt = SSA_NAME_DEF_STMT (new_temp);
9291 	      if (lvectype != vectype)
9292 		{
9293 		  new_stmt = gimple_build_assign (make_ssa_name (vectype),
9294 						  VIEW_CONVERT_EXPR,
9295 						  build1 (VIEW_CONVERT_EXPR,
9296 							  vectype, new_temp));
9297 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9298 		}
9299 	    }
9300 
9301 	  if (slp)
9302 	    {
9303 	      if (slp_perm)
9304 		dr_chain.quick_push (gimple_assign_lhs (new_stmt));
9305 	      else
9306 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9307 	    }
9308 	  else
9309 	    {
9310 	      if (j == 0)
9311 		*vec_stmt = new_stmt;
9312 	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9313 	    }
9314 	}
9315       if (slp_perm)
9316 	{
9317 	  unsigned n_perms;
9318 	  vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9319 					false, &n_perms);
9320 	}
9321       return true;
9322     }
9323 
9324   if (memory_access_type == VMAT_GATHER_SCATTER
9325       || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9326     grouped_load = false;
9327 
9328   if (grouped_load)
9329     {
9330       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9331       group_size = DR_GROUP_SIZE (first_stmt_info);
9332       /* For SLP vectorization we directly vectorize a subchain
9333          without permutation.  */
9334       if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9335 	first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9336       /* For BB vectorization always use the first stmt to base
9337 	 the data ref pointer on.  */
9338       if (bb_vinfo)
9339 	first_stmt_info_for_drptr
9340 	  = vect_find_first_scalar_stmt_in_slp (slp_node);
9341 
9342       /* Check if the chain of loads is already vectorized.  */
9343       if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9344 	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9345 	     ???  But we can only do so if there is exactly one
9346 	     as we have no way to get at the rest.  Leave the CSE
9347 	     opportunity alone.
9348 	     ???  With the group load eventually participating
9349 	     in multiple different permutations (having multiple
9350 	     slp nodes which refer to the same group) the CSE
9351 	     is even wrong code.  See PR56270.  */
9352 	  && !slp)
9353 	{
9354 	  *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9355 	  return true;
9356 	}
9357       first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9358       group_gap_adj = 0;
9359 
9360       /* VEC_NUM is the number of vect stmts to be created for this group.  */
9361       if (slp)
9362 	{
9363 	  grouped_load = false;
9364 	  /* If an SLP permutation is from N elements to N elements,
9365 	     and if one vector holds a whole number of N, we can load
9366 	     the inputs to the permutation in the same way as an
9367 	     unpermuted sequence.  In other cases we need to load the
9368 	     whole group, not only the number of vector stmts the
9369 	     permutation result fits in.  */
9370 	  unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9371 	  if (slp_perm
9372 	      && (group_size != scalar_lanes
9373 		  || !multiple_p (nunits, group_size)))
9374 	    {
9375 	      /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9376 		 variable VF; see vect_transform_slp_perm_load.  */
9377 	      unsigned int const_vf = vf.to_constant ();
9378 	      unsigned int const_nunits = nunits.to_constant ();
9379 	      vec_num = CEIL (group_size * const_vf, const_nunits);
9380 	      group_gap_adj = vf * group_size - nunits * vec_num;
9381 	    }
9382 	  else
9383 	    {
9384 	      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9385 	      group_gap_adj
9386 		= group_size - scalar_lanes;
9387 	    }
9388     	}
9389       else
9390 	vec_num = group_size;
9391 
9392       ref_type = get_group_alias_ptr_type (first_stmt_info);
9393     }
9394   else
9395     {
9396       first_stmt_info = stmt_info;
9397       first_dr_info = dr_info;
9398       group_size = vec_num = 1;
9399       group_gap_adj = 0;
9400       ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9401       if (slp)
9402 	vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9403     }
9404 
9405   gcc_assert (alignment_support_scheme);
9406   vec_loop_masks *loop_masks
9407     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9408        ? &LOOP_VINFO_MASKS (loop_vinfo)
9409        : NULL);
9410   vec_loop_lens *loop_lens
9411     = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
9412        ? &LOOP_VINFO_LENS (loop_vinfo)
9413        : NULL);
9414 
9415   /* Shouldn't go with length-based approach if fully masked.  */
9416   gcc_assert (!loop_lens || !loop_masks);
9417 
9418   /* Targets with load-lane instructions must not require explicit
9419      realignment.  vect_supportable_dr_alignment always returns either
9420      dr_aligned or dr_unaligned_supported for masked operations.  */
9421   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9422 	       && !mask
9423 	       && !loop_masks)
9424 	      || alignment_support_scheme == dr_aligned
9425 	      || alignment_support_scheme == dr_unaligned_supported);
9426 
9427   /* In case the vectorization factor (VF) is bigger than the number
9428      of elements that we can fit in a vectype (nunits), we have to generate
9429      more than one vector stmt - i.e. we need to "unroll" the
9430      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
9431      from one copy of the vector stmt to the next, in the field
9432      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
9433      stages to find the correct vector defs to be used when vectorizing
9434      stmts that use the defs of the current stmt.  The example below
9435      illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9436      need to create 4 vectorized stmts):
9437 
9438      before vectorization:
9439                                 RELATED_STMT    VEC_STMT
9440         S1:     x = memref      -               -
9441         S2:     z = x + 1       -               -
9442 
9443      step 1: vectorize stmt S1:
9444         We first create the vector stmt VS1_0, and, as usual, record a
9445         pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9446         Next, we create the vector stmt VS1_1, and record a pointer to
9447         it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9448         Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
9449         stmts and pointers:
9450                                 RELATED_STMT    VEC_STMT
9451         VS1_0:  vx0 = memref0   VS1_1           -
9452         VS1_1:  vx1 = memref1   VS1_2           -
9453         VS1_2:  vx2 = memref2   VS1_3           -
9454         VS1_3:  vx3 = memref3   -               -
9455         S1:     x = load        -               VS1_0
9456         S2:     z = x + 1       -               -
9457   */
9458 
9459   /* In case of interleaving (non-unit grouped access):
9460 
9461      S1:  x2 = &base + 2
9462      S2:  x0 = &base
9463      S3:  x1 = &base + 1
9464      S4:  x3 = &base + 3
9465 
9466      Vectorized loads are created in the order of memory accesses
9467      starting from the access of the first stmt of the chain:
9468 
9469      VS1: vx0 = &base
9470      VS2: vx1 = &base + vec_size*1
9471      VS3: vx2 = &base + vec_size*2
9472      VS4: vx3 = &base + vec_size*3
9473 
9474      Then permutation statements are generated:
9475 
9476      VS5: vx4 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9477      VS6: vx5 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9478        ...
9479 
9480      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9481      (the order of the data-refs in the output of vect_permute_load_chain
9482      corresponds to the order of scalar stmts in the interleaving chain - see
9483      the documentation of vect_permute_load_chain()).
9484      The generation of permutation stmts and recording them in
9485      STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9486 
9487      In case of both multiple types and interleaving, the vector loads and
9488      permutation stmts above are created for every copy.  The result vector
9489      stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9490      corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
9491 
9492   /* If the data reference is aligned (dr_aligned) or potentially unaligned
9493      on a target that supports unaligned accesses (dr_unaligned_supported)
9494      we generate the following code:
9495          p = initial_addr;
9496          indx = 0;
9497          loop {
9498 	   p = p + indx * vectype_size;
9499            vec_dest = *(p);
9500            indx = indx + 1;
9501          }
9502 
9503      Otherwise, the data reference is potentially unaligned on a target that
9504      does not support unaligned accesses (dr_explicit_realign_optimized) -
9505      then generate the following code, in which the data in each iteration is
9506      obtained by two vector loads, one from the previous iteration, and one
9507      from the current iteration:
9508          p1 = initial_addr;
9509          msq_init = *(floor(p1))
9510          p2 = initial_addr + VS - 1;
9511          realignment_token = call target_builtin;
9512          indx = 0;
9513          loop {
9514            p2 = p2 + indx * vectype_size
9515            lsq = *(floor(p2))
9516            vec_dest = realign_load (msq, lsq, realignment_token)
9517            indx = indx + 1;
9518            msq = lsq;
9519          }   */
9520 
9521   /* If the misalignment remains the same throughout the execution of the
9522      loop, we can create the init_addr and permutation mask at the loop
9523      preheader.  Otherwise, it needs to be created inside the loop.
9524      This can only occur when vectorizing memory accesses in the inner-loop
9525      nested within an outer-loop that is being vectorized.  */
9526 
9527   if (nested_in_vect_loop
9528       && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9529 		      GET_MODE_SIZE (TYPE_MODE (vectype))))
9530     {
9531       gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9532       compute_in_loop = true;
9533     }
9534 
9535   bool diff_first_stmt_info
9536     = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9537 
9538   tree offset = NULL_TREE;
9539   if ((alignment_support_scheme == dr_explicit_realign_optimized
9540        || alignment_support_scheme == dr_explicit_realign)
9541       && !compute_in_loop)
9542     {
9543       /* If we have a different first_stmt_info, we can't set up realignment
9544 	 here, since we can't guarantee that first_stmt_info's DR has been
9545 	 initialized yet; instead use first_stmt_info_for_drptr's DR, bumping
9546 	 it by the distance from first_stmt_info's DR as below.  */
9547       if (!diff_first_stmt_info)
9548 	msq = vect_setup_realignment (vinfo,
9549 				      first_stmt_info, gsi, &realignment_token,
9550 				      alignment_support_scheme, NULL_TREE,
9551 				      &at_loop);
9552       if (alignment_support_scheme == dr_explicit_realign_optimized)
9553 	{
9554 	  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9555 	  offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9556 			       size_one_node);
9557 	  gcc_assert (!first_stmt_info_for_drptr);
9558 	}
9559     }
9560   else
9561     at_loop = loop;
9562 
9563   if (!known_eq (poffset, 0))
9564     offset = (offset
9565 	      ? size_binop (PLUS_EXPR, offset, size_int (poffset))
9566 	      : size_int (poffset));
9567 
9568   tree bump;
9569   tree vec_offset = NULL_TREE;
9570   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9571     {
9572       aggr_type = NULL_TREE;
9573       bump = NULL_TREE;
9574     }
9575   else if (memory_access_type == VMAT_GATHER_SCATTER)
9576     {
9577       aggr_type = elem_type;
9578       vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9579 				       &bump, &vec_offset);
9580     }
9581   else
9582     {
9583       if (memory_access_type == VMAT_LOAD_STORE_LANES)
9584 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9585       else
9586 	aggr_type = vectype;
9587       bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9588 					  memory_access_type);
9589     }
9590 
9591   auto_vec<tree> vec_offsets;
9592   auto_vec<tree> vec_masks;
9593   if (mask)
9594     {
9595       if (slp_node)
9596 	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
9597 			   &vec_masks);
9598       else
9599 	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
9600 				       &vec_masks, mask_vectype);
9601     }
9602   tree vec_mask = NULL_TREE;
9603   poly_uint64 group_elt = 0;
9604   for (j = 0; j < ncopies; j++)
9605     {
9606       /* 1. Create the vector or array pointer update chain.  */
9607       if (j == 0)
9608 	{
9609 	  bool simd_lane_access_p
9610 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9611 	  if (simd_lane_access_p
9612 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9613 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9614 	      && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9615 	      && integer_zerop (DR_INIT (first_dr_info->dr))
9616 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
9617 					get_alias_set (TREE_TYPE (ref_type)))
9618 	      && (alignment_support_scheme == dr_aligned
9619 		  || alignment_support_scheme == dr_unaligned_supported))
9620 	    {
9621 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9622 	      dataref_offset = build_int_cst (ref_type, 0);
9623 	    }
9624 	  else if (diff_first_stmt_info)
9625 	    {
9626 	      dataref_ptr
9627 		= vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9628 					    aggr_type, at_loop, offset, &dummy,
9629 					    gsi, &ptr_incr, simd_lane_access_p,
9630 					    bump);
9631 	      /* Adjust the pointer by the difference to first_stmt.  */
9632 	      data_reference_p ptrdr
9633 		= STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9634 	      tree diff
9635 		= fold_convert (sizetype,
9636 				size_binop (MINUS_EXPR,
9637 					    DR_INIT (first_dr_info->dr),
9638 					    DR_INIT (ptrdr)));
9639 	      dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9640 					     stmt_info, diff);
9641 	      if (alignment_support_scheme == dr_explicit_realign)
9642 		{
9643 		  msq = vect_setup_realignment (vinfo,
9644 						first_stmt_info_for_drptr, gsi,
9645 						&realignment_token,
9646 						alignment_support_scheme,
9647 						dataref_ptr, &at_loop);
9648 		  gcc_assert (!compute_in_loop);
9649 		}
9650 	    }
9651 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9652 	    {
9653 	      vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9654 					   slp_node, &gs_info, &dataref_ptr,
9655 					   &vec_offsets);
9656 	    }
9657 	  else
9658 	    dataref_ptr
9659 	      = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9660 					  at_loop,
9661 					  offset, &dummy, gsi, &ptr_incr,
9662 					  simd_lane_access_p, bump);
9663 	  if (mask)
9664 	    vec_mask = vec_masks[0];
9665 	}
9666       else
9667 	{
9668 	  if (dataref_offset)
9669 	    dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9670 					      bump);
9671 	  else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9672 	    dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9673 					   stmt_info, bump);
9674 	  if (mask)
9675 	    vec_mask = vec_masks[j];
9676 	}
9677 
9678       if (grouped_load || slp_perm)
9679 	dr_chain.create (vec_num);
9680 
9681       gimple *new_stmt = NULL;
9682       if (memory_access_type == VMAT_LOAD_STORE_LANES)
9683 	{
9684 	  tree vec_array;
9685 
9686 	  vec_array = create_vector_array (vectype, vec_num);
9687 
9688 	  tree final_mask = NULL_TREE;
9689 	  if (loop_masks)
9690 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9691 					     vectype, j);
9692 	  if (vec_mask)
9693 	    final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9694 					   final_mask, vec_mask, gsi);
9695 
9696 	  gcall *call;
9697 	  if (final_mask)
9698 	    {
9699 	      /* Emit:
9700 		   VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9701 		                                VEC_MASK).  */
9702 	      unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9703 	      tree alias_ptr = build_int_cst (ref_type, align);
9704 	      call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9705 						 dataref_ptr, alias_ptr,
9706 						 final_mask);
9707 	    }
9708 	  else
9709 	    {
9710 	      /* Emit:
9711 		   VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
9712 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9713 	      call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9714 	    }
9715 	  gimple_call_set_lhs (call, vec_array);
9716 	  gimple_call_set_nothrow (call, true);
9717 	  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9718 	  new_stmt = call;
9719 
9720 	  /* Extract each vector into an SSA_NAME.  */
9721 	  for (i = 0; i < vec_num; i++)
9722 	    {
9723 	      new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9724 					    vec_array, i);
9725 	      dr_chain.quick_push (new_temp);
9726 	    }
9727 
9728 	  /* Record the mapping between SSA_NAMEs and statements.  */
9729 	  vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9730 
9731 	  /* Record that VEC_ARRAY is now dead.  */
9732 	  vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9733 	}
9734       else
9735 	{
9736 	  for (i = 0; i < vec_num; i++)
9737 	    {
9738 	      tree final_mask = NULL_TREE;
9739 	      if (loop_masks
9740 		  && memory_access_type != VMAT_INVARIANT)
9741 		final_mask = vect_get_loop_mask (gsi, loop_masks,
9742 						 vec_num * ncopies,
9743 						 vectype, vec_num * j + i);
9744 	      if (vec_mask)
9745 		final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9746 					       final_mask, vec_mask, gsi);
9747 
9748 	      if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9749 		dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9750 					       gsi, stmt_info, bump);
9751 
9752 	      /* 2. Create the vector-load in the loop.  */
9753 	      switch (alignment_support_scheme)
9754 		{
9755 		case dr_aligned:
9756 		case dr_unaligned_supported:
9757 		  {
9758 		    unsigned int misalign;
9759 		    unsigned HOST_WIDE_INT align;
9760 
9761 		    if (memory_access_type == VMAT_GATHER_SCATTER
9762 			&& gs_info.ifn != IFN_LAST)
9763 		      {
9764 			if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9765 			  vec_offset = vec_offsets[vec_num * j + i];
9766 			tree zero = build_zero_cst (vectype);
9767 			tree scale = size_int (gs_info.scale);
9768 			gcall *call;
9769 			if (final_mask)
9770 			  call = gimple_build_call_internal
9771 			    (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9772 			     vec_offset, scale, zero, final_mask);
9773 			else
9774 			  call = gimple_build_call_internal
9775 			    (IFN_GATHER_LOAD, 4, dataref_ptr,
9776 			     vec_offset, scale, zero);
9777 			gimple_call_set_nothrow (call, true);
9778 			new_stmt = call;
9779 			data_ref = NULL_TREE;
9780 			break;
9781 		      }
9782 		    else if (memory_access_type == VMAT_GATHER_SCATTER)
9783 		      {
9784 			/* Emulated gather-scatter.  */
9785 			gcc_assert (!final_mask);
9786 			unsigned HOST_WIDE_INT const_nunits
9787 			  = nunits.to_constant ();
9788 			unsigned HOST_WIDE_INT const_offset_nunits
9789 			  = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
9790 			      .to_constant ();
9791 			vec<constructor_elt, va_gc> *ctor_elts;
9792 			vec_alloc (ctor_elts, const_nunits);
9793 			gimple_seq stmts = NULL;
9794 			/* We support offset vectors with more elements
9795 			   than the data vector for now.  */
9796 			unsigned HOST_WIDE_INT factor
9797 			  = const_offset_nunits / const_nunits;
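			/* E.g. (an illustrative combination), a vector(8)
			   int offset vector feeding vector(4) double data
			   gives FACTOR == 2, so each data vector consumes
			   half of one offset vector, selected via
			   ELT_OFFSET.  */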
9798 			vec_offset = vec_offsets[j / factor];
9799 			unsigned elt_offset = (j % factor) * const_nunits;
9800 			tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9801 			tree scale = size_int (gs_info.scale);
9802 			align
9803 			  = get_object_alignment (DR_REF (first_dr_info->dr));
9804 			tree ltype = build_aligned_type (TREE_TYPE (vectype),
9805 							 align);
9806 			for (unsigned k = 0; k < const_nunits; ++k)
9807 			  {
9808 			    tree boff = size_binop (MULT_EXPR,
9809 						    TYPE_SIZE (idx_type),
9810 						    bitsize_int
9811 						      (k + elt_offset));
9812 			    tree idx = gimple_build (&stmts, BIT_FIELD_REF,
9813 						     idx_type, vec_offset,
9814 						     TYPE_SIZE (idx_type),
9815 						     boff);
9816 			    idx = gimple_convert (&stmts, sizetype, idx);
9817 			    idx = gimple_build (&stmts, MULT_EXPR,
9818 						sizetype, idx, scale);
9819 			    tree ptr = gimple_build (&stmts, PLUS_EXPR,
9820 						     TREE_TYPE (dataref_ptr),
9821 						     dataref_ptr, idx);
9822 			    ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9823 			    tree elt = make_ssa_name (TREE_TYPE (vectype));
9824 			    tree ref = build2 (MEM_REF, ltype, ptr,
9825 					       build_int_cst (ref_type, 0));
9826 			    new_stmt = gimple_build_assign (elt, ref);
9827 			    gimple_seq_add_stmt (&stmts, new_stmt);
9828 			    CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
9829 			  }
9830 			gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9831 			new_stmt = gimple_build_assign (NULL_TREE,
9832 							build_constructor
9833 							  (vectype, ctor_elts));
9834 			data_ref = NULL_TREE;
9835 			break;
9836 		      }
9837 
9838 		    align =
9839 		      known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9840 		    if (alignment_support_scheme == dr_aligned)
9841 		      misalign = 0;
9842 		    else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9843 		      {
9844 			align = dr_alignment
9845 			  (vect_dr_behavior (vinfo, first_dr_info));
9846 			misalign = 0;
9847 		      }
9848 		    else
9849 		      misalign = misalignment;
9850 		    if (dataref_offset == NULL_TREE
9851 			&& TREE_CODE (dataref_ptr) == SSA_NAME)
9852 		      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9853 					      align, misalign);
9854 		    align = least_bit_hwi (misalign | align);
9855 
9856 		    if (final_mask)
9857 		      {
9858 			tree ptr = build_int_cst (ref_type,
9859 						  align * BITS_PER_UNIT);
9860 			gcall *call
9861 			  = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9862 							dataref_ptr, ptr,
9863 							final_mask);
9864 			gimple_call_set_nothrow (call, true);
9865 			new_stmt = call;
9866 			data_ref = NULL_TREE;
9867 		      }
9868 		    else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9869 		      {
9870 			tree final_len
9871 			  = vect_get_loop_len (loop_vinfo, loop_lens,
9872 					       vec_num * ncopies,
9873 					       vec_num * j + i);
9874 			tree ptr = build_int_cst (ref_type,
9875 						  align * BITS_PER_UNIT);
9876 
9877 			machine_mode vmode = TYPE_MODE (vectype);
9878 			opt_machine_mode new_ovmode
9879 			  = get_len_load_store_mode (vmode, true);
9880 			machine_mode new_vmode = new_ovmode.require ();
9881 			tree qi_type = unsigned_intQI_type_node;
9882 
9883 			signed char biasval =
9884 			  LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9885 
9886 			tree bias = build_int_cst (intQI_type_node, biasval);
9887 
9888 			gcall *call
9889 			  = gimple_build_call_internal (IFN_LEN_LOAD, 4,
9890 							dataref_ptr, ptr,
9891 							final_len, bias);
9892 			gimple_call_set_nothrow (call, true);
9893 			new_stmt = call;
9894 			data_ref = NULL_TREE;
9895 
9896 			/* Need conversion if it's wrapped with VnQI.  */
9897 			if (vmode != new_vmode)
9898 			  {
9899 			    tree new_vtype
9900 			      = build_vector_type_for_mode (qi_type, new_vmode);
9901 			    tree var = vect_get_new_ssa_name (new_vtype,
9902 							      vect_simple_var);
9903 			    gimple_set_lhs (call, var);
9904 			    vect_finish_stmt_generation (vinfo, stmt_info, call,
9905 							 gsi);
9906 			    tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9907 			    new_stmt
9908 			      = gimple_build_assign (vec_dest,
9909 						     VIEW_CONVERT_EXPR, op);
9910 			  }
9911 		      }
9912 		    else
9913 		      {
9914 			tree ltype = vectype;
9915 			tree new_vtype = NULL_TREE;
9916 			unsigned HOST_WIDE_INT gap
9917 			  = DR_GROUP_GAP (first_stmt_info);
9918 			unsigned int vect_align
9919 			  = vect_known_alignment_in_bytes (first_dr_info,
9920 							   vectype);
9921 			unsigned int scalar_dr_size
9922 			  = vect_get_scalar_dr_size (first_dr_info);
9923 			/* If there's no peeling for gaps but we have a gap
9924 			   with slp loads then load the lower half of the
9925 			   vector only.  See get_group_load_store_type for
9926 			   when we apply this optimization.  */
9927 			if (slp
9928 			    && loop_vinfo
9929 			    && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9930 			    && gap != 0
9931 			    && known_eq (nunits, (group_size - gap) * 2)
9932 			    && known_eq (nunits, group_size)
9933 			    && gap >= (vect_align / scalar_dr_size))
9934 			  {
9935 			    tree half_vtype;
9936 			    new_vtype
9937 			      = vector_vector_composition_type (vectype, 2,
9938 								&half_vtype);
9939 			    if (new_vtype != NULL_TREE)
9940 			      ltype = half_vtype;
9941 			  }
9942 			tree offset
9943 			  = (dataref_offset ? dataref_offset
9944 					    : build_int_cst (ref_type, 0));
9945 			if (ltype != vectype
9946 			    && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9947 			  {
9948 			    unsigned HOST_WIDE_INT gap_offset
9949 			      = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9950 			    tree gapcst = build_int_cst (ref_type, gap_offset);
9951 			    offset = size_binop (PLUS_EXPR, offset, gapcst);
9952 			  }
9953 			data_ref
9954 			  = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9955 			if (alignment_support_scheme == dr_aligned)
9956 			  ;
9957 			else
9958 			  TREE_TYPE (data_ref)
9959 			    = build_aligned_type (TREE_TYPE (data_ref),
9960 						  align * BITS_PER_UNIT);
9961 			if (ltype != vectype)
9962 			  {
9963 			    vect_copy_ref_info (data_ref,
9964 						DR_REF (first_dr_info->dr));
9965 			    tree tem = make_ssa_name (ltype);
9966 			    new_stmt = gimple_build_assign (tem, data_ref);
9967 			    vect_finish_stmt_generation (vinfo, stmt_info,
9968 							 new_stmt, gsi);
9969 			    data_ref = NULL;
9970 			    vec<constructor_elt, va_gc> *v;
9971 			    vec_alloc (v, 2);
9972 			    if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9973 			      {
9974 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9975 							build_zero_cst (ltype));
9976 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9977 			      }
9978 			    else
9979 			      {
9980 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9981 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9982 							build_zero_cst (ltype));
9983 			      }
9984 			    gcc_assert (new_vtype != NULL_TREE);
9985 			    if (new_vtype == vectype)
9986 			      new_stmt = gimple_build_assign (
9987 				vec_dest, build_constructor (vectype, v));
9988 			    else
9989 			      {
9990 				tree new_vname = make_ssa_name (new_vtype);
9991 				new_stmt = gimple_build_assign (
9992 				  new_vname, build_constructor (new_vtype, v));
9993 				vect_finish_stmt_generation (vinfo, stmt_info,
9994 							     new_stmt, gsi);
9995 				new_stmt = gimple_build_assign (
9996 				  vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9997 						    new_vname));
9998 			      }
9999 			  }
10000 		      }
10001 		    break;
10002 		  }
10003 		case dr_explicit_realign:
10004 		  {
10005 		    tree ptr, bump;
10006 
10007 		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10008 
10009 		    if (compute_in_loop)
10010 		      msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10011 						    &realignment_token,
10012 						    dr_explicit_realign,
10013 						    dataref_ptr, NULL);
10014 
10015 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
10016 		      ptr = copy_ssa_name (dataref_ptr);
10017 		    else
10018 		      ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
10019 		    // For explicit realign the target alignment should be
10020 		    // known at compile time.
10021 		    unsigned HOST_WIDE_INT align =
10022 		      DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
10023 		    new_stmt = gimple_build_assign
10024 				 (ptr, BIT_AND_EXPR, dataref_ptr,
10025 				  build_int_cst
10026 				  (TREE_TYPE (dataref_ptr),
10027 				   -(HOST_WIDE_INT) align));
10028 		    vect_finish_stmt_generation (vinfo, stmt_info,
10029 						 new_stmt, gsi);
10030 		    data_ref
10031 		      = build2 (MEM_REF, vectype, ptr,
10032 				build_int_cst (ref_type, 0));
10033 		    vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10034 		    vec_dest = vect_create_destination_var (scalar_dest,
10035 							    vectype);
10036 		    new_stmt = gimple_build_assign (vec_dest, data_ref);
10037 		    new_temp = make_ssa_name (vec_dest, new_stmt);
10038 		    gimple_assign_set_lhs (new_stmt, new_temp);
10039 		    gimple_move_vops (new_stmt, stmt_info->stmt);
10040 		    vect_finish_stmt_generation (vinfo, stmt_info,
10041 						 new_stmt, gsi);
10042 		    msq = new_temp;
10043 
10044 		    bump = size_binop (MULT_EXPR, vs,
10045 				       TYPE_SIZE_UNIT (elem_type));
10046 		    bump = size_binop (MINUS_EXPR, bump, size_one_node);
10047 		    ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
10048 					   stmt_info, bump);
10049 		    new_stmt = gimple_build_assign
10050 				 (NULL_TREE, BIT_AND_EXPR, ptr,
10051 				  build_int_cst
10052 				  (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
10053 		    ptr = copy_ssa_name (ptr, new_stmt);
10054 		    gimple_assign_set_lhs (new_stmt, ptr);
10055 		    vect_finish_stmt_generation (vinfo, stmt_info,
10056 						 new_stmt, gsi);
10057 		    data_ref
10058 		      = build2 (MEM_REF, vectype, ptr,
10059 				build_int_cst (ref_type, 0));
10060 		    break;
10061 		  }
10062 		case dr_explicit_realign_optimized:
10063 		  {
10064 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
10065 		      new_temp = copy_ssa_name (dataref_ptr);
10066 		    else
10067 		      new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
10068 		    // We should only be doing this if we know the target
10069 		    // alignment at compile time.
10070 		    unsigned HOST_WIDE_INT align =
10071 		      DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
10072 		    new_stmt = gimple_build_assign
10073 		      (new_temp, BIT_AND_EXPR, dataref_ptr,
10074 		       build_int_cst (TREE_TYPE (dataref_ptr),
10075 				     -(HOST_WIDE_INT) align));
10076 		    vect_finish_stmt_generation (vinfo, stmt_info,
10077 						 new_stmt, gsi);
10078 		    data_ref
10079 		      = build2 (MEM_REF, vectype, new_temp,
10080 				build_int_cst (ref_type, 0));
10081 		    break;
10082 		  }
10083 		default:
10084 		  gcc_unreachable ();
10085 		}
10086 	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
10087 	      /* DATA_REF is null if we've already built the statement.  */
10088 	      if (data_ref)
10089 		{
10090 		  vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10091 		  new_stmt = gimple_build_assign (vec_dest, data_ref);
10092 		}
10093 	      new_temp = make_ssa_name (vec_dest, new_stmt);
10094 	      gimple_set_lhs (new_stmt, new_temp);
10095 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10096 
10097 	      /* 3. Handle explicit realignment if necessary/supported.
10098 		 Create in loop:
10099 		   vec_dest = realign_load (msq, lsq, realignment_token)  */
10100 	      if (alignment_support_scheme == dr_explicit_realign_optimized
10101 		  || alignment_support_scheme == dr_explicit_realign)
10102 		{
10103 		  lsq = gimple_assign_lhs (new_stmt);
10104 		  if (!realignment_token)
10105 		    realignment_token = dataref_ptr;
10106 		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
10107 		  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
10108 						  msq, lsq, realignment_token);
10109 		  new_temp = make_ssa_name (vec_dest, new_stmt);
10110 		  gimple_assign_set_lhs (new_stmt, new_temp);
10111 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10112 
10113 		  if (alignment_support_scheme == dr_explicit_realign_optimized)
10114 		    {
10115 		      gcc_assert (phi);
10116 		      if (i == vec_num - 1 && j == ncopies - 1)
10117 			add_phi_arg (phi, lsq,
10118 				     loop_latch_edge (containing_loop),
10119 				     UNKNOWN_LOCATION);
10120 		      msq = lsq;
10121 		    }
10122 		}
10123 
10124 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10125 		{
10126 		  tree perm_mask = perm_mask_for_reverse (vectype);
10127 		  new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
10128 						   perm_mask, stmt_info, gsi);
10129 		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
10130 		}
10131 
10132 	      /* Collect vector loads and later create their permutation in
10133 		 vect_transform_grouped_load ().  */
10134 	      if (grouped_load || slp_perm)
10135 		dr_chain.quick_push (new_temp);
10136 
10137 	      /* Store vector loads in the corresponding SLP_NODE.  */
10138 	      if (slp && !slp_perm)
10139 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10140 
10141 	      /* With SLP permutation we load the gaps as well; without
10142 	         it we need to skip the gaps after we manage to fully load
10143 		 all elements.  group_gap_adj is DR_GROUP_SIZE here.  */
10144 	      group_elt += nunits;
10145 	      if (maybe_ne (group_gap_adj, 0U)
10146 		  && !slp_perm
10147 		  && known_eq (group_elt, group_size - group_gap_adj))
10148 		{
10149 		  poly_wide_int bump_val
10150 		    = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10151 		       * group_gap_adj);
10152 		  if (tree_int_cst_sgn
10153 			(vect_dr_behavior (vinfo, dr_info)->step) == -1)
10154 		    bump_val = -bump_val;
10155 		  tree bump = wide_int_to_tree (sizetype, bump_val);
10156 		  dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10157 						 gsi, stmt_info, bump);
10158 		  group_elt = 0;
10159 		}
10160 	    }
10161 	  /* Bump the vector pointer to account for a gap or for excess
10162 	     elements loaded for a permuted SLP load.  */
10163 	  if (maybe_ne (group_gap_adj, 0U) && slp_perm)
10164 	    {
10165 	      poly_wide_int bump_val
10166 		= (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10167 		   * group_gap_adj);
10168 	      if (tree_int_cst_sgn
10169 		    (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10170 		bump_val = -bump_val;
10171 	      tree bump = wide_int_to_tree (sizetype, bump_val);
10172 	      dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10173 					     stmt_info, bump);
10174 	    }
10175 	}
10176 
10177       if (slp && !slp_perm)
10178 	continue;
10179 
10180       if (slp_perm)
10181         {
10182 	  unsigned n_perms;
10183 	  /* For SLP we know we've seen all possible uses of dr_chain so
10184 	     direct vect_transform_slp_perm_load to DCE the unused parts.
10185 	     ???  This is a hack to prevent compile-time issues as seen
10186 	     in PR101120 and friends.  */
10187 	  bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
10188 						  gsi, vf, false, &n_perms,
10189 						  nullptr, true);
10190 	  gcc_assert (ok);
10191         }
10192       else
10193         {
10194           if (grouped_load)
10195   	    {
10196 	      if (memory_access_type != VMAT_LOAD_STORE_LANES)
10197 		vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
10198 					     group_size, gsi);
10199 	      *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10200 	    }
10201           else
10202 	    {
10203 	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10204 	    }
10205         }
10206       dr_chain.release ();
10207     }
10208   if (!slp)
10209     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10210 
10211   return true;
10212 }
10213 
10214 /* Function vect_is_simple_cond.
10215 
10216    Input:
10217    LOOP - the loop that is being vectorized.
10218    COND - Condition that is checked for simple use.
10219 
10220    Output:
10221    *COMP_VECTYPE - the vector type for the comparison.
10222    *DTS - The def types for the arguments of the comparison.
10223 
10224    Returns whether a COND can be vectorized.  Checks whether
10225    condition operands are supportable using vect_is_simple_use.  */
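/* For example (a sketch), for a scalar statement

     x_5 = a_2 < b_3 ? c_4 : 7;

   COND is the tree 'a_2 < b_3'; both of its operands are checked with
   vect_is_simple_use and *COMP_VECTYPE receives the vector type to be
   used for the comparison.  */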
10226 
10227 static bool
10228 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
10229 		     slp_tree slp_node, tree *comp_vectype,
10230 		     enum vect_def_type *dts, tree vectype)
10231 {
10232   tree lhs, rhs;
10233   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10234   slp_tree slp_op;
10235 
10236   /* Mask case.  */
10237   if (TREE_CODE (cond) == SSA_NAME
10238       && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
10239     {
10240       if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
10241 			       &slp_op, &dts[0], comp_vectype)
10242 	  || !*comp_vectype
10243 	  || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
10244 	return false;
10245       return true;
10246     }
10247 
10248   if (!COMPARISON_CLASS_P (cond))
10249     return false;
10250 
10251   lhs = TREE_OPERAND (cond, 0);
10252   rhs = TREE_OPERAND (cond, 1);
10253 
10254   if (TREE_CODE (lhs) == SSA_NAME)
10255     {
10256       if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
10257 			       &lhs, &slp_op, &dts[0], &vectype1))
10258 	return false;
10259     }
10260   else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
10261 	   || TREE_CODE (lhs) == FIXED_CST)
10262     dts[0] = vect_constant_def;
10263   else
10264     return false;
10265 
10266   if (TREE_CODE (rhs) == SSA_NAME)
10267     {
10268       if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
10269 			       &rhs, &slp_op, &dts[1], &vectype2))
10270 	return false;
10271     }
10272   else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
10273 	   || TREE_CODE (rhs) == FIXED_CST)
10274     dts[1] = vect_constant_def;
10275   else
10276     return false;
10277 
10278   if (vectype1 && vectype2
10279       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10280 		   TYPE_VECTOR_SUBPARTS (vectype2)))
10281     return false;
10282 
10283   *comp_vectype = vectype1 ? vectype1 : vectype2;
10284   /* Invariant comparison.  */
10285   if (! *comp_vectype)
10286     {
10287       tree scalar_type = TREE_TYPE (lhs);
10288       if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10289 	*comp_vectype = truth_type_for (vectype);
10290       else
10291 	{
10292 	  /* If we can widen the comparison to match vectype do so.  */
10293 	  if (INTEGRAL_TYPE_P (scalar_type)
10294 	      && !slp_node
10295 	      && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10296 				  TYPE_SIZE (TREE_TYPE (vectype))))
10297 	    scalar_type = build_nonstandard_integer_type
10298 	      (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10299 	  *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10300 						       slp_node);
10301 	}
10302     }
10303 
10304   return true;
10305 }
10306 
10307 /* vectorizable_condition.
10308 
10309    Check if STMT_INFO is a conditional modify expression that can be vectorized.
10310    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10311    stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
10312    at GSI.
10313 
10314    When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10315 
10316    Return true if STMT_INFO is vectorizable in this way.  */
10317 
10318 static bool
10319 vectorizable_condition (vec_info *vinfo,
10320 			stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10321 			gimple **vec_stmt,
10322 			slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10323 {
10324   tree scalar_dest = NULL_TREE;
10325   tree vec_dest = NULL_TREE;
10326   tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10327   tree then_clause, else_clause;
10328   tree comp_vectype = NULL_TREE;
10329   tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10330   tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10331   tree vec_compare;
10332   tree new_temp;
10333   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10334   enum vect_def_type dts[4]
10335     = {vect_unknown_def_type, vect_unknown_def_type,
10336        vect_unknown_def_type, vect_unknown_def_type};
10337   int ndts = 4;
10338   int ncopies;
10339   int vec_num;
10340   enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10341   int i;
10342   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10343   vec<tree> vec_oprnds0 = vNULL;
10344   vec<tree> vec_oprnds1 = vNULL;
10345   vec<tree> vec_oprnds2 = vNULL;
10346   vec<tree> vec_oprnds3 = vNULL;
10347   tree vec_cmp_type;
10348   bool masked = false;
10349 
10350   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10351     return false;
10352 
10353   /* Is vectorizable conditional operation?  */
10354   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10355   if (!stmt)
10356     return false;
10357 
10358   code = gimple_assign_rhs_code (stmt);
10359   if (code != COND_EXPR)
10360     return false;
10361 
10362   stmt_vec_info reduc_info = NULL;
10363   int reduc_index = -1;
10364   vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10365   bool for_reduction
10366     = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10367   if (for_reduction)
10368     {
10369       if (slp_node)
10370 	return false;
10371       reduc_info = info_for_reduction (vinfo, stmt_info);
10372       reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10373       reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10374       gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10375 		  || reduc_index != -1);
10376     }
10377   else
10378     {
10379       if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10380 	return false;
10381     }
10382 
10383   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10384   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10385 
10386   if (slp_node)
10387     {
10388       ncopies = 1;
10389       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10390     }
10391   else
10392     {
10393       ncopies = vect_get_num_copies (loop_vinfo, vectype);
10394       vec_num = 1;
10395     }
10396 
10397   gcc_assert (ncopies >= 1);
10398   if (for_reduction && ncopies > 1)
10399     return false; /* FORNOW */
10400 
10401   cond_expr = gimple_assign_rhs1 (stmt);
10402 
10403   if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
10404 			    &comp_vectype, &dts[0], vectype)
10405       || !comp_vectype)
10406     return false;
10407 
10408   unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
10409   slp_tree then_slp_node, else_slp_node;
10410   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
10411 			   &then_clause, &then_slp_node, &dts[2], &vectype1))
10412     return false;
10413   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
10414 			   &else_clause, &else_slp_node, &dts[3], &vectype2))
10415     return false;
10416 
10417   if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10418     return false;
10419 
10420   if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10421     return false;
10422 
10423   masked = !COMPARISON_CLASS_P (cond_expr);
10424   vec_cmp_type = truth_type_for (comp_vectype);
10425 
10426   if (vec_cmp_type == NULL_TREE)
10427     return false;
10428 
10429   cond_code = TREE_CODE (cond_expr);
10430   if (!masked)
10431     {
10432       cond_expr0 = TREE_OPERAND (cond_expr, 0);
10433       cond_expr1 = TREE_OPERAND (cond_expr, 1);
10434     }
10435 
10436   /* For conditional reductions, the "then" value needs to be the candidate
10437      value calculated by this iteration while the "else" value needs to be
10438      the result carried over from previous iterations.  If the COND_EXPR
10439      is the other way around, we need to swap it.  */
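  /* For example (illustrative), in a conditional reduction written as

	 last = a[i] < 0 ? last : i;

     the carried-over value ("last") sits in the "then" position, so the
     comparison is inverted (or its result negated when no inverse exists)
     and the two clauses are swapped below.  */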
10440   bool must_invert_cmp_result = false;
10441   if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10442     {
10443       if (masked)
10444 	must_invert_cmp_result = true;
10445       else
10446 	{
10447 	  bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10448 	  tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10449 	  if (new_code == ERROR_MARK)
10450 	    must_invert_cmp_result = true;
10451 	  else
10452 	    {
10453 	      cond_code = new_code;
10454 	      /* Make sure we don't accidentally use the old condition.  */
10455 	      cond_expr = NULL_TREE;
10456 	    }
10457 	}
10458       std::swap (then_clause, else_clause);
10459     }
10460 
10461   if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10462     {
10463       /* Boolean values may have another representation in vectors
10464 	 and therefore we prefer bit operations over comparison for
10465 	 them (which also works for scalar masks).  We store opcodes
10466 	 to use in bitop1 and bitop2.  Statement is vectorized as
10467 	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10468 	 depending on bitop1 and bitop2 arity.  */
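      /* For example (illustrative): with mask operands, GT_EXPR "a > b"
	 is emitted as "a & ~b", GE_EXPR "a >= b" as "a | ~b", LT_EXPR and
	 LE_EXPR swap the operands first, NE_EXPR becomes "a ^ b", and
	 EQ_EXPR becomes "a ^ b" with the THEN/ELSE clauses swapped rather
	 than emitting an explicit negation.  */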
10469       switch (cond_code)
10470 	{
10471 	case GT_EXPR:
10472 	  bitop1 = BIT_NOT_EXPR;
10473 	  bitop2 = BIT_AND_EXPR;
10474 	  break;
10475 	case GE_EXPR:
10476 	  bitop1 = BIT_NOT_EXPR;
10477 	  bitop2 = BIT_IOR_EXPR;
10478 	  break;
10479 	case LT_EXPR:
10480 	  bitop1 = BIT_NOT_EXPR;
10481 	  bitop2 = BIT_AND_EXPR;
10482 	  std::swap (cond_expr0, cond_expr1);
10483 	  break;
10484 	case LE_EXPR:
10485 	  bitop1 = BIT_NOT_EXPR;
10486 	  bitop2 = BIT_IOR_EXPR;
10487 	  std::swap (cond_expr0, cond_expr1);
10488 	  break;
10489 	case NE_EXPR:
10490 	  bitop1 = BIT_XOR_EXPR;
10491 	  break;
10492 	case EQ_EXPR:
10493 	  bitop1 = BIT_XOR_EXPR;
10494 	  bitop2 = BIT_NOT_EXPR;
10495 	  break;
10496 	default:
10497 	  return false;
10498 	}
10499       cond_code = SSA_NAME;
10500     }
10501 
10502   if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10503       && reduction_type == EXTRACT_LAST_REDUCTION
10504       && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10505     {
10506       if (dump_enabled_p ())
10507 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10508 			 "reduction comparison operation not supported.\n");
10509       return false;
10510     }
10511 
10512   if (!vec_stmt)
10513     {
10514       if (bitop1 != NOP_EXPR)
10515 	{
10516 	  machine_mode mode = TYPE_MODE (comp_vectype);
10517 	  optab optab;
10518 
10519 	  optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10520 	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10521 	    return false;
10522 
10523 	  if (bitop2 != NOP_EXPR)
10524 	    {
10525 	      optab = optab_for_tree_code (bitop2, comp_vectype,
10526 					   optab_default);
10527 	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10528 		return false;
10529 	    }
10530 	}
10531 
10532       vect_cost_for_stmt kind = vector_stmt;
10533       if (reduction_type == EXTRACT_LAST_REDUCTION)
10534 	/* Count one reduction-like operation per vector.  */
10535 	kind = vec_to_scalar;
10536       else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10537 	return false;
10538 
10539       if (slp_node
10540 	  && (!vect_maybe_update_slp_op_vectype
10541 		 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10542 	      || (op_adjust == 1
10543 		  && !vect_maybe_update_slp_op_vectype
10544 			(SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10545 	      || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10546 	      || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10547 	{
10548 	  if (dump_enabled_p ())
10549 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10550 			     "incompatible vector types for invariants\n");
10551 	  return false;
10552 	}
10553 
10554       if (loop_vinfo && for_reduction
10555 	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10556 	{
10557 	  if (reduction_type == EXTRACT_LAST_REDUCTION)
10558 	    vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10559 				   ncopies * vec_num, vectype, NULL);
10560 	  /* Extra inactive lanes should be safe for vect_nested_cycle.  */
10561 	  else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10562 	    {
10563 	      if (dump_enabled_p ())
10564 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10565 				 "conditional reduction prevents the use"
10566 				 " of partial vectors.\n");
10567 	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10568 	    }
10569 	}
10570 
10571       STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10572       vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10573 			      cost_vec, kind);
10574       return true;
10575     }
10576 
10577   /* Transform.  */
10578 
10579   /* Handle def.  */
10580   scalar_dest = gimple_assign_lhs (stmt);
10581   if (reduction_type != EXTRACT_LAST_REDUCTION)
10582     vec_dest = vect_create_destination_var (scalar_dest, vectype);
10583 
10584   bool swap_cond_operands = false;
10585 
10586   /* See whether another part of the vectorized code applies a loop
10587      mask to the condition, or to its inverse.  */
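  /* For example (illustrative): if a masked load elsewhere in the loop
     already computed "loop_mask & (a < b)", a COND_EXPR testing "a >= b"
     can be emitted using the "a < b" comparison with its THEN and ELSE
     operands swapped, so that later passes can reuse the single masked
     comparison instead of materializing a second mask.  */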
10588 
10589   vec_loop_masks *masks = NULL;
10590   if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10591     {
10592       if (reduction_type == EXTRACT_LAST_REDUCTION)
10593 	masks = &LOOP_VINFO_MASKS (loop_vinfo);
10594       else
10595 	{
10596 	  scalar_cond_masked_key cond (cond_expr, ncopies);
10597 	  if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10598 	    masks = &LOOP_VINFO_MASKS (loop_vinfo);
10599 	  else
10600 	    {
10601 	      bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10602 	      tree_code orig_code = cond.code;
10603 	      cond.code = invert_tree_comparison (cond.code, honor_nans);
10604 	      if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
10605 		{
10606 		  masks = &LOOP_VINFO_MASKS (loop_vinfo);
10607 		  cond_code = cond.code;
10608 		  swap_cond_operands = true;
10609 		}
10610 	      else
10611 		{
10612 		  /* Try the inverse of the current mask.  We check if the
10613 		     inverse mask is live and if so we generate a negate of
10614 		     the current mask such that we still honor NaNs.  */
10615 		  cond.inverted_p = true;
10616 		  cond.code = orig_code;
10617 		  if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10618 		    {
10619 		      masks = &LOOP_VINFO_MASKS (loop_vinfo);
10620 		      cond_code = cond.code;
10621 		      swap_cond_operands = true;
10622 		      must_invert_cmp_result = true;
10623 		    }
10624 		}
10625 	    }
10626 	}
10627     }
10628 
10629   /* Handle cond expr.  */
10630   if (masked)
10631     vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10632 		       cond_expr, &vec_oprnds0, comp_vectype,
10633 		       then_clause, &vec_oprnds2, vectype,
10634 		       reduction_type != EXTRACT_LAST_REDUCTION
10635 		       ? else_clause : NULL, &vec_oprnds3, vectype);
10636   else
10637     vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10638 		       cond_expr0, &vec_oprnds0, comp_vectype,
10639 		       cond_expr1, &vec_oprnds1, comp_vectype,
10640 		       then_clause, &vec_oprnds2, vectype,
10641 		       reduction_type != EXTRACT_LAST_REDUCTION
10642 		       ? else_clause : NULL, &vec_oprnds3, vectype);
10643 
10644   /* Arguments are ready.  Create the new vector stmt.  */
10645   FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10646     {
10647       vec_then_clause = vec_oprnds2[i];
10648       if (reduction_type != EXTRACT_LAST_REDUCTION)
10649 	vec_else_clause = vec_oprnds3[i];
10650 
10651       if (swap_cond_operands)
10652 	std::swap (vec_then_clause, vec_else_clause);
10653 
10654       if (masked)
10655 	vec_compare = vec_cond_lhs;
10656       else
10657 	{
10658 	  vec_cond_rhs = vec_oprnds1[i];
10659 	  if (bitop1 == NOP_EXPR)
10660 	    {
10661 	      gimple_seq stmts = NULL;
10662 	      vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10663 					   vec_cond_lhs, vec_cond_rhs);
10664 	      gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10665 	    }
10666 	  else
10667 	    {
10668 	      new_temp = make_ssa_name (vec_cmp_type);
10669 	      gassign *new_stmt;
10670 	      if (bitop1 == BIT_NOT_EXPR)
10671 		new_stmt = gimple_build_assign (new_temp, bitop1,
10672 						vec_cond_rhs);
10673 	      else
10674 		new_stmt
10675 		  = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10676 					 vec_cond_rhs);
10677 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10678 	      if (bitop2 == NOP_EXPR)
10679 		vec_compare = new_temp;
10680 	      else if (bitop2 == BIT_NOT_EXPR)
10681 		{
10682 		  /* Instead of doing ~x ? y : z do x ? z : y.  */
10683 		  vec_compare = new_temp;
10684 		  std::swap (vec_then_clause, vec_else_clause);
10685 		}
10686 	      else
10687 		{
10688 		  vec_compare = make_ssa_name (vec_cmp_type);
10689 		  new_stmt
10690 		    = gimple_build_assign (vec_compare, bitop2,
10691 					   vec_cond_lhs, new_temp);
10692 		  vect_finish_stmt_generation (vinfo, stmt_info,
10693 					       new_stmt, gsi);
10694 		}
10695 	    }
10696 	}
10697 
10698       /* If we decided to apply a loop mask to the result of the vector
10699 	 comparison, AND the comparison with the mask now.  Later passes
10700 	 should then be able to reuse the AND results between multiple
10701 	 vector statements.
10702 
10703 	 For example:
10704 	 for (int i = 0; i < 100; ++i)
10705 	 x[i] = y[i] ? z[i] : 10;
10706 
10707 	 results in following optimized GIMPLE:
10708 
10709 	 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10710 	 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10711 	 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10712 	 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10713 	 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10714 	 vect_iftmp.11_47, { 10, ... }>;
10715 
10716 	 instead of using masked and unmasked forms of
10717 	 vec != { 0, ... } (masked in the MASK_LOAD,
10718 	 unmasked in the VEC_COND_EXPR).  */
10719 
10720       /* Force vec_compare to be an SSA_NAME rather than a comparison,
10721 	 in cases where that's necessary.  */
10722 
10723       if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10724 	{
10725 	  if (!is_gimple_val (vec_compare))
10726 	    {
10727 	      tree vec_compare_name = make_ssa_name (vec_cmp_type);
10728 	      gassign *new_stmt = gimple_build_assign (vec_compare_name,
10729 						       vec_compare);
10730 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10731 	      vec_compare = vec_compare_name;
10732 	    }
10733 
10734 	  if (must_invert_cmp_result)
10735 	    {
10736 	      tree vec_compare_name = make_ssa_name (vec_cmp_type);
10737 	      gassign *new_stmt = gimple_build_assign (vec_compare_name,
10738 						       BIT_NOT_EXPR,
10739 						       vec_compare);
10740 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10741 	      vec_compare = vec_compare_name;
10742 	    }
10743 
10744 	  if (masks)
10745 	    {
10746 	      tree loop_mask
10747 		= vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10748 				      vectype, i);
10749 	      tree tmp2 = make_ssa_name (vec_cmp_type);
10750 	      gassign *g
10751 		= gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10752 				       loop_mask);
10753 	      vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10754 	      vec_compare = tmp2;
10755 	    }
10756 	}
10757 
10758       gimple *new_stmt;
10759       if (reduction_type == EXTRACT_LAST_REDUCTION)
10760 	{
10761 	  gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10762 	  tree lhs = gimple_get_lhs (old_stmt);
10763 	  new_stmt = gimple_build_call_internal
10764 	      (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10765 	       vec_then_clause);
10766 	  gimple_call_set_lhs (new_stmt, lhs);
10767 	  SSA_NAME_DEF_STMT (lhs) = new_stmt;
10768 	  if (old_stmt == gsi_stmt (*gsi))
10769 	    vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10770 	  else
10771 	    {
10772 	      /* In this case we're moving the definition to later in the
10773 		 block.  That doesn't matter because the only uses of the
10774 		 lhs are in phi statements.  */
10775 	      gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10776 	      gsi_remove (&old_gsi, true);
10777 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10778 	    }
10779 	}
10780       else
10781 	{
10782 	  new_temp = make_ssa_name (vec_dest);
10783 	  new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10784 					  vec_then_clause, vec_else_clause);
10785 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10786 	}
10787       if (slp_node)
10788 	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10789       else
10790 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10791     }
10792 
10793   if (!slp_node)
10794     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10795 
10796   vec_oprnds0.release ();
10797   vec_oprnds1.release ();
10798   vec_oprnds2.release ();
10799   vec_oprnds3.release ();
10800 
10801   return true;
10802 }
10803 
10804 /* vectorizable_comparison.
10805 
10806    Check if STMT_INFO is a comparison expression that can be vectorized.
10807    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10808    comparison, put it in VEC_STMT, and insert it at GSI.
10809 
10810    Return true if STMT_INFO is vectorizable in this way.  */
10811 
10812 static bool
10813 vectorizable_comparison (vec_info *vinfo,
10814 			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10815 			 gimple **vec_stmt,
10816 			 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10817 {
10818   tree lhs, rhs1, rhs2;
10819   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10820   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10821   tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10822   tree new_temp;
10823   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10824   enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10825   int ndts = 2;
10826   poly_uint64 nunits;
10827   int ncopies;
10828   enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10829   int i;
10830   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10831   vec<tree> vec_oprnds0 = vNULL;
10832   vec<tree> vec_oprnds1 = vNULL;
10833   tree mask_type;
10834   tree mask;
10835 
10836   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10837     return false;
10838 
10839   if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10840     return false;
10841 
10842   mask_type = vectype;
10843   nunits = TYPE_VECTOR_SUBPARTS (vectype);
10844 
10845   if (slp_node)
10846     ncopies = 1;
10847   else
10848     ncopies = vect_get_num_copies (loop_vinfo, vectype);
10849 
10850   gcc_assert (ncopies >= 1);
10851   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10852     return false;
10853 
10854   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10855   if (!stmt)
10856     return false;
10857 
10858   code = gimple_assign_rhs_code (stmt);
10859 
10860   if (TREE_CODE_CLASS (code) != tcc_comparison)
10861     return false;
10862 
10863   slp_tree slp_rhs1, slp_rhs2;
10864   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10865 			   0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10866     return false;
10867 
10868   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10869 			   1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10870     return false;
10871 
10872   if (vectype1 && vectype2
10873       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10874 		   TYPE_VECTOR_SUBPARTS (vectype2)))
10875     return false;
10876 
10877   vectype = vectype1 ? vectype1 : vectype2;
10878 
10879   /* Invariant comparison.  */
10880   if (!vectype)
10881     {
10882       if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10883 	vectype = mask_type;
10884       else
10885 	vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10886 					       slp_node);
10887       if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10888 	return false;
10889     }
10890   else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10891     return false;
10892 
10893   /* Can't compare mask and non-mask types.  */
10894   if (vectype1 && vectype2
10895       && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10896     return false;
10897 
10898   /* Boolean values may have another representation in vectors
10899      and therefore we prefer bit operations over comparison for
10900      them (which also works for scalar masks).  We store opcodes
10901      to use in bitop1 and bitop2.  Statement is vectorized as
10902        BITOP2 (rhs1 BITOP1 rhs2) or
10903        rhs1 BITOP2 (BITOP1 rhs2)
10904      depending on bitop1 and bitop2 arity.  */
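  /* For example (illustrative): EQ_EXPR is emitted as "~(rhs1 ^ rhs2)"
     (BITOP1 = BIT_XOR_EXPR, BITOP2 = BIT_NOT_EXPR), while GT_EXPR is
     emitted as "rhs1 & ~rhs2" (BITOP1 = BIT_NOT_EXPR applied to rhs2,
     BITOP2 = BIT_AND_EXPR).  */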
10905   bool swap_p = false;
10906   if (VECTOR_BOOLEAN_TYPE_P (vectype))
10907     {
10908       if (code == GT_EXPR)
10909 	{
10910 	  bitop1 = BIT_NOT_EXPR;
10911 	  bitop2 = BIT_AND_EXPR;
10912 	}
10913       else if (code == GE_EXPR)
10914 	{
10915 	  bitop1 = BIT_NOT_EXPR;
10916 	  bitop2 = BIT_IOR_EXPR;
10917 	}
10918       else if (code == LT_EXPR)
10919 	{
10920 	  bitop1 = BIT_NOT_EXPR;
10921 	  bitop2 = BIT_AND_EXPR;
10922 	  swap_p = true;
10923 	}
10924       else if (code == LE_EXPR)
10925 	{
10926 	  bitop1 = BIT_NOT_EXPR;
10927 	  bitop2 = BIT_IOR_EXPR;
10928 	  swap_p = true;
10929 	}
10930       else
10931 	{
10932 	  bitop1 = BIT_XOR_EXPR;
10933 	  if (code == EQ_EXPR)
10934 	    bitop2 = BIT_NOT_EXPR;
10935 	}
10936     }
10937 
10938   if (!vec_stmt)
10939     {
10940       if (bitop1 == NOP_EXPR)
10941 	{
10942 	  if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10943 	    return false;
10944 	}
10945       else
10946 	{
10947 	  machine_mode mode = TYPE_MODE (vectype);
10948 	  optab optab;
10949 
10950 	  optab = optab_for_tree_code (bitop1, vectype, optab_default);
10951 	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10952 	    return false;
10953 
10954 	  if (bitop2 != NOP_EXPR)
10955 	    {
10956 	      optab = optab_for_tree_code (bitop2, vectype, optab_default);
10957 	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10958 		return false;
10959 	    }
10960 	}
10961 
10962       /* Put types on constant and invariant SLP children.  */
10963       if (slp_node
10964 	  && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10965 	      || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10966 	{
10967 	  if (dump_enabled_p ())
10968 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10969 			     "incompatible vector types for invariants\n");
10970 	  return false;
10971 	}
10972 
10973       STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10974       vect_model_simple_cost (vinfo, stmt_info,
10975 			      ncopies * (1 + (bitop2 != NOP_EXPR)),
10976 			      dts, ndts, slp_node, cost_vec);
10977       return true;
10978     }
10979 
10980   /* Transform.  */
10981 
10982   /* Handle def.  */
10983   lhs = gimple_assign_lhs (stmt);
10984   mask = vect_create_destination_var (lhs, mask_type);
10985 
10986   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10987 		     rhs1, &vec_oprnds0, vectype,
10988 		     rhs2, &vec_oprnds1, vectype);
10989   if (swap_p)
10990     std::swap (vec_oprnds0, vec_oprnds1);
10991 
10992   /* Arguments are ready.  Create the new vector stmt.  */
10993   FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10994     {
10995       gimple *new_stmt;
10996       vec_rhs2 = vec_oprnds1[i];
10997 
10998       new_temp = make_ssa_name (mask);
10999       if (bitop1 == NOP_EXPR)
11000 	{
11001 	  new_stmt = gimple_build_assign (new_temp, code,
11002 					  vec_rhs1, vec_rhs2);
11003 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11004 	}
11005       else
11006 	{
11007 	  if (bitop1 == BIT_NOT_EXPR)
11008 	    new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
11009 	  else
11010 	    new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
11011 					    vec_rhs2);
11012 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11013 	  if (bitop2 != NOP_EXPR)
11014 	    {
11015 	      tree res = make_ssa_name (mask);
11016 	      if (bitop2 == BIT_NOT_EXPR)
11017 		new_stmt = gimple_build_assign (res, bitop2, new_temp);
11018 	      else
11019 		new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
11020 						new_temp);
11021 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11022 	    }
11023 	}
11024       if (slp_node)
11025 	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
11026       else
11027 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11028     }
11029 
11030   if (!slp_node)
11031     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11032 
11033   vec_oprnds0.release ();
11034   vec_oprnds1.release ();
11035 
11036   return true;
11037 }
11038 
11039 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
11040    can handle all live statements in the node.  Otherwise return true
11041    if STMT_INFO is not live or if vectorizable_live_operation can handle it.
11042    GSI and VEC_STMT_P are as for vectorizable_live_operation.  */
11043 
11044 static bool
11045 can_vectorize_live_stmts (vec_info *vinfo,
11046 			  stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11047 			  slp_tree slp_node, slp_instance slp_node_instance,
11048 			  bool vec_stmt_p,
11049 			  stmt_vector_for_cost *cost_vec)
11050 {
11051   if (slp_node)
11052     {
11053       stmt_vec_info slp_stmt_info;
11054       unsigned int i;
11055       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
11056 	{
11057 	  if (STMT_VINFO_LIVE_P (slp_stmt_info)
11058 	      && !vectorizable_live_operation (vinfo,
11059 					       slp_stmt_info, gsi, slp_node,
11060 					       slp_node_instance, i,
11061 					       vec_stmt_p, cost_vec))
11062 	    return false;
11063 	}
11064     }
11065   else if (STMT_VINFO_LIVE_P (stmt_info)
11066 	   && !vectorizable_live_operation (vinfo, stmt_info, gsi,
11067 					    slp_node, slp_node_instance, -1,
11068 					    vec_stmt_p, cost_vec))
11069     return false;
11070 
11071   return true;
11072 }
11073 
11074 /* Make sure the statement is vectorizable.  */
11075 
11076 opt_result
11077 vect_analyze_stmt (vec_info *vinfo,
11078 		   stmt_vec_info stmt_info, bool *need_to_vectorize,
11079 		   slp_tree node, slp_instance node_instance,
11080 		   stmt_vector_for_cost *cost_vec)
11081 {
11082   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11083   enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
11084   bool ok;
11085   gimple_seq pattern_def_seq;
11086 
11087   if (dump_enabled_p ())
11088     dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
11089 		     stmt_info->stmt);
11090 
11091   if (gimple_has_volatile_ops (stmt_info->stmt))
11092     return opt_result::failure_at (stmt_info->stmt,
11093 				   "not vectorized:"
11094 				   " stmt has volatile operands: %G\n",
11095 				   stmt_info->stmt);
11096 
11097   if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11098       && node == NULL
11099       && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
11100     {
11101       gimple_stmt_iterator si;
11102 
11103       for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
11104 	{
11105 	  stmt_vec_info pattern_def_stmt_info
11106 	    = vinfo->lookup_stmt (gsi_stmt (si));
11107 	  if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
11108 	      || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
11109 	    {
11110 	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
11111 	      if (dump_enabled_p ())
11112 		dump_printf_loc (MSG_NOTE, vect_location,
11113 				 "==> examining pattern def statement: %G",
11114 				 pattern_def_stmt_info->stmt);
11115 
11116 	      opt_result res
11117 		= vect_analyze_stmt (vinfo, pattern_def_stmt_info,
11118 				     need_to_vectorize, node, node_instance,
11119 				     cost_vec);
11120 	      if (!res)
11121 		return res;
11122 	    }
11123 	}
11124     }
11125 
11126   /* Skip stmts that do not need to be vectorized. In loops this is expected
11127      to include:
11128      - the COND_EXPR which is the loop exit condition
11129      - any LABEL_EXPRs in the loop
11130      - computations that are used only for array indexing or loop control.
11131      In basic blocks we only analyze statements that are a part of some SLP
11132      instance, therefore, all the statements are relevant.
11133 
11134      A pattern statement needs to be analyzed instead of the original
11135      statement if the original statement is not relevant.  Otherwise, we
11136      analyze both statements.  In basic blocks we are called from some
11137      SLP instance traversal; don't analyze pattern stmts in place of the
11138      original, since the pattern stmts are already part of an SLP instance.  */
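  /* For example (illustrative): in

	 for (i = 0; i < n; i++)
	   a[i] = b[i] + 1;

     the exit test "i < n" and the increment of "i", which is used only
     for the exit test and for addressing, are classified as irrelevant
     and are skipped here rather than vectorized.  */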
11139 
11140   stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
11141   if (!STMT_VINFO_RELEVANT_P (stmt_info)
11142       && !STMT_VINFO_LIVE_P (stmt_info))
11143     {
11144       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11145 	  && pattern_stmt_info
11146 	  && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11147 	      || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11148         {
11149           /* Analyze PATTERN_STMT instead of the original stmt.  */
11150 	  stmt_info = pattern_stmt_info;
11151           if (dump_enabled_p ())
11152 	    dump_printf_loc (MSG_NOTE, vect_location,
11153 			     "==> examining pattern statement: %G",
11154 			     stmt_info->stmt);
11155         }
11156       else
11157         {
11158           if (dump_enabled_p ())
11159             dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
11160 
11161           return opt_result::success ();
11162         }
11163     }
11164   else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11165 	   && node == NULL
11166 	   && pattern_stmt_info
11167 	   && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11168 	       || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11169     {
11170       /* Analyze PATTERN_STMT too.  */
11171       if (dump_enabled_p ())
11172 	dump_printf_loc (MSG_NOTE, vect_location,
11173 			 "==> examining pattern statement: %G",
11174 			 pattern_stmt_info->stmt);
11175 
11176       opt_result res
11177 	= vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
11178 			     node_instance, cost_vec);
11179       if (!res)
11180 	return res;
11181    }
11182 
11183   switch (STMT_VINFO_DEF_TYPE (stmt_info))
11184     {
11185       case vect_internal_def:
11186         break;
11187 
11188       case vect_reduction_def:
11189       case vect_nested_cycle:
11190          gcc_assert (!bb_vinfo
11191 		     && (relevance == vect_used_in_outer
11192 			 || relevance == vect_used_in_outer_by_reduction
11193 			 || relevance == vect_used_by_reduction
11194 			 || relevance == vect_unused_in_scope
11195 			 || relevance == vect_used_only_live));
11196          break;
11197 
11198       case vect_induction_def:
11199 	gcc_assert (!bb_vinfo);
11200 	break;
11201 
11202       case vect_constant_def:
11203       case vect_external_def:
11204       case vect_unknown_def_type:
11205       default:
11206         gcc_unreachable ();
11207     }
11208 
11209   tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11210   if (node)
11211     STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
11212 
11213   if (STMT_VINFO_RELEVANT_P (stmt_info))
11214     {
11215       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
11216       gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
11217 		  || (call && gimple_call_lhs (call) == NULL_TREE));
11218       *need_to_vectorize = true;
11219     }
11220 
11221   if (PURE_SLP_STMT (stmt_info) && !node)
11222     {
11223       if (dump_enabled_p ())
11224 	dump_printf_loc (MSG_NOTE, vect_location,
11225 			 "handled only by SLP analysis\n");
11226       return opt_result::success ();
11227     }
11228 
11229   ok = true;
11230   if (!bb_vinfo
11231       && (STMT_VINFO_RELEVANT_P (stmt_info)
11232 	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
11233     /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11234        -mveclibabi= takes preference over library functions with
11235        the simd attribute.  */
11236     ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11237 	  || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
11238 					   cost_vec)
11239 	  || vectorizable_conversion (vinfo, stmt_info,
11240 				      NULL, NULL, node, cost_vec)
11241 	  || vectorizable_operation (vinfo, stmt_info,
11242 				     NULL, NULL, node, cost_vec)
11243 	  || vectorizable_assignment (vinfo, stmt_info,
11244 				      NULL, NULL, node, cost_vec)
11245 	  || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11246 	  || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11247 	  || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11248 				     node, node_instance, cost_vec)
11249 	  || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
11250 				     NULL, node, cost_vec)
11251 	  || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11252 	  || vectorizable_condition (vinfo, stmt_info,
11253 				     NULL, NULL, node, cost_vec)
11254 	  || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11255 				      cost_vec)
11256 	  || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11257 				  stmt_info, NULL, node));
11258   else
11259     {
11260       if (bb_vinfo)
11261 	ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11262 	      || vectorizable_simd_clone_call (vinfo, stmt_info,
11263 					       NULL, NULL, node, cost_vec)
11264 	      || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
11265 					  cost_vec)
11266 	      || vectorizable_shift (vinfo, stmt_info,
11267 				     NULL, NULL, node, cost_vec)
11268 	      || vectorizable_operation (vinfo, stmt_info,
11269 					 NULL, NULL, node, cost_vec)
11270 	      || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
11271 					  cost_vec)
11272 	      || vectorizable_load (vinfo, stmt_info,
11273 				    NULL, NULL, node, cost_vec)
11274 	      || vectorizable_store (vinfo, stmt_info,
11275 				     NULL, NULL, node, cost_vec)
11276 	      || vectorizable_condition (vinfo, stmt_info,
11277 					 NULL, NULL, node, cost_vec)
11278 	      || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11279 					  cost_vec)
11280 	      || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
11281     }
11282 
11283   if (node)
11284     STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11285 
11286   if (!ok)
11287     return opt_result::failure_at (stmt_info->stmt,
11288 				   "not vectorized:"
11289 				   " relevant stmt not supported: %G",
11290 				   stmt_info->stmt);
11291 
11292   /* Stmts that are (also) "live" (i.e. that are used outside of the loop)
11293       need extra handling, except for vectorizable reductions.  */
11294   if (!bb_vinfo
11295       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11296       && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11297       && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11298 				    stmt_info, NULL, node, node_instance,
11299 				    false, cost_vec))
11300     return opt_result::failure_at (stmt_info->stmt,
11301 				   "not vectorized:"
11302 				   " live stmt not supported: %G",
11303 				   stmt_info->stmt);
11304 
11305   return opt_result::success ();
11306 }
11307 
11308 
11309 /* Function vect_transform_stmt.
11310 
11311    Create a vectorized stmt to replace STMT_INFO, and insert it at GSI.  */
11312 
11313 bool
11314 vect_transform_stmt (vec_info *vinfo,
11315 		     stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11316 		     slp_tree slp_node, slp_instance slp_node_instance)
11317 {
11318   bool is_store = false;
11319   gimple *vec_stmt = NULL;
11320   bool done;
11321 
11322   gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11323 
11324   tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11325   if (slp_node)
11326     STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
11327 
11328   switch (STMT_VINFO_TYPE (stmt_info))
11329     {
11330     case type_demotion_vec_info_type:
11331     case type_promotion_vec_info_type:
11332     case type_conversion_vec_info_type:
11333       done = vectorizable_conversion (vinfo, stmt_info,
11334 				      gsi, &vec_stmt, slp_node, NULL);
11335       gcc_assert (done);
11336       break;
11337 
11338     case induc_vec_info_type:
11339       done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11340 				     stmt_info, &vec_stmt, slp_node,
11341 				     NULL);
11342       gcc_assert (done);
11343       break;
11344 
11345     case shift_vec_info_type:
11346       done = vectorizable_shift (vinfo, stmt_info,
11347 				 gsi, &vec_stmt, slp_node, NULL);
11348       gcc_assert (done);
11349       break;
11350 
11351     case op_vec_info_type:
11352       done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11353 				     NULL);
11354       gcc_assert (done);
11355       break;
11356 
11357     case assignment_vec_info_type:
11358       done = vectorizable_assignment (vinfo, stmt_info,
11359 				      gsi, &vec_stmt, slp_node, NULL);
11360       gcc_assert (done);
11361       break;
11362 
11363     case load_vec_info_type:
11364       done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11365 				NULL);
11366       gcc_assert (done);
11367       break;
11368 
11369     case store_vec_info_type:
11370       done = vectorizable_store (vinfo, stmt_info,
11371 				 gsi, &vec_stmt, slp_node, NULL);
11372       gcc_assert (done);
11373       if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11374 	{
11375 	  /* In case of interleaving, the whole chain is vectorized when the
11376 	     last store in the chain is reached.  Store stmts before the last
11377 	     one are skipped, and their vec_stmt_info shouldn't be freed
11378 	     meanwhile.  */
11379 	  stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11380 	  if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11381 	    is_store = true;
11382 	}
11383       else
11384 	is_store = true;
11385       break;
11386 
11387     case condition_vec_info_type:
11388       done = vectorizable_condition (vinfo, stmt_info,
11389 				     gsi, &vec_stmt, slp_node, NULL);
11390       gcc_assert (done);
11391       break;
11392 
11393     case comparison_vec_info_type:
11394       done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11395 				      slp_node, NULL);
11396       gcc_assert (done);
11397       break;
11398 
11399     case call_vec_info_type:
11400       done = vectorizable_call (vinfo, stmt_info,
11401 				gsi, &vec_stmt, slp_node, NULL);
11402       break;
11403 
11404     case call_simd_clone_vec_info_type:
11405       done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11406 					   slp_node, NULL);
11407       break;
11408 
11409     case reduc_vec_info_type:
11410       done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11411 				       gsi, &vec_stmt, slp_node);
11412       gcc_assert (done);
11413       break;
11414 
11415     case cycle_phi_info_type:
11416       done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11417 				       &vec_stmt, slp_node, slp_node_instance);
11418       gcc_assert (done);
11419       break;
11420 
11421     case lc_phi_info_type:
11422       done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11423 				  stmt_info, &vec_stmt, slp_node);
11424       gcc_assert (done);
11425       break;
11426 
11427     case phi_info_type:
11428       done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
11429       gcc_assert (done);
11430       break;
11431 
11432     default:
11433       if (!STMT_VINFO_LIVE_P (stmt_info))
11434 	{
11435 	  if (dump_enabled_p ())
11436 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11437                              "stmt not supported.\n");
11438 	  gcc_unreachable ();
11439 	}
11440       done = true;
11441     }
11442 
11443   if (!slp_node && vec_stmt)
11444     gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
11445 
11446   if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
11447     {
11448       /* Handle stmts whose DEF is used outside the loop-nest that is
11449 	 being vectorized.  */
11450       done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
11451 				       slp_node_instance, true, NULL);
11452       gcc_assert (done);
11453     }
11454 
11455   if (slp_node)
11456     STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11457 
11458   return is_store;
11459 }
11460 
11461 
11462 /* Remove a group of stores (for SLP or interleaving), free their
11463    stmt_vec_info.  */
11464 
11465 void
11466 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11467 {
11468   stmt_vec_info next_stmt_info = first_stmt_info;
11469 
11470   while (next_stmt_info)
11471     {
11472       stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11473       next_stmt_info = vect_orig_stmt (next_stmt_info);
11474       /* Free the attached stmt_vec_info and remove the stmt.  */
11475       vinfo->remove_stmt (next_stmt_info);
11476       next_stmt_info = tmp;
11477     }
11478 }
11479 
11480 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11481    elements of type SCALAR_TYPE, or null if the target doesn't support
11482    such a type.
11483 
11484    If NUNITS is zero, return a vector type that contains elements of
11485    type SCALAR_TYPE, choosing whichever vector size the target prefers.
11486 
11487    If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11488    for this vectorization region and want to "autodetect" the best choice.
11489    Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11490    and we want the new type to be interoperable with it.   PREVAILING_MODE
11491    in this case can be a scalar integer mode or a vector mode; when it
11492    is a vector mode, the function acts like a tree-level version of
11493    related_vector_mode.  */
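/* For example (illustrative): on a target whose preferred SIMD mode for
   SImode is a 128-bit vector mode, calling this with PREVAILING_MODE ==
   VOIDmode, SCALAR_TYPE "int" and NUNITS == 0 would return a four-element
   "vector(4) int".  With a previously chosen vector PREVAILING_MODE and a
   nonzero NUNITS, the mode instead comes from related_vector_mode (with
   mode_for_vector as a fallback) and a type is returned only if that mode
   is a supported vector mode or a like-sized integer mode.  */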
11494 
11495 tree
11496 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11497 				     tree scalar_type, poly_uint64 nunits)
11498 {
11499   tree orig_scalar_type = scalar_type;
11500   scalar_mode inner_mode;
11501   machine_mode simd_mode;
11502   tree vectype;
11503 
11504   if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11505       && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11506     return NULL_TREE;
11507 
11508   unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11509 
11510   /* For vector types of elements whose mode precision doesn't
11511      match their type's precision we use an element type of mode
11512      precision.  The vectorization routines will have to make sure
11513      they support the proper result truncation/extension.
11514      We also make sure to build vector types with INTEGER_TYPE
11515      component type only.  */
11516   if (INTEGRAL_TYPE_P (scalar_type)
11517       && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11518 	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
11519     scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11520 						  TYPE_UNSIGNED (scalar_type));
11521 
11522   /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11523      When the component mode passes the above test simply use a type
11524      corresponding to that mode.  The theory is that any use that
11525      would cause problems with this will disable vectorization anyway.  */
11526   else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11527 	   && !INTEGRAL_TYPE_P (scalar_type))
11528     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11529 
11530   /* We can't build a vector type of elements with alignment bigger than
11531      their size.  */
11532   else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11533     scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11534 						  TYPE_UNSIGNED (scalar_type));
11535 
11536   /* If we fell back to using the mode, fail if there was
11537      no scalar type for it.  */
11538   if (scalar_type == NULL_TREE)
11539     return NULL_TREE;
11540 
11541   /* If no prevailing mode was supplied, use the mode the target prefers.
11542      Otherwise lookup a vector mode based on the prevailing mode.  */
11543   if (prevailing_mode == VOIDmode)
11544     {
11545       gcc_assert (known_eq (nunits, 0U));
11546       simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11547       if (SCALAR_INT_MODE_P (simd_mode))
11548 	{
11549 	  /* Traditional behavior is not to take the integer mode
11550 	     literally, but simply to use it as a way of determining
11551 	     the vector size.  It is up to mode_for_vector to decide
11552 	     what the TYPE_MODE should be.
11553 
11554 	     Note that nunits == 1 is allowed in order to support single
11555 	     element vector types.  */
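	  /* For example (illustrative): a preferred_simd_mode of DImode
	     simply requests 8-byte vectors; for "int" elements the
	     division below gives nunits == 2 and mode_for_vector is asked
	     for a V2SI-style mode.  */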
11556 	  if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11557 	      || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11558 	    return NULL_TREE;
11559 	}
11560     }
11561   else if (SCALAR_INT_MODE_P (prevailing_mode)
11562 	   || !related_vector_mode (prevailing_mode,
11563 				    inner_mode, nunits).exists (&simd_mode))
11564     {
11565       /* Fall back to using mode_for_vector, mostly in the hope of being
11566 	 able to use an integer mode.  */
11567       if (known_eq (nunits, 0U)
11568 	  && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11569 	return NULL_TREE;
11570 
11571       if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11572 	return NULL_TREE;
11573     }
11574 
11575   vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11576 
11577   /* In cases where the mode was chosen by mode_for_vector, check that
11578      the target actually supports the chosen mode, or that it at least
11579      allows the vector mode to be replaced by a like-sized integer.  */
11580   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11581       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11582     return NULL_TREE;
11583 
11584   /* Re-attach the address-space qualifier if we canonicalized the scalar
11585      type.  */
11586   if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11587     return build_qualified_type
11588 	     (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11589 
11590   return vectype;
11591 }
11592 
11593 /* Function get_vectype_for_scalar_type.
11594 
11595    Returns the vector type corresponding to SCALAR_TYPE as supported
11596    by the target.  If GROUP_SIZE is nonzero and we're performing BB
11597    vectorization, make sure that the number of elements in the vector
11598    is no bigger than GROUP_SIZE.  */
11599 
11600 tree
11601 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11602 			     unsigned int group_size)
11603 {
11604   /* For BB vectorization, we should always have a group size once we've
11605      constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11606      are tentative requests during things like early data reference
11607      analysis and pattern recognition.  */
11608   if (is_a <bb_vec_info> (vinfo))
11609     gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11610   else
11611     group_size = 0;
11612 
11613   tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11614 						      scalar_type);
11615   if (vectype && vinfo->vector_mode == VOIDmode)
11616     vinfo->vector_mode = TYPE_MODE (vectype);
11617 
11618   /* Register the natural choice of vector type, before the group size
11619      has been applied.  */
11620   if (vectype)
11621     vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11622 
11623   /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11624      try again with an explicit number of elements.  */
11625   if (vectype
11626       && group_size
11627       && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11628     {
11629       /* Start with the biggest number of units that fits within
11630 	 GROUP_SIZE and halve it until we find a valid vector type.
11631 	 Usually either the first attempt will succeed or all will
11632 	 fail (in the latter case because GROUP_SIZE is too small
11633 	 for the target), but it's possible that a target could have
11634 	 a hole between supported vector types.
11635 
11636 	 If GROUP_SIZE is not a power of 2, this has the effect of
11637 	 trying the largest power of 2 that fits within the group,
11638 	 even though the group is not a multiple of that vector size.
11639 	 The BB vectorizer will then try to carve up the group into
11640 	 smaller pieces.  */
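      /* For example (illustrative): with GROUP_SIZE == 6 the loop below
	 first tries a 4-element vector type and then a 2-element one,
	 stopping at the first that the target supports.  */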
11641       unsigned int nunits = 1 << floor_log2 (group_size);
11642       do
11643 	{
11644 	  vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11645 							 scalar_type, nunits);
11646 	  nunits /= 2;
11647 	}
11648       while (nunits > 1 && !vectype);
11649     }
11650 
11651   return vectype;
11652 }
11653 
11654 /* Return the vector type corresponding to SCALAR_TYPE as supported
11655    by the target.  NODE, if nonnull, is the SLP tree node that will
11656    use the returned vector type.  */
11657 
11658 tree
11659 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11660 {
11661   unsigned int group_size = 0;
11662   if (node)
11663     group_size = SLP_TREE_LANES (node);
11664   return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11665 }
11666 
11667 /* Function get_mask_type_for_scalar_type.
11668 
11669    Returns the mask type corresponding to a result of comparison
11670    of vectors of specified SCALAR_TYPE as supported by target.
11671    If GROUP_SIZE is nonzero and we're performing BB vectorization,
11672    make sure that the number of elements in the vector is no bigger
11673    than GROUP_SIZE.  */
11674 
11675 tree
11676 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11677 			       unsigned int group_size)
11678 {
11679   tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11680 
11681   if (!vectype)
11682     return NULL;
11683 
11684   return truth_type_for (vectype);
11685 }
11686 
11687 /* Function get_same_sized_vectype
11688 
11689    Returns a vector type corresponding to SCALAR_TYPE of size
11690    VECTOR_TYPE if supported by the target.  */
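/* For example (illustrative): given SCALAR_TYPE "short int" and a 16-byte
   VECTOR_TYPE of "float" elements, the result would be an 8-element vector
   of "short int" (16 / 2 lanes), assuming the target supports such a
   type.  */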
11691 
11692 tree
11693 get_same_sized_vectype (tree scalar_type, tree vector_type)
11694 {
11695   if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11696     return truth_type_for (vector_type);
11697 
11698   poly_uint64 nunits;
11699   if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11700 		   GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11701     return NULL_TREE;
11702 
11703   return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11704 					      scalar_type, nunits);
11705 }
11706 
11707 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11708    would not change the chosen vector modes.  */
11709 
11710 bool
11711 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11712 {
11713   for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11714        i != vinfo->used_vector_modes.end (); ++i)
11715     if (!VECTOR_MODE_P (*i)
11716 	|| related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11717       return false;
11718   return true;
11719 }
11720 
11721 /* Function vect_is_simple_use.
11722 
11723    Input:
11724    VINFO - the vect info of the loop or basic block that is being vectorized.
11725    OPERAND - operand in the loop or bb.
11726    Output:
11727    DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11728      case OPERAND is an SSA_NAME that is defined in the vectorizable region
11729    DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11730      the definition could be anywhere in the function
11731    DT - the type of definition
11732 
11733    Returns whether a stmt with OPERAND can be vectorized.
11734    For loops, supportable operands are constants, loop invariants, and operands
11735    that are defined by the current iteration of the loop.  Unsupportable
11736    operands are those that are defined by a previous iteration of the loop (as
11737    is the case in reduction/induction computations).
11738    For basic blocks, supportable operands are constants and bb invariants.
11739    For now, operands defined outside the basic block are not supported.  */
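/* As an illustration, when loop-vectorizing

     for (i = 0; i < n; i++)
       a[i] = b[i] * 4 + x;

   the constant 4 gets classified as vect_constant_def, the loop
   invariant x as vect_external_def, and the SSA name holding the
   result of b[i] * 4 as vect_internal_def.  */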
11740 
11741 bool
11742 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11743 		    stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11744 {
11745   if (def_stmt_info_out)
11746     *def_stmt_info_out = NULL;
11747   if (def_stmt_out)
11748     *def_stmt_out = NULL;
11749   *dt = vect_unknown_def_type;
11750 
11751   if (dump_enabled_p ())
11752     {
11753       dump_printf_loc (MSG_NOTE, vect_location,
11754                        "vect_is_simple_use: operand ");
11755       if (TREE_CODE (operand) == SSA_NAME
11756 	  && !SSA_NAME_IS_DEFAULT_DEF (operand))
11757 	dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11758       else
11759 	dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11760     }
11761 
11762   if (CONSTANT_CLASS_P (operand))
11763     *dt = vect_constant_def;
11764   else if (is_gimple_min_invariant (operand))
11765     *dt = vect_external_def;
11766   else if (TREE_CODE (operand) != SSA_NAME)
11767     *dt = vect_unknown_def_type;
11768   else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11769     *dt = vect_external_def;
11770   else
11771     {
11772       gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11773       stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11774       if (!stmt_vinfo)
11775 	*dt = vect_external_def;
11776       else
11777 	{
11778 	  stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11779 	  def_stmt = stmt_vinfo->stmt;
11780 	  *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11781 	  if (def_stmt_info_out)
11782 	    *def_stmt_info_out = stmt_vinfo;
11783 	}
11784       if (def_stmt_out)
11785 	*def_stmt_out = def_stmt;
11786     }
11787 
11788   if (dump_enabled_p ())
11789     {
11790       dump_printf (MSG_NOTE, ", type of def: ");
11791       switch (*dt)
11792 	{
11793 	case vect_uninitialized_def:
11794 	  dump_printf (MSG_NOTE, "uninitialized\n");
11795 	  break;
11796 	case vect_constant_def:
11797 	  dump_printf (MSG_NOTE, "constant\n");
11798 	  break;
11799 	case vect_external_def:
11800 	  dump_printf (MSG_NOTE, "external\n");
11801 	  break;
11802 	case vect_internal_def:
11803 	  dump_printf (MSG_NOTE, "internal\n");
11804 	  break;
11805 	case vect_induction_def:
11806 	  dump_printf (MSG_NOTE, "induction\n");
11807 	  break;
11808 	case vect_reduction_def:
11809 	  dump_printf (MSG_NOTE, "reduction\n");
11810 	  break;
11811 	case vect_double_reduction_def:
11812 	  dump_printf (MSG_NOTE, "double reduction\n");
11813 	  break;
11814 	case vect_nested_cycle:
11815 	  dump_printf (MSG_NOTE, "nested cycle\n");
11816 	  break;
11817 	case vect_unknown_def_type:
11818 	  dump_printf (MSG_NOTE, "unknown\n");
11819 	  break;
11820 	}
11821     }
11822 
11823   if (*dt == vect_unknown_def_type)
11824     {
11825       if (dump_enabled_p ())
11826         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11827                          "Unsupported pattern.\n");
11828       return false;
11829     }
11830 
11831   return true;
11832 }
11833 
11834 /* Function vect_is_simple_use.
11835 
11836    Same as vect_is_simple_use but also determines the vector operand
11837    type of OPERAND and stores it to *VECTYPE.  If the definition of
11838    OPERAND is vect_uninitialized_def, vect_constant_def or
11839    vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
11840    is responsible for computing the best-suited vector type for the
11841    scalar operand.  */
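/* A rough sketch of a typical caller (illustrative only, not taken
   from any particular function):

     enum vect_def_type dt;
     tree vectype;
     if (!vect_is_simple_use (op, vinfo, &dt, &vectype))
       return false;
     if (!vectype)
       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op));

   i.e. the caller falls back to computing a vector type itself when
   the definition is constant or external.  */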
11842 
11843 bool
11844 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11845 		    tree *vectype, stmt_vec_info *def_stmt_info_out,
11846 		    gimple **def_stmt_out)
11847 {
11848   stmt_vec_info def_stmt_info;
11849   gimple *def_stmt;
11850   if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11851     return false;
11852 
11853   if (def_stmt_out)
11854     *def_stmt_out = def_stmt;
11855   if (def_stmt_info_out)
11856     *def_stmt_info_out = def_stmt_info;
11857 
11858   /* Now get a vector type if the def is internal, otherwise supply
11859      NULL_TREE and leave it up to the caller to figure out a proper
11860      type for the use stmt.  */
11861   if (*dt == vect_internal_def
11862       || *dt == vect_induction_def
11863       || *dt == vect_reduction_def
11864       || *dt == vect_double_reduction_def
11865       || *dt == vect_nested_cycle)
11866     {
11867       *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11868       gcc_assert (*vectype != NULL_TREE);
11869       if (dump_enabled_p ())
11870 	dump_printf_loc (MSG_NOTE, vect_location,
11871 			 "vect_is_simple_use: vectype %T\n", *vectype);
11872     }
11873   else if (*dt == vect_uninitialized_def
11874 	   || *dt == vect_constant_def
11875 	   || *dt == vect_external_def)
11876     *vectype = NULL_TREE;
11877   else
11878     gcc_unreachable ();
11879 
11880   return true;
11881 }
11882 
11883 /* Function vect_is_simple_use.
11884 
11885    Same as vect_is_simple_use but determines the operand from operand
11886    position OPERAND of either STMT or SLP_NODE, filling in *OP
11887    and *SLP_DEF (when SLP_NODE is not NULL).  */
11888 
11889 bool
11890 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11891 		    unsigned operand, tree *op, slp_tree *slp_def,
11892 		    enum vect_def_type *dt,
11893 		    tree *vectype, stmt_vec_info *def_stmt_info_out)
11894 {
11895   if (slp_node)
11896     {
11897       slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11898       *slp_def = child;
11899       *vectype = SLP_TREE_VECTYPE (child);
11900       if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11901 	{
11902 	  *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11903 	  return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11904 	}
11905       else
11906 	{
11907 	  if (def_stmt_info_out)
11908 	    *def_stmt_info_out = NULL;
11909 	  *op = SLP_TREE_SCALAR_OPS (child)[0];
11910 	  *dt = SLP_TREE_DEF_TYPE (child);
11911 	  return true;
11912 	}
11913     }
11914   else
11915     {
11916       *slp_def = NULL;
11917       if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11918 	{
11919 	  if (gimple_assign_rhs_code (ass) == COND_EXPR
11920 	      && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11921 	    {
11922 	      if (operand < 2)
11923 		*op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11924 	      else
11925 		*op = gimple_op (ass, operand);
11926 	    }
11927 	  else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11928 	    *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11929 	  else
11930 	    *op = gimple_op (ass, operand + 1);
11931 	}
11932       else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11933 	*op = gimple_call_arg (call, operand);
11934       else
11935 	gcc_unreachable ();
11936       return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11937     }
11938 }
11939 
11940 /* If OP is not NULL and is external or constant, update its vector
11941    type with VECTYPE.  Returns true if successful or false if not,
11942    for example when conflicting vector types are present.  */
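/* A minimal sketch of the intended use when analyzing an SLP
   operation whose operands were obtained via vect_is_simple_use
   (illustrative only):

     if (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
	 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype))
       {
	 if (dump_enabled_p ())
	   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			    "incompatible vector types for invariants\n");
	 return false;
       }

   so that constant and external SLP children agree on the vector type
   the operation will use.  */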
11943 
11944 bool
11945 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11946 {
11947   if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11948     return true;
11949   if (SLP_TREE_VECTYPE (op))
11950     return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11951   SLP_TREE_VECTYPE (op) = vectype;
11952   return true;
11953 }
11954 
11955 /* Function supportable_widening_operation
11956 
11957    Check whether an operation represented by the code CODE is a
11958    widening operation that is supported by the target platform in
11959    vector form (i.e., when operating on arguments of type VECTYPE_IN
11960    producing a result of type VECTYPE_OUT).
11961 
11962    Widening operations we currently support include NOP (CONVERT), FLOAT,
11963    FIX_TRUNC, WIDEN_MULT, WIDEN_PLUS, WIDEN_MINUS and WIDEN_LSHIFT.  This
11964    function checks if these operations are supported by the target platform
11965    either directly (via vector tree-codes) or via target builtins.
11966 
11967    Output:
11968    - CODE1 and CODE2 are codes of vector operations to be used when
11969    vectorizing the operation, if available.
11970    - MULTI_STEP_CVT determines the number of required intermediate steps in
11971    case of multi-step conversion (like char->short->int - in that case
11972    MULTI_STEP_CVT will be 1).
11973    - INTERM_TYPES contains the intermediate type required to perform the
11974    widening operation (short in the above example).  */
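/* For the char->short->int example above, a successful return on a
   target providing only single-step unpacking typically sets *CODE1
   and *CODE2 to VEC_UNPACK_LO_EXPR and VEC_UNPACK_HI_EXPR (swapped on
   big-endian targets), sets *MULTI_STEP_CVT to 1 and pushes the
   intermediate short vector type onto INTERM_TYPES.  */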
11975 
11976 bool
11977 supportable_widening_operation (vec_info *vinfo,
11978 				enum tree_code code, stmt_vec_info stmt_info,
11979 				tree vectype_out, tree vectype_in,
11980                                 enum tree_code *code1, enum tree_code *code2,
11981                                 int *multi_step_cvt,
11982                                 vec<tree> *interm_types)
11983 {
11984   loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11985   class loop *vect_loop = NULL;
11986   machine_mode vec_mode;
11987   enum insn_code icode1, icode2;
11988   optab optab1, optab2;
11989   tree vectype = vectype_in;
11990   tree wide_vectype = vectype_out;
11991   enum tree_code c1, c2;
11992   int i;
11993   tree prev_type, intermediate_type;
11994   machine_mode intermediate_mode, prev_mode;
11995   optab optab3, optab4;
11996 
11997   *multi_step_cvt = 0;
11998   if (loop_info)
11999     vect_loop = LOOP_VINFO_LOOP (loop_info);
12000 
12001   switch (code)
12002     {
12003     case WIDEN_MULT_EXPR:
12004       /* The result of a vectorized widening operation usually requires
12005 	 two vectors (because the widened results do not fit into one vector).
12006 	 The generated vector results would normally be expected to be
12007 	 generated in the same order as in the original scalar computation,
12008 	 i.e. if 8 results are generated in each vector iteration, they are
12009 	 to be organized as follows:
12010 		vect1: [res1,res2,res3,res4],
12011 		vect2: [res5,res6,res7,res8].
12012 
12013 	 However, in the special case that the result of the widening
12014 	 operation is used in a reduction computation only, the order doesn't
12015 	 matter (because when vectorizing a reduction we change the order of
12016 	 the computation).  Some targets can take advantage of this and
12017 	 generate more efficient code.  For example, targets like Altivec,
12018 	 that support widen_mult using a sequence of {mult_even,mult_odd}
12019 	 generate the following vectors:
12020 		vect1: [res1,res3,res5,res7],
12021 		vect2: [res2,res4,res6,res8].
12022 
12023 	 When vectorizing outer-loops, we execute the inner-loop sequentially
12024 	 (each vectorized inner-loop iteration contributes to VF outer-loop
12025 	 iterations in parallel).  We therefore don't allow changing the
12026 	 order of the computation in the inner-loop during outer-loop
12027 	 vectorization.  */
12028       /* TODO: Another case in which order doesn't *really* matter is when we
12029 	 widen and then contract again, e.g. (short)((int)x * y >> 8).
12030 	 Normally, pack_trunc performs an even/odd permute, whereas the
12031 	 repack from an even/odd expansion would be an interleave, which
12032 	 would be significantly simpler for e.g. AVX2.  */
12033       /* In any case, in order to avoid duplicating the code below, recurse
12034 	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
12035 	 are properly set up for the caller.  If we fail, we'll continue with
12036 	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
12037       if (vect_loop
12038 	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
12039 	  && !nested_in_vect_loop_p (vect_loop, stmt_info)
12040 	  && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
12041 					     stmt_info, vectype_out,
12042 					     vectype_in, code1, code2,
12043 					     multi_step_cvt, interm_types))
12044         {
12045           /* Elements in a vector with vect_used_by_reduction property cannot
12046              be reordered if the use chain with this property does not have the
12047              same operation.  One such example is s += a * b, where elements
12048              in a and b cannot be reordered.  Here we check if the vector defined
12049              by STMT is only directly used in the reduction statement.  */
12050 	  tree lhs = gimple_assign_lhs (stmt_info->stmt);
12051 	  stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
12052 	  if (use_stmt_info
12053 	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
12054 	    return true;
12055         }
12056       c1 = VEC_WIDEN_MULT_LO_EXPR;
12057       c2 = VEC_WIDEN_MULT_HI_EXPR;
12058       break;
12059 
12060     case DOT_PROD_EXPR:
12061       c1 = DOT_PROD_EXPR;
12062       c2 = DOT_PROD_EXPR;
12063       break;
12064 
12065     case SAD_EXPR:
12066       c1 = SAD_EXPR;
12067       c2 = SAD_EXPR;
12068       break;
12069 
12070     case VEC_WIDEN_MULT_EVEN_EXPR:
12071       /* Support the recursion induced just above.  */
12072       c1 = VEC_WIDEN_MULT_EVEN_EXPR;
12073       c2 = VEC_WIDEN_MULT_ODD_EXPR;
12074       break;
12075 
12076     case WIDEN_LSHIFT_EXPR:
12077       c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
12078       c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
12079       break;
12080 
12081     case WIDEN_PLUS_EXPR:
12082       c1 = VEC_WIDEN_PLUS_LO_EXPR;
12083       c2 = VEC_WIDEN_PLUS_HI_EXPR;
12084       break;
12085 
12086     case WIDEN_MINUS_EXPR:
12087       c1 = VEC_WIDEN_MINUS_LO_EXPR;
12088       c2 = VEC_WIDEN_MINUS_HI_EXPR;
12089       break;
12090 
12091     CASE_CONVERT:
12092       c1 = VEC_UNPACK_LO_EXPR;
12093       c2 = VEC_UNPACK_HI_EXPR;
12094       break;
12095 
12096     case FLOAT_EXPR:
12097       c1 = VEC_UNPACK_FLOAT_LO_EXPR;
12098       c2 = VEC_UNPACK_FLOAT_HI_EXPR;
12099       break;
12100 
12101     case FIX_TRUNC_EXPR:
12102       c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
12103       c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
12104       break;
12105 
12106     default:
12107       gcc_unreachable ();
12108     }
12109 
12110   if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
12111     std::swap (c1, c2);
12112 
12113   if (code == FIX_TRUNC_EXPR)
12114     {
12115       /* The signedness is determined from the output operand.  */
12116       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12117       optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
12118     }
12119   else if (CONVERT_EXPR_CODE_P (code)
12120 	   && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
12121 	   && VECTOR_BOOLEAN_TYPE_P (vectype)
12122 	   && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
12123 	   && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12124     {
12125       /* If the input and result modes are the same, a different optab
12126 	 is needed where we pass in the number of units in vectype.  */
12127       optab1 = vec_unpacks_sbool_lo_optab;
12128       optab2 = vec_unpacks_sbool_hi_optab;
12129     }
12130   else
12131     {
12132       optab1 = optab_for_tree_code (c1, vectype, optab_default);
12133       optab2 = optab_for_tree_code (c2, vectype, optab_default);
12134     }
12135 
12136   if (!optab1 || !optab2)
12137     return false;
12138 
12139   vec_mode = TYPE_MODE (vectype);
12140   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
12141        || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
12142     return false;
12143 
12144   *code1 = c1;
12145   *code2 = c2;
12146 
12147   if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12148       && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12149     {
12150       if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12151 	return true;
12152       /* For scalar masks we may have different boolean
12153 	 vector types having the same QImode.  Thus we
12154 	 add an additional check on the number of elements.  */
12155       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
12156 		    TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12157 	return true;
12158     }
12159 
12160   /* Check if it's a multi-step conversion that can be done using intermediate
12161      types.  */
12162 
12163   prev_type = vectype;
12164   prev_mode = vec_mode;
12165 
12166   if (!CONVERT_EXPR_CODE_P (code))
12167     return false;
12168 
12169   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12170      intermediate steps in the promotion sequence.  We try
12171      MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
12172      not.  */
12173   interm_types->create (MAX_INTERM_CVT_STEPS);
12174   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12175     {
12176       intermediate_mode = insn_data[icode1].operand[0].mode;
12177       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12178 	intermediate_type
12179 	  = vect_halve_mask_nunits (prev_type, intermediate_mode);
12180       else
12181 	intermediate_type
12182 	  = lang_hooks.types.type_for_mode (intermediate_mode,
12183 					    TYPE_UNSIGNED (prev_type));
12184 
12185       if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12186 	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
12187 	  && intermediate_mode == prev_mode
12188 	  && SCALAR_INT_MODE_P (prev_mode))
12189 	{
12190 	  /* If the input and result modes are the same, a different optab
12191 	     is needed where we pass in the number of units in vectype.  */
12192 	  optab3 = vec_unpacks_sbool_lo_optab;
12193 	  optab4 = vec_unpacks_sbool_hi_optab;
12194 	}
12195       else
12196 	{
12197 	  optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
12198 	  optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
12199 	}
12200 
12201       if (!optab3 || !optab4
12202           || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
12203 	  || insn_data[icode1].operand[0].mode != intermediate_mode
12204 	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
12205 	  || insn_data[icode2].operand[0].mode != intermediate_mode
12206 	  || ((icode1 = optab_handler (optab3, intermediate_mode))
12207 	      == CODE_FOR_nothing)
12208 	  || ((icode2 = optab_handler (optab4, intermediate_mode))
12209 	      == CODE_FOR_nothing))
12210 	break;
12211 
12212       interm_types->quick_push (intermediate_type);
12213       (*multi_step_cvt)++;
12214 
12215       if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12216 	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12217 	{
12218 	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12219 	    return true;
12220 	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
12221 			TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12222 	    return true;
12223 	}
12224 
12225       prev_type = intermediate_type;
12226       prev_mode = intermediate_mode;
12227     }
12228 
12229   interm_types->release ();
12230   return false;
12231 }
12232 
12233 
12234 /* Function supportable_narrowing_operation
12235 
12236    Check whether an operation represented by the code CODE is a
12237    narrowing operation that is supported by the target platform in
12238    vector form (i.e., when operating on arguments of type VECTYPE_IN
12239    and producing a result of type VECTYPE_OUT).
12240 
12241    Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12242    and FLOAT.  This function checks if these operations are supported by
12243    the target platform directly via vector tree-codes.
12244 
12245    Output:
12246    - CODE1 is the code of a vector operation to be used when
12247    vectorizing the operation, if available.
12248    - MULTI_STEP_CVT determines the number of required intermediate steps in
12249    case of multi-step conversion (like int->short->char - in that case
12250    MULTI_STEP_CVT will be 1).
12251    - INTERM_TYPES contains the intermediate type required to perform the
12252    narrowing operation (short in the above example).   */
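/* For the int->short->char example above, a successful return
   typically sets *CODE1 to VEC_PACK_TRUNC_EXPR, sets *MULTI_STEP_CVT
   to 1 and pushes the intermediate short vector type onto
   INTERM_TYPES.  */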
12253 
12254 bool
12255 supportable_narrowing_operation (enum tree_code code,
12256 				 tree vectype_out, tree vectype_in,
12257 				 enum tree_code *code1, int *multi_step_cvt,
12258                                  vec<tree> *interm_types)
12259 {
12260   machine_mode vec_mode;
12261   enum insn_code icode1;
12262   optab optab1, interm_optab;
12263   tree vectype = vectype_in;
12264   tree narrow_vectype = vectype_out;
12265   enum tree_code c1;
12266   tree intermediate_type, prev_type;
12267   machine_mode intermediate_mode, prev_mode;
12268   int i;
12269   unsigned HOST_WIDE_INT n_elts;
12270   bool uns;
12271 
12272   *multi_step_cvt = 0;
12273   switch (code)
12274     {
12275     CASE_CONVERT:
12276       c1 = VEC_PACK_TRUNC_EXPR;
12277       if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12278 	  && VECTOR_BOOLEAN_TYPE_P (vectype)
12279 	  && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
12280 	  && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
12281 	  && n_elts < BITS_PER_UNIT)
12282 	optab1 = vec_pack_sbool_trunc_optab;
12283       else
12284 	optab1 = optab_for_tree_code (c1, vectype, optab_default);
12285       break;
12286 
12287     case FIX_TRUNC_EXPR:
12288       c1 = VEC_PACK_FIX_TRUNC_EXPR;
12289       /* The signedness is determined from the output operand.  */
12290       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12291       break;
12292 
12293     case FLOAT_EXPR:
12294       c1 = VEC_PACK_FLOAT_EXPR;
12295       optab1 = optab_for_tree_code (c1, vectype, optab_default);
12296       break;
12297 
12298     default:
12299       gcc_unreachable ();
12300     }
12301 
12302   if (!optab1)
12303     return false;
12304 
12305   vec_mode = TYPE_MODE (vectype);
12306   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12307     return false;
12308 
12309   *code1 = c1;
12310 
12311   if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12312     {
12313       if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12314 	return true;
12315       /* For scalar masks we may have different boolean
12316 	 vector types having the same QImode.  Thus we
12317 	 add an additional check on the number of elements.  */
12318       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12319 		    TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12320 	return true;
12321     }
12322 
12323   if (code == FLOAT_EXPR)
12324     return false;
12325 
12326   /* Check if it's a multi-step conversion that can be done using intermediate
12327      types.  */
12328   prev_mode = vec_mode;
12329   prev_type = vectype;
12330   if (code == FIX_TRUNC_EXPR)
12331     uns = TYPE_UNSIGNED (vectype_out);
12332   else
12333     uns = TYPE_UNSIGNED (vectype);
12334 
12335   /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12336      conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12337      costly than signed.  */
12338   if (code == FIX_TRUNC_EXPR && uns)
12339     {
12340       enum insn_code icode2;
12341 
12342       intermediate_type
12343 	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12344       interm_optab
12345 	= optab_for_tree_code (c1, intermediate_type, optab_default);
12346       if (interm_optab != unknown_optab
12347 	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12348 	  && insn_data[icode1].operand[0].mode
12349 	     == insn_data[icode2].operand[0].mode)
12350 	{
12351 	  uns = false;
12352 	  optab1 = interm_optab;
12353 	  icode1 = icode2;
12354 	}
12355     }
12356 
12357   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12358      intermediate steps in the narrowing sequence.  We try
12359      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
12360   interm_types->create (MAX_INTERM_CVT_STEPS);
12361   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12362     {
12363       intermediate_mode = insn_data[icode1].operand[0].mode;
12364       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12365 	intermediate_type
12366 	  = vect_double_mask_nunits (prev_type, intermediate_mode);
12367       else
12368 	intermediate_type
12369 	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12370       if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12371 	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
12372 	  && SCALAR_INT_MODE_P (prev_mode)
12373 	  && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
12374 	  && n_elts < BITS_PER_UNIT)
12375 	interm_optab = vec_pack_sbool_trunc_optab;
12376       else
12377 	interm_optab
12378 	  = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12379 				 optab_default);
12380       if (!interm_optab
12381 	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12382 	  || insn_data[icode1].operand[0].mode != intermediate_mode
12383 	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12384 	      == CODE_FOR_nothing))
12385 	break;
12386 
12387       interm_types->quick_push (intermediate_type);
12388       (*multi_step_cvt)++;
12389 
12390       if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12391 	{
12392 	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12393 	    return true;
12394 	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12395 			TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12396 	    return true;
12397 	}
12398 
12399       prev_mode = intermediate_mode;
12400       prev_type = intermediate_type;
12401       optab1 = interm_optab;
12402     }
12403 
12404   interm_types->release ();
12405   return false;
12406 }
12407 
12408 /* Generate and return a vector mask of MASK_TYPE such that
12409    mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12410    Add the statements to SEQ.  */
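/* For instance, with a four-lane MASK_TYPE, START_INDEX 12 and
   END_INDEX 15, the generated IFN_WHILE_ULT call yields the mask
   { 1, 1, 1, 0 }: lanes 0..2 satisfy 12 + J < 15 while lane 3 does
   not.  */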
12411 
12412 tree
12413 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
12414 		tree end_index, const char *name)
12415 {
12416   tree cmp_type = TREE_TYPE (start_index);
12417   gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12418 						       cmp_type, mask_type,
12419 						       OPTIMIZE_FOR_SPEED));
12420   gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12421 					    start_index, end_index,
12422 					    build_zero_cst (mask_type));
12423   tree tmp;
12424   if (name)
12425     tmp = make_temp_ssa_name (mask_type, NULL, name);
12426   else
12427     tmp = make_ssa_name (mask_type);
12428   gimple_call_set_lhs (call, tmp);
12429   gimple_seq_add_stmt (seq, call);
12430   return tmp;
12431 }
12432 
12433 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12434    J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */
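/* With the inputs from the example above (four lanes, START_INDEX 12,
   END_INDEX 15) the result is the complementary mask { 0, 0, 0, 1 }.  */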
12435 
12436 tree
12437 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12438 		    tree end_index)
12439 {
12440   tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
12441   return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12442 }
12443 
12444 /* Try to compute the vector types required to vectorize STMT_INFO,
12445    returning true on success and false if vectorization isn't possible.
12446    If GROUP_SIZE is nonzero and we're performing BB vectorization,
12447    make sure that the number of elements in the vectors is no bigger
12448    than GROUP_SIZE.
12449 
12450    On success:
12451 
12452    - Set *STMT_VECTYPE_OUT to:
12453      - NULL_TREE if the statement doesn't need to be vectorized;
12454      - the equivalent of STMT_VINFO_VECTYPE otherwise.
12455 
12456    - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12457      number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12458      statement does not help to determine the overall number of units.  */
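/* As an illustration, for a widening conversion such as

     int_var = (int) short_var;

   with 128-bit vectors, *STMT_VECTYPE_OUT would be a four-element int
   vector while *NUNITS_VECTYPE_OUT would be the eight-element short
   vector derived from the smallest scalar type, so the vectorization
   factor also accounts for the narrower input.  */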
12459 
12460 opt_result
12461 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12462 				tree *stmt_vectype_out,
12463 				tree *nunits_vectype_out,
12464 				unsigned int group_size)
12465 {
12466   gimple *stmt = stmt_info->stmt;
12467 
12468   /* For BB vectorization, we should always have a group size once we've
12469      constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12470      are tentative requests during things like early data reference
12471      analysis and pattern recognition.  */
12472   if (is_a <bb_vec_info> (vinfo))
12473     gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12474   else
12475     group_size = 0;
12476 
12477   *stmt_vectype_out = NULL_TREE;
12478   *nunits_vectype_out = NULL_TREE;
12479 
12480   if (gimple_get_lhs (stmt) == NULL_TREE
12481       /* MASK_STORE has no lhs, but is ok.  */
12482       && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12483     {
12484       if (is_a <gcall *> (stmt))
12485 	{
12486 	  /* Ignore calls with no lhs.  These must be calls to
12487 	     #pragma omp simd functions, and what vectorization factor
12488 	     it really needs can't be determined until
12489 	     vectorizable_simd_clone_call.  */
12490 	  if (dump_enabled_p ())
12491 	    dump_printf_loc (MSG_NOTE, vect_location,
12492 			     "defer to SIMD clone analysis.\n");
12493 	  return opt_result::success ();
12494 	}
12495 
12496       return opt_result::failure_at (stmt,
12497 				     "not vectorized: irregular stmt.%G", stmt);
12498     }
12499 
12500   tree vectype;
12501   tree scalar_type = NULL_TREE;
12502   if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12503     {
12504       vectype = STMT_VINFO_VECTYPE (stmt_info);
12505       if (dump_enabled_p ())
12506 	dump_printf_loc (MSG_NOTE, vect_location,
12507 			 "precomputed vectype: %T\n", vectype);
12508     }
12509   else if (vect_use_mask_type_p (stmt_info))
12510     {
12511       unsigned int precision = stmt_info->mask_precision;
12512       scalar_type = build_nonstandard_integer_type (precision, 1);
12513       vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12514       if (!vectype)
12515 	return opt_result::failure_at (stmt, "not vectorized: unsupported"
12516 				       " data-type %T\n", scalar_type);
12517       if (dump_enabled_p ())
12518 	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12519     }
12520   else
12521     {
12522       if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12523 	scalar_type = TREE_TYPE (DR_REF (dr));
12524       else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12525 	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12526       else
12527 	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12528 
12529       if (dump_enabled_p ())
12530 	{
12531 	  if (group_size)
12532 	    dump_printf_loc (MSG_NOTE, vect_location,
12533 			     "get vectype for scalar type (group size %d):"
12534 			     " %T\n", group_size, scalar_type);
12535 	  else
12536 	    dump_printf_loc (MSG_NOTE, vect_location,
12537 			     "get vectype for scalar type: %T\n", scalar_type);
12538 	}
12539       vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12540       if (!vectype)
12541 	return opt_result::failure_at (stmt,
12542 				       "not vectorized:"
12543 				       " unsupported data-type %T\n",
12544 				       scalar_type);
12545 
12546       if (dump_enabled_p ())
12547 	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12548     }
12549 
12550   if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
12551     return opt_result::failure_at (stmt,
12552 				   "not vectorized: vector stmt in loop:%G",
12553 				   stmt);
12554 
12555   *stmt_vectype_out = vectype;
12556 
12557   /* Don't try to compute scalar types if the stmt produces a boolean
12558      vector; use the existing vector type instead.  */
12559   tree nunits_vectype = vectype;
12560   if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12561     {
12562       /* The number of units is set according to the smallest scalar
12563 	 type (or the largest vector size, but we only support one
12564 	 vector size per vectorization).  */
12565       scalar_type = vect_get_smallest_scalar_type (stmt_info,
12566 						   TREE_TYPE (vectype));
12567       if (scalar_type != TREE_TYPE (vectype))
12568 	{
12569 	  if (dump_enabled_p ())
12570 	    dump_printf_loc (MSG_NOTE, vect_location,
12571 			     "get vectype for smallest scalar type: %T\n",
12572 			     scalar_type);
12573 	  nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12574 							group_size);
12575 	  if (!nunits_vectype)
12576 	    return opt_result::failure_at
12577 	      (stmt, "not vectorized: unsupported data-type %T\n",
12578 	       scalar_type);
12579 	  if (dump_enabled_p ())
12580 	    dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12581 			     nunits_vectype);
12582 	}
12583     }
12584 
12585   if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12586 		   TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12587     return opt_result::failure_at (stmt,
12588 				   "Not vectorized: Incompatible number "
12589 				   "of vector subparts between %T and %T\n",
12590 				   nunits_vectype, *stmt_vectype_out);
12591 
12592   if (dump_enabled_p ())
12593     {
12594       dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12595       dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12596       dump_printf (MSG_NOTE, "\n");
12597     }
12598 
12599   *nunits_vectype_out = nunits_vectype;
12600   return opt_result::success ();
12601 }
12602 
12603 /* Generate and return a statement sequence that sets the vector length LEN to:
12604 
12605    min_of_start_and_end = min (START_INDEX, END_INDEX);
12606    left_len = END_INDEX - min_of_start_and_end;
12607    rhs = min (left_len, LEN_LIMIT);
12608    LEN = rhs;
12609 
12610    Note: the cost of the code generated by this function is modeled
12611    by vect_estimate_min_profitable_iters, so changes here may need
12612    corresponding changes there.  */
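/* For example, with START_INDEX 16, END_INDEX 19 and LEN_LIMIT 16
   (assuming 16 elements fit in a full vector), the sequence computes
   min (16, 19) = 16, then 19 - 16 = 3, then min (3, 16) = 3, so LEN
   is set to 3 for a final, partial iteration.  */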
12613 
12614 gimple_seq
12615 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12616 {
12617   gimple_seq stmts = NULL;
12618   tree len_type = TREE_TYPE (len);
12619   gcc_assert (TREE_TYPE (start_index) == len_type);
12620 
12621   tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12622   tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12623   tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12624   gimple* stmt = gimple_build_assign (len, rhs);
12625   gimple_seq_add_stmt (&stmts, stmt);
12626 
12627   return stmts;
12628 }
12629 
12630