1 /* Statement Analysis and Transformation for Vectorization
2    Copyright (C) 2003-2021 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4    and Ira Rosen <irar@il.ibm.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h"		/* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
58 
59 /* For lang_hooks.types.type_for_mode.  */
60 #include "langhooks.h"
61 
62 /* Return the vectorized type for the given statement.  */
63 
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67   return STMT_VINFO_VECTYPE (stmt_info);
68 }
69 
70 /* Return TRUE iff the given statement is in an inner loop relative to
71    the loop being vectorized.  */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
74 {
75   gimple *stmt = STMT_VINFO_STMT (stmt_info);
76   basic_block bb = gimple_bb (stmt);
77   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78   class loop* loop;
79 
80   if (!loop_vinfo)
81     return false;
82 
83   loop = LOOP_VINFO_LOOP (loop_vinfo);
84 
85   return (bb->loop_father == loop->inner);
86 }
87 
88 /* Record the cost of a statement, either by directly informing the
89    target model or by saving it in a vector for later processing.
90    Return a preliminary estimate of the statement's cost.  */
91 
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 		  tree vectype, int misalign,
96 		  enum vect_cost_model_location where)
97 {
98   if ((kind == vector_load || kind == unaligned_load)
99       && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
100     kind = vector_gather_load;
101   if ((kind == vector_store || kind == unaligned_store)
102       && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103     kind = vector_scatter_store;
104 
105   stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
106   body_cost_vec->safe_push (si);
107 
108   return (unsigned)
109       (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 }
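
/* Illustrative use of record_stmt_cost, as a sketch only (the variable
   names below are placeholders, not taken from a particular caller):

     unsigned cost
       = record_stmt_cost (cost_vec, ncopies, vector_stmt, stmt_info,
                           vectype, 0, vect_body);

   The returned value is only a preliminary estimate based on
   builtin_vectorization_cost; the final cost is computed when the
   accumulated cost vector is handed to the target cost model.  */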
111 
112 /* Return a variable of type ELEM_TYPE[NELEMS].  */
113 
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 {
117   return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 			 "vect_array");
119 }
120 
121 /* ARRAY is an array of vectors created by create_vector_array.
122    Return an SSA_NAME for the vector in index N.  The reference
123    is part of the vectorization of STMT_INFO and the vector is associated
124    with scalar destination SCALAR_DEST.  */
125 
126 static tree
127 read_vector_array (vec_info *vinfo,
128 		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 		   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
130 {
131   tree vect_type, vect, vect_name, array_ref;
132   gimple *new_stmt;
133 
134   gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135   vect_type = TREE_TYPE (TREE_TYPE (array));
136   vect = vect_create_destination_var (scalar_dest, vect_type);
137   array_ref = build4 (ARRAY_REF, vect_type, array,
138 		      build_int_cst (size_type_node, n),
139 		      NULL_TREE, NULL_TREE);
140 
141   new_stmt = gimple_build_assign (vect, array_ref);
142   vect_name = make_ssa_name (vect, new_stmt);
143   gimple_assign_set_lhs (new_stmt, vect_name);
144   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
145 
146   return vect_name;
147 }
148 
149 /* ARRAY is an array of vectors created by create_vector_array.
150    Emit code to store SSA_NAME VECT in index N of the array.
151    The store is part of the vectorization of STMT_INFO.  */
152 
153 static void
154 write_vector_array (vec_info *vinfo,
155 		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 		    tree vect, tree array, unsigned HOST_WIDE_INT n)
157 {
158   tree array_ref;
159   gimple *new_stmt;
160 
161   array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 		      build_int_cst (size_type_node, n),
163 		      NULL_TREE, NULL_TREE);
164 
165   new_stmt = gimple_build_assign (array_ref, vect);
166   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
167 }
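
/* For illustration, the statements generated by read_vector_array and
   write_vector_array have the following shape in the IL (the SSA names
   are made up):

     vect_array[2] = vect__34;       <-- write_vector_array
     vect__35 = vect_array[3];       <-- read_vector_array

   where vect_array is the temporary created by create_vector_array.  */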
168 
169 /* PTR is a pointer to an array of type TYPE.  Return a representation
170    of *PTR.  The memory reference replaces those in FIRST_DR
171    (and its group).  */
172 
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 {
176   tree mem_ref;
177 
178   mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179   /* Arrays have the same alignment as their type.  */
180   set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181   return mem_ref;
182 }
183 
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185    Emit the clobber before *GSI.  */
186 
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 		       gimple_stmt_iterator *gsi, tree var)
190 {
191   tree clobber = build_clobber (TREE_TYPE (var));
192   gimple *new_stmt = gimple_build_assign (var, clobber);
193   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
194 }
195 
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
197 
198 /* Function vect_mark_relevant.
199 
200    Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */
201 
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 		    enum vect_relevant relevant, bool live_p)
205 {
206   enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207   bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
208 
209   if (dump_enabled_p ())
210     dump_printf_loc (MSG_NOTE, vect_location,
211 		     "mark relevant %d, live %d: %G", relevant, live_p,
212 		     stmt_info->stmt);
213 
214   /* If this stmt is an original stmt in a pattern, we might need to mark its
215      related pattern stmt instead of the original stmt.  However, such stmts
216      may have their own uses that are not in any pattern; in such cases the
217      stmt itself should be marked.  */
218   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
219     {
220       /* This is the last stmt in a sequence that was detected as a
221 	 pattern that can potentially be vectorized.  Don't mark the stmt
222 	 as relevant/live because it's not going to be vectorized.
223 	 Instead mark the pattern-stmt that replaces it.  */
224 
225       if (dump_enabled_p ())
226 	dump_printf_loc (MSG_NOTE, vect_location,
227 			 "last stmt in pattern. don't mark"
228 			 " relevant/live.\n");
229       stmt_vec_info old_stmt_info = stmt_info;
230       stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232       save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233       save_live_p = STMT_VINFO_LIVE_P (stmt_info);
234     }
235 
236   STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237   if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238     STMT_VINFO_RELEVANT (stmt_info) = relevant;
239 
240   if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241       && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
242     {
243       if (dump_enabled_p ())
244         dump_printf_loc (MSG_NOTE, vect_location,
245                          "already marked relevant/live.\n");
246       return;
247     }
248 
249   worklist->safe_push (stmt_info);
250 }
251 
252 
253 /* Function is_simple_and_all_uses_invariant
254 
255    Return true if STMT_INFO is simple and all uses of it are invariant.  */
256 
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 				  loop_vec_info loop_vinfo)
260 {
261   tree op;
262   ssa_op_iter iter;
263 
264   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265   if (!stmt)
266     return false;
267 
268   FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
269     {
270       enum vect_def_type dt = vect_uninitialized_def;
271 
272       if (!vect_is_simple_use (op, loop_vinfo, &dt))
273 	{
274 	  if (dump_enabled_p ())
275 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 			     "use not simple.\n");
277 	  return false;
278 	}
279 
280       if (dt != vect_external_def && dt != vect_constant_def)
281 	return false;
282     }
283   return true;
284 }
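
/* As an example (names invented), for a loop-invariant computation like

     x_7 = inv_3 + 42;

   where inv_3 is defined outside the loop, is_simple_and_all_uses_invariant
   returns true: every use is an external or constant def.  A use defined by
   another statement inside the loop makes it return false.  */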
285 
286 /* Function vect_stmt_relevant_p.
287 
288    Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289    is "relevant for vectorization".
290 
291    A stmt is considered "relevant for vectorization" if:
292    - it has uses outside the loop.
293    - it has vdefs (it alters memory).
294    - control stmts in the loop (except for the exit condition).
295 
296    CHECKME: what other side effects would the vectorizer allow?  */
297 
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 		      enum vect_relevant *relevant, bool *live_p)
301 {
302   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303   ssa_op_iter op_iter;
304   imm_use_iterator imm_iter;
305   use_operand_p use_p;
306   def_operand_p def_p;
307 
308   *relevant = vect_unused_in_scope;
309   *live_p = false;
310 
311   /* cond stmt other than loop exit cond.  */
312   if (is_ctrl_stmt (stmt_info->stmt)
313       && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314     *relevant = vect_used_in_scope;
315 
316   /* changing memory.  */
317   if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318     if (gimple_vdef (stmt_info->stmt)
319 	&& !gimple_clobber_p (stmt_info->stmt))
320       {
321 	if (dump_enabled_p ())
322 	  dump_printf_loc (MSG_NOTE, vect_location,
323                            "vec_stmt_relevant_p: stmt has vdefs.\n");
324 	*relevant = vect_used_in_scope;
325       }
326 
327   /* uses outside the loop.  */
328   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
329     {
330       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
331 	{
332 	  basic_block bb = gimple_bb (USE_STMT (use_p));
333 	  if (!flow_bb_inside_loop_p (loop, bb))
334 	    {
335 	      if (is_gimple_debug (USE_STMT (use_p)))
336 		continue;
337 
338 	      if (dump_enabled_p ())
339 		dump_printf_loc (MSG_NOTE, vect_location,
340                                  "vec_stmt_relevant_p: used out of loop.\n");
341 
342 	      /* We expect all such uses to be in the loop exit phis
343 		 (because of loop closed form)   */
344 	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 	      gcc_assert (bb == single_exit (loop)->dest);
346 
347               *live_p = true;
348 	    }
349 	}
350     }
351 
352   if (*live_p && *relevant == vect_unused_in_scope
353       && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
354     {
355       if (dump_enabled_p ())
356 	dump_printf_loc (MSG_NOTE, vect_location,
357 			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358       *relevant = vect_used_only_live;
359     }
360 
361   return (*live_p || *relevant);
362 }
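
/* For example (illustrative source, not from any testcase):

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;   <-- has a vdef, so *RELEVANT is set
         s = b[i];          <-- s is used after the loop, so *LIVE_P is set
       }
     ... = s;

   Statements that are neither live nor relevant (e.g. bare index
   arithmetic) are not added to the worklist from here.  */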
363 
364 
365 /* Function exist_non_indexing_operands_for_use_p
366 
367    USE is one of the uses attached to STMT_INFO.  Check if USE is
368    used in STMT_INFO for anything other than indexing an array.  */
369 
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
372 {
373   tree operand;
374 
375   /* USE corresponds to some operand in STMT.  If there is no data
376      reference in STMT, then any operand that corresponds to USE
377      is not indexing an array.  */
378   if (!STMT_VINFO_DATA_REF (stmt_info))
379     return true;
380 
381   /* STMT has a data_ref.  FORNOW this means that it is of one of
382      the following forms:
383      -1- ARRAY_REF = var
384      -2- var = ARRAY_REF
385      (This should have been verified in analyze_data_refs).
386 
387      'var' in the second case corresponds to a def, not a use,
388      so USE cannot correspond to any operands that are not used
389      for array indexing.
390 
391      Therefore, all we need to check is if STMT falls into the
392      first case, and whether var corresponds to USE.  */
393 
394   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395   if (!assign || !gimple_assign_copy_p (assign))
396     {
397       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398       if (call && gimple_call_internal_p (call))
399 	{
400 	  internal_fn ifn = gimple_call_internal_fn (call);
401 	  int mask_index = internal_fn_mask_index (ifn);
402 	  if (mask_index >= 0
403 	      && use == gimple_call_arg (call, mask_index))
404 	    return true;
405 	  int stored_value_index = internal_fn_stored_value_index (ifn);
406 	  if (stored_value_index >= 0
407 	      && use == gimple_call_arg (call, stored_value_index))
408 	    return true;
409 	  if (internal_gather_scatter_fn_p (ifn)
410 	      && use == gimple_call_arg (call, 1))
411 	    return true;
412 	}
413       return false;
414     }
415 
416   if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417     return false;
418   operand = gimple_assign_rhs1 (assign);
419   if (TREE_CODE (operand) != SSA_NAME)
420     return false;
421 
422   if (operand == use)
423     return true;
424 
425   return false;
426 }
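
/* Example (sketch): for a store of the form

     *ptr_5 = x_7;

   the function returns true for the use x_7 (the stored value needs to
   be vectorized) and false for ptr_5, which is only used for
   addressing the memory access.  */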
427 
428 
429 /*
430    Function process_use.
431 
432    Inputs:
433    - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434    - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435      that defined USE.  This is done by calling mark_relevant and passing it
436      the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437    - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438      be performed.
439 
440    Outputs:
441    Generally, LIVE_P and RELEVANT are used to define the liveness and
442    relevance info of the DEF_STMT of this USE:
443        STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444        STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445    Exceptions:
446    - case 1: If USE is used only for address computations (e.g. array indexing),
447    which does not need to be directly vectorized, then the liveness/relevance
448    of the respective DEF_STMT is left unchanged.
449    - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450    we skip DEF_STMT because it has already been processed.
451    - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452    "relevant" will be modified accordingly.
453 
454    Return true if everything is as expected. Return false otherwise.  */
455 
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 	     enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 	     bool force)
460 {
461   stmt_vec_info dstmt_vinfo;
462   enum vect_def_type dt;
463 
464   /* case 1: we are only interested in uses that need to be vectorized.  Uses
465      that are used for address computation are not considered relevant.  */
466   if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467     return opt_result::success ();
468 
469   if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470     return opt_result::failure_at (stmt_vinfo->stmt,
471 				   "not vectorized:"
472 				   " unsupported use in stmt.\n");
473 
474   if (!dstmt_vinfo)
475     return opt_result::success ();
476 
477   basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478   basic_block bb = gimple_bb (stmt_vinfo->stmt);
479 
480   /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481      We have to force the stmt live since the epilogue loop needs it to
482      continue computing the reduction.  */
483   if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484       && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485       && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486       && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487       && bb->loop_father == def_bb->loop_father)
488     {
489       if (dump_enabled_p ())
490 	dump_printf_loc (MSG_NOTE, vect_location,
491 			 "reduc-stmt defining reduc-phi in the same nest.\n");
492       vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493       return opt_result::success ();
494     }
495 
496   /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 	outer-loop-header-bb:
498 		d = dstmt_vinfo
499 	inner-loop:
500 		stmt # use (d)
501 	outer-loop-tail-bb:
502 		...		  */
503   if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
504     {
505       if (dump_enabled_p ())
506 	dump_printf_loc (MSG_NOTE, vect_location,
507                          "outer-loop def-stmt defining inner-loop stmt.\n");
508 
509       switch (relevant)
510 	{
511 	case vect_unused_in_scope:
512 	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 		      vect_used_in_scope : vect_unused_in_scope;
514 	  break;
515 
516 	case vect_used_in_outer_by_reduction:
517           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 	  relevant = vect_used_by_reduction;
519 	  break;
520 
521 	case vect_used_in_outer:
522           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 	  relevant = vect_used_in_scope;
524 	  break;
525 
526 	case vect_used_in_scope:
527 	  break;
528 
529 	default:
530 	  gcc_unreachable ();
531 	}
532     }
533 
534   /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 	outer-loop-header-bb:
536 		...
537 	inner-loop:
538 		d = dstmt_vinfo
539 	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 		stmt # use (d)		*/
541   else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
542     {
543       if (dump_enabled_p ())
544 	dump_printf_loc (MSG_NOTE, vect_location,
545                          "inner-loop def-stmt defining outer-loop stmt.\n");
546 
547       switch (relevant)
548         {
549         case vect_unused_in_scope:
550           relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551             || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552                       vect_used_in_outer_by_reduction : vect_unused_in_scope;
553           break;
554 
555         case vect_used_by_reduction:
556 	case vect_used_only_live:
557           relevant = vect_used_in_outer_by_reduction;
558           break;
559 
560         case vect_used_in_scope:
561           relevant = vect_used_in_outer;
562           break;
563 
564         default:
565           gcc_unreachable ();
566         }
567     }
568   /* We are also not interested in uses on loop PHI backedges that are
569      inductions.  Otherwise we'll needlessly vectorize the IV increment
570      and cause hybrid SLP for SLP inductions.  Unless the PHI is live
571      of course.  */
572   else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 	   && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 				      loop_latch_edge (bb->loop_father))
577 	       == use))
578     {
579       if (dump_enabled_p ())
580 	dump_printf_loc (MSG_NOTE, vect_location,
581                          "induction value on backedge.\n");
582       return opt_result::success ();
583     }
584 
585 
586   vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587   return opt_result::success ();
588 }
589 
590 
591 /* Function vect_mark_stmts_to_be_vectorized.
592 
593    Not all stmts in the loop need to be vectorized. For example:
594 
595      for i...
596        for j...
597    1.    T0 = i + j
598    2.	 T1 = a[T0]
599 
600    3.    j = j + 1
601 
602    Stmts 1 and 3 do not need to be vectorized, because loop control and
603    addressing of vectorized data-refs are handled differently.
604 
605    This pass detects such stmts.  */
606 
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
609 {
610   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612   unsigned int nbbs = loop->num_nodes;
613   gimple_stmt_iterator si;
614   unsigned int i;
615   basic_block bb;
616   bool live_p;
617   enum vect_relevant relevant;
618 
619   DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
620 
621   auto_vec<stmt_vec_info, 64> worklist;
622 
623   /* 1. Init worklist.  */
624   for (i = 0; i < nbbs; i++)
625     {
626       bb = bbs[i];
627       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
628 	{
629 	  stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 	  if (dump_enabled_p ())
631 	    dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 			     phi_info->stmt);
633 
634 	  if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 	    vect_mark_relevant (&worklist, phi_info, relevant, live_p);
636 	}
637       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
638 	{
639 	  if (is_gimple_debug (gsi_stmt (si)))
640 	    continue;
641 	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
642 	  if (dump_enabled_p ())
643 	      dump_printf_loc (MSG_NOTE, vect_location,
644 			       "init: stmt relevant? %G", stmt_info->stmt);
645 
646 	  if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
647 	    vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
648 	}
649     }
650 
651   /* 2. Process_worklist */
652   while (worklist.length () > 0)
653     {
654       use_operand_p use_p;
655       ssa_op_iter iter;
656 
657       stmt_vec_info stmt_vinfo = worklist.pop ();
658       if (dump_enabled_p ())
659 	dump_printf_loc (MSG_NOTE, vect_location,
660 			 "worklist: examine stmt: %G", stmt_vinfo->stmt);
661 
662       /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 	 (DEF_STMT) as relevant/irrelevant according to the relevance property
664 	 of STMT.  */
665       relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
666 
667       /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 	 propagated as is to the DEF_STMTs of its USEs.
669 
670 	 One exception is when STMT has been identified as defining a reduction
671 	 variable; in this case we set the relevance to vect_used_by_reduction.
672 	 This is because we distinguish between two kinds of relevant stmts -
673 	 those that are used by a reduction computation, and those that are
674 	 (also) used by a regular computation.  This allows us later on to
675 	 identify stmts that are used solely by a reduction, and therefore the
676 	 order of the results that they produce does not have to be kept.  */
677 
678       switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
679         {
680           case vect_reduction_def:
681 	    gcc_assert (relevant != vect_unused_in_scope);
682 	    if (relevant != vect_unused_in_scope
683 		&& relevant != vect_used_in_scope
684 		&& relevant != vect_used_by_reduction
685 		&& relevant != vect_used_only_live)
686 	      return opt_result::failure_at
687 		(stmt_vinfo->stmt, "unsupported use of reduction.\n");
688 	    break;
689 
690           case vect_nested_cycle:
691 	    if (relevant != vect_unused_in_scope
692 		&& relevant != vect_used_in_outer_by_reduction
693 		&& relevant != vect_used_in_outer)
694 	      return opt_result::failure_at
695 		(stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
696             break;
697 
698           case vect_double_reduction_def:
699 	    if (relevant != vect_unused_in_scope
700 		&& relevant != vect_used_by_reduction
701 		&& relevant != vect_used_only_live)
702 	      return opt_result::failure_at
703 		(stmt_vinfo->stmt, "unsupported use of double reduction.\n");
704             break;
705 
706           default:
707             break;
708         }
709 
710       if (is_pattern_stmt_p (stmt_vinfo))
711         {
712           /* Pattern statements are not inserted into the code, so
713              FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714              have to scan the RHS or function arguments instead.  */
715 	  if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
716 	    {
717 	      enum tree_code rhs_code = gimple_assign_rhs_code (assign);
718 	      tree op = gimple_assign_rhs1 (assign);
719 
720 	      i = 1;
721 	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
722 		{
723 		  opt_result res
724 		    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
725 				   loop_vinfo, relevant, &worklist, false);
726 		  if (!res)
727 		    return res;
728 		  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
729 				     loop_vinfo, relevant, &worklist, false);
730 		  if (!res)
731 		    return res;
732 		  i = 2;
733 		}
734 	      for (; i < gimple_num_ops (assign); i++)
735 		{
736 		  op = gimple_op (assign, i);
737                   if (TREE_CODE (op) == SSA_NAME)
738 		    {
739 		      opt_result res
740 			= process_use (stmt_vinfo, op, loop_vinfo, relevant,
741 				       &worklist, false);
742 		      if (!res)
743 			return res;
744 		    }
745                  }
746             }
747 	  else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
748 	    {
749 	      for (i = 0; i < gimple_call_num_args (call); i++)
750 		{
751 		  tree arg = gimple_call_arg (call, i);
752 		  opt_result res
753 		    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
754 				   &worklist, false);
755 		  if (!res)
756 		    return res;
757 		}
758 	    }
759         }
760       else
761 	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
762           {
763             tree op = USE_FROM_PTR (use_p);
764 	    opt_result res
765 	      = process_use (stmt_vinfo, op, loop_vinfo, relevant,
766 			     &worklist, false);
767 	    if (!res)
768 	      return res;
769           }
770 
771       if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
772 	{
773 	  gather_scatter_info gs_info;
774 	  if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
775 	    gcc_unreachable ();
776 	  opt_result res
777 	    = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
778 			   &worklist, true);
779 	  if (!res)
780 	    {
781 	      if (fatal)
782 		*fatal = false;
783 	      return res;
784 	    }
785 	}
786     } /* while worklist */
787 
788   return opt_result::success ();
789 }
790 
791 /* Function vect_model_simple_cost.
792 
793    Models cost for simple operations, i.e. those that only emit ncopies of a
794    single op.  Right now, this does not account for multiple insns that could
795    be generated for the single vector op.  We will handle that shortly.  */
796 
797 static void
798 vect_model_simple_cost (vec_info *,
799 			stmt_vec_info stmt_info, int ncopies,
800 			enum vect_def_type *dt,
801 			int ndts,
802 			slp_tree node,
803 			stmt_vector_for_cost *cost_vec,
804 			vect_cost_for_stmt kind = vector_stmt)
805 {
806   int inside_cost = 0, prologue_cost = 0;
807 
808   gcc_assert (cost_vec != NULL);
809 
810   /* ???  Somehow we need to fix this at the callers.  */
811   if (node)
812     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
813 
814   if (!node)
815     /* Cost the "broadcast" of a scalar operand into a vector operand.
816        Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
817        cost model.  */
818     for (int i = 0; i < ndts; i++)
819       if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
820 	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
821 					   stmt_info, 0, vect_prologue);
822 
823   /* Pass the inside-of-loop statements to the target-specific cost model.  */
824   inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
825 				   stmt_info, 0, vect_body);
826 
827   if (dump_enabled_p ())
828     dump_printf_loc (MSG_NOTE, vect_location,
829                      "vect_model_simple_cost: inside_cost = %d, "
830                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
831 }
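
/* A worked example for vect_model_simple_cost: with NCOPIES == 2, no SLP
   node, and one of the NDTS operand types being vect_constant_def, the
   function records one scalar_to_vec prologue cost (to build the
   invariant vector) and NCOPIES (here 2) copies of KIND (vector_stmt by
   default) in the loop body.  */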
832 
833 
834 /* Model cost for type demotion and promotion operations.  PWR is
835    normally zero for single-step promotions and demotions.  It will be
836    one if two-step promotion/demotion is required, and so on.  NCOPIES
837    is the number of vector results (and thus number of instructions)
838    for the narrowest end of the operation chain.  Each additional
839    step doubles the number of instructions required.  */
840 
841 static void
842 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
843 				    enum vect_def_type *dt,
844 				    unsigned int ncopies, int pwr,
845 				    stmt_vector_for_cost *cost_vec)
846 {
847   int i;
848   int inside_cost = 0, prologue_cost = 0;
849 
850   for (i = 0; i < pwr + 1; i++)
851     {
852       inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
853 				       stmt_info, 0, vect_body);
854       ncopies *= 2;
855     }
856 
857   /* FORNOW: Assuming maximum 2 args per stmt.  */
858   for (i = 0; i < 2; i++)
859     if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
860       prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
861 					 stmt_info, 0, vect_prologue);
862 
863   if (dump_enabled_p ())
864     dump_printf_loc (MSG_NOTE, vect_location,
865                      "vect_model_promotion_demotion_cost: inside_cost = %d, "
866                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
867 }
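
/* Worked example: with NCOPIES == 2 at the narrowest end and PWR == 1
   (a two-step promotion/demotion), the loop above records 2 + 4 = 6
   vec_promote_demote statements in the loop body, plus one vector_stmt
   prologue entry for every constant or external operand.  */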
868 
869 /* Returns true if the current function returns DECL.  */
870 
871 static bool
872 cfun_returns (tree decl)
873 {
874   edge_iterator ei;
875   edge e;
876   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
877     {
878       greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
879       if (!ret)
880 	continue;
881       if (gimple_return_retval (ret) == decl)
882 	return true;
883       /* We often end up with an aggregate copy to the result decl,
884          handle that case as well.  First skip intermediate clobbers
885 	 though.  */
886       gimple *def = ret;
887       do
888 	{
889 	  def = SSA_NAME_DEF_STMT (gimple_vuse (def));
890 	}
891       while (gimple_clobber_p (def));
892       if (is_a <gassign *> (def)
893 	  && gimple_assign_lhs (def) == gimple_return_retval (ret)
894 	  && gimple_assign_rhs1 (def) == decl)
895 	return true;
896     }
897   return false;
898 }
899 
900 /* Function vect_model_store_cost
901 
902    Models cost for stores.  In the case of grouped accesses, one access
903    has the overhead of the grouped access attributed to it.  */
904 
905 static void
906 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
907 		       vect_memory_access_type memory_access_type,
908 		       vec_load_store_type vls_type, slp_tree slp_node,
909 		       stmt_vector_for_cost *cost_vec)
910 {
911   unsigned int inside_cost = 0, prologue_cost = 0;
912   stmt_vec_info first_stmt_info = stmt_info;
913   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
914 
915   /* ???  Somehow we need to fix this at the callers.  */
916   if (slp_node)
917     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
918 
919   if (vls_type == VLS_STORE_INVARIANT)
920     {
921       if (!slp_node)
922 	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
923 					   stmt_info, 0, vect_prologue);
924     }
925 
926   /* Grouped stores update all elements in the group at once,
927      so we want the DR for the first statement.  */
928   if (!slp_node && grouped_access_p)
929     first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
930 
931   /* True if we should include any once-per-group costs as well as
932      the cost of the statement itself.  For SLP we only get called
933      once per group anyhow.  */
934   bool first_stmt_p = (first_stmt_info == stmt_info);
935 
936   /* We assume that the cost of a single store-lanes instruction is
937      equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
938      access is instead being provided by a permute-and-store operation,
939      include the cost of the permutes.  */
940   if (first_stmt_p
941       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
942     {
943       /* Uses high and low interleave or shuffle operations for each
944 	 needed permute.  */
945       int group_size = DR_GROUP_SIZE (first_stmt_info);
946       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
947       inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
948 				      stmt_info, 0, vect_body);
949 
950       if (dump_enabled_p ())
951         dump_printf_loc (MSG_NOTE, vect_location,
952                          "vect_model_store_cost: strided group_size = %d .\n",
953                          group_size);
954     }
955 
956   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
957   /* Costs of the stores.  */
958   if (memory_access_type == VMAT_ELEMENTWISE
959       || memory_access_type == VMAT_GATHER_SCATTER)
960     {
961       /* N scalar stores plus extracting the elements.  */
962       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
963       inside_cost += record_stmt_cost (cost_vec,
964 				       ncopies * assumed_nunits,
965 				       scalar_store, stmt_info, 0, vect_body);
966     }
967   else
968     vect_get_store_cost (vinfo, stmt_info, ncopies, &inside_cost, cost_vec);
969 
970   if (memory_access_type == VMAT_ELEMENTWISE
971       || memory_access_type == VMAT_STRIDED_SLP)
972     {
973       /* N scalar stores plus extracting the elements.  */
974       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
975       inside_cost += record_stmt_cost (cost_vec,
976 				       ncopies * assumed_nunits,
977 				       vec_to_scalar, stmt_info, 0, vect_body);
978     }
979 
980   /* When vectorizing a store into the function result assign
981      a penalty if the function returns in a multi-register location.
982      In this case we assume we'll end up with having to spill the
983      vector result and do piecewise loads as a conservative estimate.  */
984   tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
985   if (base
986       && (TREE_CODE (base) == RESULT_DECL
987 	  || (DECL_P (base) && cfun_returns (base)))
988       && !aggregate_value_p (base, cfun->decl))
989     {
990       rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
991       /* ???  Handle PARALLEL in some way.  */
992       if (REG_P (reg))
993 	{
994 	  int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
995 	  /* Assume that a single reg-reg move is possible and cheap,
996 	     do not account for vector to gp register move cost.  */
997 	  if (nregs > 1)
998 	    {
999 	      /* Spill.  */
1000 	      prologue_cost += record_stmt_cost (cost_vec, ncopies,
1001 						 vector_store,
1002 						 stmt_info, 0, vect_epilogue);
1003 	      /* Loads.  */
1004 	      prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1005 						 scalar_load,
1006 						 stmt_info, 0, vect_epilogue);
1007 	    }
1008 	}
1009     }
1010 
1011   if (dump_enabled_p ())
1012     dump_printf_loc (MSG_NOTE, vect_location,
1013                      "vect_model_store_cost: inside_cost = %d, "
1014                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
1015 }
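
/* As a concrete instance of the permute costing above: for a grouped
   store with DR_GROUP_SIZE == 4 implemented by VMAT_CONTIGUOUS_PERMUTE
   and ncopies == 2, nstmts = 2 * ceil_log2 (4) * 4 = 16 vec_perm
   operations are costed in the loop body, on top of the stores
   themselves.  */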
1016 
1017 
1018 /* Calculate cost of DR's memory access.  */
1019 void
1020 vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1021 		     unsigned int *inside_cost,
1022 		     stmt_vector_for_cost *body_cost_vec)
1023 {
1024   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1025   int alignment_support_scheme
1026     = vect_supportable_dr_alignment (vinfo, dr_info, false);
1027 
1028   switch (alignment_support_scheme)
1029     {
1030     case dr_aligned:
1031       {
1032 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1033 					  vector_store, stmt_info, 0,
1034 					  vect_body);
1035 
1036         if (dump_enabled_p ())
1037           dump_printf_loc (MSG_NOTE, vect_location,
1038                            "vect_model_store_cost: aligned.\n");
1039         break;
1040       }
1041 
1042     case dr_unaligned_supported:
1043       {
1044         /* Here, we assign an additional cost for the unaligned store.  */
1045 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1046 					  unaligned_store, stmt_info,
1047 					  DR_MISALIGNMENT (dr_info),
1048 					  vect_body);
1049         if (dump_enabled_p ())
1050           dump_printf_loc (MSG_NOTE, vect_location,
1051                            "vect_model_store_cost: unaligned supported by "
1052                            "hardware.\n");
1053         break;
1054       }
1055 
1056     case dr_unaligned_unsupported:
1057       {
1058         *inside_cost = VECT_MAX_COST;
1059 
1060         if (dump_enabled_p ())
1061           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1062                            "vect_model_store_cost: unsupported access.\n");
1063         break;
1064       }
1065 
1066     default:
1067       gcc_unreachable ();
1068     }
1069 }
1070 
1071 
1072 /* Function vect_model_load_cost
1073 
1074    Models cost for loads.  In the case of grouped accesses, one access has
1075    the overhead of the grouped access attributed to it.  Since unaligned
1076    accesses are supported for loads, we also account for the costs of the
1077    access scheme chosen.  */
1078 
1079 static void
1080 vect_model_load_cost (vec_info *vinfo,
1081 		      stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1082 		      vect_memory_access_type memory_access_type,
1083 		      slp_tree slp_node,
1084 		      stmt_vector_for_cost *cost_vec)
1085 {
1086   unsigned int inside_cost = 0, prologue_cost = 0;
1087   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1088 
1089   gcc_assert (cost_vec);
1090 
1091   /* ???  Somehow we need to fix this at the callers.  */
1092   if (slp_node)
1093     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1094 
1095   if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1096     {
1097       /* If the load is permuted then the alignment is determined by
1098 	 the first group element not by the first scalar stmt DR.  */
1099       stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1100       /* Record the cost for the permutation.  */
1101       unsigned n_perms, n_loads;
1102       vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1103 				    vf, true, &n_perms, &n_loads);
1104       inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1105 				       first_stmt_info, 0, vect_body);
1106 
1107       /* And adjust the number of loads performed.  This handles
1108 	 redundancies as well as loads that are later dead.  */
1109       ncopies = n_loads;
1110     }
1111 
1112   /* Grouped loads read all elements in the group at once,
1113      so we want the DR for the first statement.  */
1114   stmt_vec_info first_stmt_info = stmt_info;
1115   if (!slp_node && grouped_access_p)
1116     first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1117 
1118   /* True if we should include any once-per-group costs as well as
1119      the cost of the statement itself.  For SLP we only get called
1120      once per group anyhow.  */
1121   bool first_stmt_p = (first_stmt_info == stmt_info);
1122 
1123   /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1124      ones we actually need.  Account for the cost of unused results.  */
1125   if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
1126     {
1127       unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
1128       stmt_vec_info next_stmt_info = first_stmt_info;
1129       do
1130 	{
1131 	  gaps -= 1;
1132 	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
1133 	}
1134       while (next_stmt_info);
1135       if (gaps)
1136 	{
1137 	  if (dump_enabled_p ())
1138 	    dump_printf_loc (MSG_NOTE, vect_location,
1139 			     "vect_model_load_cost: %d unused vectors.\n",
1140 			     gaps);
1141 	  vect_get_load_cost (vinfo, stmt_info, ncopies * gaps, false,
1142 			      &inside_cost, &prologue_cost,
1143 			      cost_vec, cost_vec, true);
1144 	}
1145     }
1146 
1147   /* We assume that the cost of a single load-lanes instruction is
1148      equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
1149      access is instead being provided by a load-and-permute operation,
1150      include the cost of the permutes.  */
1151   if (first_stmt_p
1152       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1153     {
1154       /* Uses even and odd extract operations or shuffle operations
1155 	 for each needed permute.  */
1156       int group_size = DR_GROUP_SIZE (first_stmt_info);
1157       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1158       inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1159 				       stmt_info, 0, vect_body);
1160 
1161       if (dump_enabled_p ())
1162         dump_printf_loc (MSG_NOTE, vect_location,
1163                          "vect_model_load_cost: strided group_size = %d .\n",
1164                          group_size);
1165     }
1166 
1167   /* The loads themselves.  */
1168   if (memory_access_type == VMAT_ELEMENTWISE
1169       || memory_access_type == VMAT_GATHER_SCATTER)
1170     {
1171       /* N scalar loads plus gathering them into a vector.  */
1172       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1173       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1174       inside_cost += record_stmt_cost (cost_vec,
1175 				       ncopies * assumed_nunits,
1176 				       scalar_load, stmt_info, 0, vect_body);
1177     }
1178   else
1179     vect_get_load_cost (vinfo, stmt_info, ncopies, first_stmt_p,
1180 			&inside_cost, &prologue_cost,
1181 			cost_vec, cost_vec, true);
1182   if (memory_access_type == VMAT_ELEMENTWISE
1183       || memory_access_type == VMAT_STRIDED_SLP)
1184     inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1185 				     stmt_info, 0, vect_body);
1186 
1187   if (dump_enabled_p ())
1188     dump_printf_loc (MSG_NOTE, vect_location,
1189                      "vect_model_load_cost: inside_cost = %d, "
1190                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
1191 }
1192 
1193 
1194 /* Calculate cost of DR's memory access.  */
1195 void
1196 vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1197 		    bool add_realign_cost, unsigned int *inside_cost,
1198 		    unsigned int *prologue_cost,
1199 		    stmt_vector_for_cost *prologue_cost_vec,
1200 		    stmt_vector_for_cost *body_cost_vec,
1201 		    bool record_prologue_costs)
1202 {
1203   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1204   int alignment_support_scheme
1205     = vect_supportable_dr_alignment (vinfo, dr_info, false);
1206 
1207   switch (alignment_support_scheme)
1208     {
1209     case dr_aligned:
1210       {
1211 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1212 					  stmt_info, 0, vect_body);
1213 
1214         if (dump_enabled_p ())
1215           dump_printf_loc (MSG_NOTE, vect_location,
1216                            "vect_model_load_cost: aligned.\n");
1217 
1218         break;
1219       }
1220     case dr_unaligned_supported:
1221       {
1222         /* Here, we assign an additional cost for the unaligned load.  */
1223 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1224 					  unaligned_load, stmt_info,
1225 					  DR_MISALIGNMENT (dr_info),
1226 					  vect_body);
1227 
1228         if (dump_enabled_p ())
1229           dump_printf_loc (MSG_NOTE, vect_location,
1230                            "vect_model_load_cost: unaligned supported by "
1231                            "hardware.\n");
1232 
1233         break;
1234       }
1235     case dr_explicit_realign:
1236       {
1237 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1238 					  vector_load, stmt_info, 0, vect_body);
1239 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1240 					  vec_perm, stmt_info, 0, vect_body);
1241 
1242         /* FIXME: If the misalignment remains fixed across the iterations of
1243            the containing loop, the following cost should be added to the
1244            prologue costs.  */
1245         if (targetm.vectorize.builtin_mask_for_load)
1246 	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1247 					    stmt_info, 0, vect_body);
1248 
1249         if (dump_enabled_p ())
1250           dump_printf_loc (MSG_NOTE, vect_location,
1251                            "vect_model_load_cost: explicit realign\n");
1252 
1253         break;
1254       }
1255     case dr_explicit_realign_optimized:
1256       {
1257         if (dump_enabled_p ())
1258           dump_printf_loc (MSG_NOTE, vect_location,
1259                            "vect_model_load_cost: unaligned software "
1260                            "pipelined.\n");
1261 
1262         /* Unaligned software pipeline has a load of an address, an initial
1263            load, and possibly a mask operation to "prime" the loop.  However,
1264            if this is an access in a group of loads, which provide grouped
1265            access, then the above cost should only be considered for one
1266            access in the group.  Inside the loop, there is a load op
1267            and a realignment op.  */
1268 
1269         if (add_realign_cost && record_prologue_costs)
1270           {
1271 	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1272 						vector_stmt, stmt_info,
1273 						0, vect_prologue);
1274             if (targetm.vectorize.builtin_mask_for_load)
1275 	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1276 						  vector_stmt, stmt_info,
1277 						  0, vect_prologue);
1278           }
1279 
1280 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1281 					  stmt_info, 0, vect_body);
1282 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1283 					  stmt_info, 0, vect_body);
1284 
1285         if (dump_enabled_p ())
1286           dump_printf_loc (MSG_NOTE, vect_location,
1287                            "vect_model_load_cost: explicit realign optimized"
1288                            "\n");
1289 
1290         break;
1291       }
1292 
1293     case dr_unaligned_unsupported:
1294       {
1295         *inside_cost = VECT_MAX_COST;
1296 
1297         if (dump_enabled_p ())
1298           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1299                            "vect_model_load_cost: unsupported access.\n");
1300         break;
1301       }
1302 
1303     default:
1304       gcc_unreachable ();
1305     }
1306 }
1307 
1308 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1309    the loop preheader for the vectorized stmt STMT_VINFO.  */
1310 
1311 static void
1312 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1313 		    gimple_stmt_iterator *gsi)
1314 {
1315   if (gsi)
1316     vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1317   else
1318     vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1319 
1320   if (dump_enabled_p ())
1321     dump_printf_loc (MSG_NOTE, vect_location,
1322 		     "created new init_stmt: %G", new_stmt);
1323 }
1324 
1325 /* Function vect_init_vector.
1326 
1327    Insert a new stmt (INIT_STMT) that initializes a new variable of type
1328    TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
1329    a vector type, a vector with all elements equal to VAL is created first.
1330    Place the initialization at GSI if it is not NULL.  Otherwise, place the
1331    initialization at the loop preheader.
1332    Return the DEF of INIT_STMT.
1333    It will be used in the vectorization of STMT_INFO.  */
1334 
1335 tree
1336 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1337 		  gimple_stmt_iterator *gsi)
1338 {
1339   gimple *init_stmt;
1340   tree new_temp;
1341 
1342   /* We abuse this function to push something to an SSA name with initial
     value VAL.  */
1343   if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1344     {
1345       gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1346       if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1347 	{
1348 	  /* Scalar boolean value should be transformed into
1349 	     all zeros or all ones value before building a vector.  */
1350 	  if (VECTOR_BOOLEAN_TYPE_P (type))
1351 	    {
1352 	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
1353 	      tree false_val = build_zero_cst (TREE_TYPE (type));
1354 
1355 	      if (CONSTANT_CLASS_P (val))
1356 		val = integer_zerop (val) ? false_val : true_val;
1357 	      else
1358 		{
1359 		  new_temp = make_ssa_name (TREE_TYPE (type));
1360 		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1361 						   val, true_val, false_val);
1362 		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1363 		  val = new_temp;
1364 		}
1365 	    }
1366 	  else
1367 	    {
1368 	      gimple_seq stmts = NULL;
1369 	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1370 		val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1371 				    TREE_TYPE (type), val);
1372 	      else
1373 		/* ???  Condition vectorization expects us to do
1374 		   promotion of invariant/external defs.  */
1375 		val = gimple_convert (&stmts, TREE_TYPE (type), val);
1376 	      for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1377 		   !gsi_end_p (gsi2); )
1378 		{
1379 		  init_stmt = gsi_stmt (gsi2);
1380 		  gsi_remove (&gsi2, false);
1381 		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1382 		}
1383 	    }
1384 	}
1385       val = build_vector_from_val (type, val);
1386     }
1387 
1388   new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1389   init_stmt = gimple_build_assign (new_temp, val);
1390   vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1391   return new_temp;
1392 }
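
/* For example (a sketch of the generated IL), initializing a V4SI vector
   with the invariant 5 in the loop preheader produces something like:

     cst_1 = { 5, 5, 5, 5 };

   where cst_1 stands for the fresh SSA name created here, and
   vect_init_vector returns that name.  */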
1393 
1394 
1395 /* Function vect_get_vec_defs_for_operand.
1396 
1397    OP is an operand in STMT_VINFO.  This function returns a vector of
1398    NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1399 
1400    In the case that OP is an SSA_NAME which is defined in the loop, then
1401    STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1402 
1403    In case OP is an invariant or constant, a new stmt that creates a vector def
1404    needs to be introduced.  VECTYPE may be used to specify a required type for
1405    the vector invariant.  */
1406 
1407 void
1408 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1409 			       unsigned ncopies,
1410 			       tree op, vec<tree> *vec_oprnds, tree vectype)
1411 {
1412   gimple *def_stmt;
1413   enum vect_def_type dt;
1414   bool is_simple_use;
1415   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1416 
1417   if (dump_enabled_p ())
1418     dump_printf_loc (MSG_NOTE, vect_location,
1419 		     "vect_get_vec_defs_for_operand: %T\n", op);
1420 
1421   stmt_vec_info def_stmt_info;
1422   is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1423 				      &def_stmt_info, &def_stmt);
1424   gcc_assert (is_simple_use);
1425   if (def_stmt && dump_enabled_p ())
1426     dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  %G", def_stmt);
1427 
1428   vec_oprnds->create (ncopies);
1429   if (dt == vect_constant_def || dt == vect_external_def)
1430     {
1431       tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1432       tree vector_type;
1433 
1434       if (vectype)
1435 	vector_type = vectype;
1436       else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1437 	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1438 	vector_type = truth_type_for (stmt_vectype);
1439       else
1440 	vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1441 
1442       gcc_assert (vector_type);
1443       tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1444       while (ncopies--)
1445 	vec_oprnds->quick_push (vop);
1446     }
1447   else
1448     {
1449       def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1450       gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1451       for (unsigned i = 0; i < ncopies; ++i)
1452 	vec_oprnds->quick_push (gimple_get_lhs
1453 				  (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1454     }
1455 }
1456 
1457 
1458 /* Get vectorized definitions for OP0 and OP1.  */
1459 
1460 void
1461 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1462 		   unsigned ncopies,
1463 		   tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1464 		   tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1465 		   tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1466 		   tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1467 {
1468   if (slp_node)
1469     {
1470       if (op0)
1471 	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1472       if (op1)
1473 	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1474       if (op2)
1475 	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1476       if (op3)
1477 	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1478     }
1479   else
1480     {
1481       if (op0)
1482 	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1483 				       op0, vec_oprnds0, vectype0);
1484       if (op1)
1485 	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1486 				       op1, vec_oprnds1, vectype1);
1487       if (op2)
1488 	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1489 				       op2, vec_oprnds2, vectype2);
1490       if (op3)
1491 	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1492 				       op3, vec_oprnds3, vectype3);
1493     }
1494 }
1495 
1496 void
1497 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1498 		   unsigned ncopies,
1499 		   tree op0, vec<tree> *vec_oprnds0,
1500 		   tree op1, vec<tree> *vec_oprnds1,
1501 		   tree op2, vec<tree> *vec_oprnds2,
1502 		   tree op3, vec<tree> *vec_oprnds3)
1503 {
1504   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1505 		     op0, vec_oprnds0, NULL_TREE,
1506 		     op1, vec_oprnds1, NULL_TREE,
1507 		     op2, vec_oprnds2, NULL_TREE,
1508 		     op3, vec_oprnds3, NULL_TREE);
1509 }
1510 
1511 /* Helper function called by vect_finish_replace_stmt and
1512    vect_finish_stmt_generation.  Set the location of the new
1513    statement and, if it can throw, add it to the containing EH region.  */
1514 
1515 static void
1516 vect_finish_stmt_generation_1 (vec_info *,
1517 			       stmt_vec_info stmt_info, gimple *vec_stmt)
1518 {
1519   if (dump_enabled_p ())
1520     dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1521 
1522   if (stmt_info)
1523     {
1524       gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1525 
1526       /* While EH edges will generally prevent vectorization, stmt might
1527 	 e.g. be in a must-not-throw region.  Ensure newly created stmts
1528 	 that could throw are part of the same region.  */
1529       int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1530       if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1531 	add_stmt_to_eh_lp (vec_stmt, lp_nr);
1532     }
1533   else
1534     gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1535 }
1536 
1537 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1538    which sets the same scalar result as STMT_INFO did.  Also give VEC_STMT
1539    STMT_INFO's location and, if needed, its EH region.  */
1540 
1541 void
1542 vect_finish_replace_stmt (vec_info *vinfo,
1543 			  stmt_vec_info stmt_info, gimple *vec_stmt)
1544 {
1545   gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1546   gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1547 
1548   gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1549   gsi_replace (&gsi, vec_stmt, true);
1550 
1551   vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1552 }
1553 
1554 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1555    before *GSI, keeping virtual SSA form and EH information up to date.  */
1556 
1557 void
1558 vect_finish_stmt_generation (vec_info *vinfo,
1559 			     stmt_vec_info stmt_info, gimple *vec_stmt,
1560 			     gimple_stmt_iterator *gsi)
1561 {
1562   gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1563 
1564   if (!gsi_end_p (*gsi)
1565       && gimple_has_mem_ops (vec_stmt))
1566     {
1567       gimple *at_stmt = gsi_stmt (*gsi);
1568       tree vuse = gimple_vuse (at_stmt);
1569       if (vuse && TREE_CODE (vuse) == SSA_NAME)
1570 	{
1571 	  tree vdef = gimple_vdef (at_stmt);
1572 	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1573 	  gimple_set_modified (vec_stmt, true);
1574 	  /* If we have an SSA vuse and insert a store, update virtual
1575 	     SSA form to avoid triggering the renamer.  Do so only
1576 	     if we can easily see all uses - which is what almost always
1577 	     happens with the way vectorized stmts are inserted.  */
1578 	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1579 	      && ((is_gimple_assign (vec_stmt)
1580 		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1581 		  || (is_gimple_call (vec_stmt)
1582 		      && !(gimple_call_flags (vec_stmt)
1583 			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1584 	    {
1585 	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1586 	      gimple_set_vdef (vec_stmt, new_vdef);
1587 	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1588 	    }
1589 	}
1590     }
1591   gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1592   vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1593 }
1594 
1595 /* We want to vectorize a call to combined function CFN with function
1596    decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1597    as the types of all inputs.  Check whether this is possible using
1598    an internal function, returning its code if so or IFN_LAST if not.  */
1599 
1600 static internal_fn
1601 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1602 				tree vectype_out, tree vectype_in)
1603 {
1604   internal_fn ifn;
1605   if (internal_fn_p (cfn))
1606     ifn = as_internal_fn (cfn);
1607   else
1608     ifn = associated_internal_fn (fndecl);
1609   if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1610     {
1611       const direct_internal_fn_info &info = direct_internal_fn (ifn);
1612       if (info.vectorizable)
1613 	{
1614 	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1615 	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1616 	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1617 					      OPTIMIZE_FOR_SPEED))
1618 	    return ifn;
1619 	}
1620     }
1621   return IFN_LAST;
1622 }
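
/* For example (a sketch, not exhaustive): a call to sqrtf vectorized with
   V4SF as both VECTYPE_OUT and VECTYPE_IN maps to the directly-supported
   internal function IFN_SQRT, and the function above returns IFN_SQRT
   whenever direct_internal_fn_supported_p reports a V4SF sqrt pattern on
   the target; otherwise it returns IFN_LAST and the caller must fall back
   to other strategies.  */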
1623 
1624 
1625 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1626 				  gimple_stmt_iterator *);
1627 
1628 /* Check whether a load or store statement in the loop described by
1629    LOOP_VINFO is possible in a loop using partial vectors.  This is
1630    testing whether the vectorizer pass has the appropriate support,
1631    as well as whether the target does.
1632 
1633    VLS_TYPE says whether the statement is a load or store and VECTYPE
1634    is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
1635    says how the load or store is going to be implemented and GROUP_SIZE
1636    is the number of load or store statements in the containing group.
1637    If the access is a gather load or scatter store, GS_INFO describes
1638    its arguments.  If the load or store is conditional, SCALAR_MASK is the
1639    condition under which it occurs.
1640 
1641    Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1642    vectors is not supported, otherwise record the required rgroup control
1643    types.  */
1644 
1645 static void
1646 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1647 				      vec_load_store_type vls_type,
1648 				      int group_size,
1649 				      vect_memory_access_type
1650 				      memory_access_type,
1651 				      gather_scatter_info *gs_info,
1652 				      tree scalar_mask)
1653 {
1654   /* Invariant loads need no special support.  */
1655   if (memory_access_type == VMAT_INVARIANT)
1656     return;
1657 
1658   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1659   machine_mode vecmode = TYPE_MODE (vectype);
1660   bool is_load = (vls_type == VLS_LOAD);
1661   if (memory_access_type == VMAT_LOAD_STORE_LANES)
1662     {
1663       if (is_load
1664 	  ? !vect_load_lanes_supported (vectype, group_size, true)
1665 	  : !vect_store_lanes_supported (vectype, group_size, true))
1666 	{
1667 	  if (dump_enabled_p ())
1668 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1669 			     "can't operate on partial vectors because"
1670 			     " the target doesn't have an appropriate"
1671 			     " load/store-lanes instruction.\n");
1672 	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1673 	  return;
1674 	}
1675       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1676       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1677       return;
1678     }
1679 
1680   if (memory_access_type == VMAT_GATHER_SCATTER)
1681     {
1682       internal_fn ifn = (is_load
1683 			 ? IFN_MASK_GATHER_LOAD
1684 			 : IFN_MASK_SCATTER_STORE);
1685       if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1686 						   gs_info->memory_type,
1687 						   gs_info->offset_vectype,
1688 						   gs_info->scale))
1689 	{
1690 	  if (dump_enabled_p ())
1691 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1692 			     "can't operate on partial vectors because"
1693 			     " the target doesn't have an appropriate"
1694 			     " gather load or scatter store instruction.\n");
1695 	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1696 	  return;
1697 	}
1698       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1699       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1700       return;
1701     }
1702 
1703   if (memory_access_type != VMAT_CONTIGUOUS
1704       && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1705     {
1706       /* Element X of the data must come from iteration i * VF + X of the
1707 	 scalar loop.  We need more work to support other mappings.  */
1708       if (dump_enabled_p ())
1709 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1710 			 "can't operate on partial vectors because an"
1711 			 " access isn't contiguous.\n");
1712       LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1713       return;
1714     }
1715 
1716   if (!VECTOR_MODE_P (vecmode))
1717     {
1718       if (dump_enabled_p ())
1719 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1720 			 "can't operate on partial vectors when emulating"
1721 			 " vector operations.\n");
1722       LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1723       return;
1724     }
1725 
1726   /* We might load more scalars than we need for permuting SLP loads.
1727      We checked in get_group_load_store_type that the extra elements
1728      don't leak into a new vector.  */
1729   auto get_valid_nvectors = [] (poly_uint64 size, poly_uint64 nunits)
1730   {
1731     unsigned int nvectors;
1732     if (can_div_away_from_zero_p (size, nunits, &nvectors))
1733       return nvectors;
1734     gcc_unreachable ();
1735   };
1736 
1737   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1738   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1739   machine_mode mask_mode;
1740   bool using_partial_vectors_p = false;
1741   if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1742       && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1743     {
1744       unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1745       vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1746       using_partial_vectors_p = true;
1747     }
1748 
1749   machine_mode vmode;
1750   if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1751     {
1752       unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1753       vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1754       unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1755       vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1756       using_partial_vectors_p = true;
1757     }
1758 
1759   if (!using_partial_vectors_p)
1760     {
1761       if (dump_enabled_p ())
1762 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1763 			 "can't operate on partial vectors because the"
1764 			 " target doesn't have the appropriate partial"
1765 			 " vectorization load or store.\n");
1766       LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1767     }
1768 }
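
/* Worked example (hypothetical numbers): for a group of 2 loads, a
   vectorization factor of 8 and 8-element vectors, the lambda above gives
   GET_VALID_NVECTORS (2 * 8, 8) == 2, so two masks (or two lengths, on
   length-based targets) are recorded in the relevant rgroup for this
   access.  */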
1769 
1770 /* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
1771    form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1772    that needs to be applied to all loads and stores in a vectorized loop.
1773    Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1774 
1775    MASK_TYPE is the type of both masks.  If new statements are needed,
1776    insert them before GSI.  */
1777 
1778 static tree
1779 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1780 			 gimple_stmt_iterator *gsi)
1781 {
1782   gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1783   if (!loop_mask)
1784     return vec_mask;
1785 
1786   gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1787   tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1788   gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1789 					  vec_mask, loop_mask);
1790   gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1791   return and_res;
1792 }
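
/* For instance, with a loop mask of { 1, 1, 1, 0 } (only three iterations
   remain) and a vectorized condition of { 1, 0, 1, 1 }, the function above
   emits one BIT_AND_EXPR before GSI and returns { 1, 0, 1, 0 }: a lane is
   accessed only if it is both within bounds and selected by the scalar
   condition.  */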
1793 
1794 /* Determine whether we can use a gather load or scatter store to vectorize
1795    strided load or store STMT_INFO by truncating the current offset to a
1796    smaller width.  We need to be able to construct an offset vector:
1797 
1798      { 0, X, X*2, X*3, ... }
1799 
1800    without loss of precision, where X is STMT_INFO's DR_STEP.
1801 
1802    Return true if this is possible, describing the gather load or scatter
1803    store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */
1804 
1805 static bool
1806 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1807 				     loop_vec_info loop_vinfo, bool masked_p,
1808 				     gather_scatter_info *gs_info)
1809 {
1810   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1811   data_reference *dr = dr_info->dr;
1812   tree step = DR_STEP (dr);
1813   if (TREE_CODE (step) != INTEGER_CST)
1814     {
1815       /* ??? Perhaps we could use range information here?  */
1816       if (dump_enabled_p ())
1817 	dump_printf_loc (MSG_NOTE, vect_location,
1818 			 "cannot truncate variable step.\n");
1819       return false;
1820     }
1821 
1822   /* Get the number of bits in an element.  */
1823   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1824   scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1825   unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1826 
1827   /* Set COUNT to the upper limit on the number of elements - 1.
1828      Start with the maximum vectorization factor.  */
1829   unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1830 
1831   /* Try lowering COUNT to the number of scalar latch iterations.  */
1832   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1833   widest_int max_iters;
1834   if (max_loop_iterations (loop, &max_iters)
1835       && max_iters < count)
1836     count = max_iters.to_shwi ();
1837 
1838   /* Try scales of 1 and the element size.  */
1839   int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1840   wi::overflow_type overflow = wi::OVF_NONE;
1841   for (int i = 0; i < 2; ++i)
1842     {
1843       int scale = scales[i];
1844       widest_int factor;
1845       if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1846 	continue;
1847 
1848       /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE.  */
1849       widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1850       if (overflow)
1851 	continue;
1852       signop sign = range >= 0 ? UNSIGNED : SIGNED;
1853       unsigned int min_offset_bits = wi::min_precision (range, sign);
1854 
1855       /* Find the narrowest viable offset type.  */
1856       unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1857       tree offset_type = build_nonstandard_integer_type (offset_bits,
1858 							 sign == UNSIGNED);
1859 
1860       /* See whether the target supports the operation with an offset
1861 	 no narrower than OFFSET_TYPE.  */
1862       tree memory_type = TREE_TYPE (DR_REF (dr));
1863       if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1864 				     vectype, memory_type, offset_type, scale,
1865 				     &gs_info->ifn, &gs_info->offset_vectype))
1866 	continue;
1867 
1868       gs_info->decl = NULL_TREE;
1869       /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1870 	 but we don't need to store that here.  */
1871       gs_info->base = NULL_TREE;
1872       gs_info->element_type = TREE_TYPE (vectype);
1873       gs_info->offset = fold_convert (offset_type, step);
1874       gs_info->offset_dt = vect_constant_def;
1875       gs_info->scale = scale;
1876       gs_info->memory_type = memory_type;
1877       return true;
1878     }
1879 
1880   if (overflow && dump_enabled_p ())
1881     dump_printf_loc (MSG_NOTE, vect_location,
1882 		     "truncating gather/scatter offset to %d bits"
1883 		     " might change its value.\n", element_bits);
1884 
1885   return false;
1886 }
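
/* Worked example (hypothetical numbers): for an int access with DR_STEP 4,
   a loop with at most 255 latch iterations and a scale of 4, FACTOR is 1
   and RANGE is at most 255, so MIN_OFFSET_BITS is 8 and the narrowest
   viable offset type is an 8-bit unsigned type.  If the target supports a
   gather or scatter with 8-bit offsets and scale 4, the offset vector
   { 0, 1, 2, 3, ... } (i.e. STEP / SCALE per element) can be used without
   losing precision.  */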
1887 
1888 /* Return true if we can use gather/scatter internal functions to
1889    vectorize STMT_INFO, which is a grouped or strided load or store.
1890    MASKED_P is true if load or store is conditional.  When returning
1891    true, fill in GS_INFO with the information required to perform the
1892    operation.  */
1893 
1894 static bool
1895 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1896 				    loop_vec_info loop_vinfo, bool masked_p,
1897 				    gather_scatter_info *gs_info)
1898 {
1899   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1900       || gs_info->decl)
1901     return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1902 						masked_p, gs_info);
1903 
1904   tree old_offset_type = TREE_TYPE (gs_info->offset);
1905   tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1906 
1907   gcc_assert (TYPE_PRECISION (new_offset_type)
1908 	      >= TYPE_PRECISION (old_offset_type));
1909   gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1910 
1911   if (dump_enabled_p ())
1912     dump_printf_loc (MSG_NOTE, vect_location,
1913 		     "using gather/scatter for strided/grouped access,"
1914 		     " scale = %d\n", gs_info->scale);
1915 
1916   return true;
1917 }
1918 
1919 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1920    elements with a known constant step.  Return -1 if that step
1921    is negative, 0 if it is zero, and 1 if it is greater than zero.  */
1922 
1923 static int
1924 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1925 {
1926   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1927   return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1928 			       size_zero_node);
1929 }
1930 
1931 /* If the target supports a permute mask that reverses the elements in
1932    a vector of type VECTYPE, return that mask, otherwise return null.  */
1933 
1934 static tree
1935 perm_mask_for_reverse (tree vectype)
1936 {
1937   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1938 
1939   /* The encoding has a single stepped pattern.  */
1940   vec_perm_builder sel (nunits, 1, 3);
1941   for (int i = 0; i < 3; ++i)
1942     sel.quick_push (nunits - 1 - i);
1943 
1944   vec_perm_indices indices (sel, 1, nunits);
1945   if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1946     return NULL_TREE;
1947   return vect_gen_perm_mask_checked (vectype, indices);
1948 }
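
/* For example, for V4SI (four lanes) the selector built above is
   { 3, 2, 1, 0 }; for variable-length vectors the three encoded elements
   { NUNITS - 1, NUNITS - 2, NUNITS - 3 } describe the single stepped
   series that continues down to 0.  */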
1949 
1950 /* A subroutine of get_load_store_type, with a subset of the same
1951    arguments.  Handle the case where STMT_INFO is a load or store that
1952    accesses consecutive elements with a negative step.  */
1953 
1954 static vect_memory_access_type
1955 get_negative_load_store_type (vec_info *vinfo,
1956 			      stmt_vec_info stmt_info, tree vectype,
1957 			      vec_load_store_type vls_type,
1958 			      unsigned int ncopies)
1959 {
1960   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1961   dr_alignment_support alignment_support_scheme;
1962 
1963   if (ncopies > 1)
1964     {
1965       if (dump_enabled_p ())
1966 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1967 			 "multiple types with negative step.\n");
1968       return VMAT_ELEMENTWISE;
1969     }
1970 
1971   alignment_support_scheme = vect_supportable_dr_alignment (vinfo,
1972 							    dr_info, false);
1973   if (alignment_support_scheme != dr_aligned
1974       && alignment_support_scheme != dr_unaligned_supported)
1975     {
1976       if (dump_enabled_p ())
1977 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1978 			 "negative step but alignment required.\n");
1979       return VMAT_ELEMENTWISE;
1980     }
1981 
1982   if (vls_type == VLS_STORE_INVARIANT)
1983     {
1984       if (dump_enabled_p ())
1985 	dump_printf_loc (MSG_NOTE, vect_location,
1986 			 "negative step with invariant source;"
1987 			 " no permute needed.\n");
1988       return VMAT_CONTIGUOUS_DOWN;
1989     }
1990 
1991   if (!perm_mask_for_reverse (vectype))
1992     {
1993       if (dump_enabled_p ())
1994 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1995 			 "negative step and reversing not supported.\n");
1996       return VMAT_ELEMENTWISE;
1997     }
1998 
1999   return VMAT_CONTIGUOUS_REVERSE;
2000 }
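
/* For instance, a single-copy load from "a[n - i]" has a constant negative
   DR_STEP; if the access is aligned (or misalignment is supported) and a
   reversing permute exists for VECTYPE, the function above returns
   VMAT_CONTIGUOUS_REVERSE, so the vector is loaded from the lowest address
   and then reversed.  Otherwise it falls back to VMAT_ELEMENTWISE.  */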
2001 
2002 /* STMT_INFO is either a masked or unconditional store.  Return the value
2003    being stored.  */
2004 
2005 tree
2006 vect_get_store_rhs (stmt_vec_info stmt_info)
2007 {
2008   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2009     {
2010       gcc_assert (gimple_assign_single_p (assign));
2011       return gimple_assign_rhs1 (assign);
2012     }
2013   if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2014     {
2015       internal_fn ifn = gimple_call_internal_fn (call);
2016       int index = internal_fn_stored_value_index (ifn);
2017       gcc_assert (index >= 0);
2018       return gimple_call_arg (call, index);
2019     }
2020   gcc_unreachable ();
2021 }
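
/* E.g. for a plain store "MEM[ptr] = x_1" the function above returns x_1,
   while for a masked store such as ".MASK_STORE (ptr, align, mask, x_1)"
   it returns the argument selected by internal_fn_stored_value_index
   (the final argument in this sketch).  */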
2022 
2023 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2024 
2025    This function returns a vector type which can be composed from NELTS pieces,
2026    whose type is recorded in PTYPE.  VTYPE should be a vector type and has the
2027    same vector size as the returned vector.  The function first checks whether
2028    the target supports construction from piece-sized vector modes; if not, it
2029    then checks construction from a piece-sized scalar mode.  It returns
2030    NULL_TREE if no suitable composition can be found.
2031 
2032    For example, for (vtype=V16QI, nelts=4), we can probably get:
2033      - V16QI with PTYPE V4QI.
2034      - V4SI with PTYPE SI.
2035      - NULL_TREE.  */
2036 
2037 static tree
2038 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2039 {
2040   gcc_assert (VECTOR_TYPE_P (vtype));
2041   gcc_assert (known_gt (nelts, 0U));
2042 
2043   machine_mode vmode = TYPE_MODE (vtype);
2044   if (!VECTOR_MODE_P (vmode))
2045     return NULL_TREE;
2046 
2047   poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2048   unsigned int pbsize;
2049   if (constant_multiple_p (vbsize, nelts, &pbsize))
2050     {
2051       /* First check if vec_init optab supports construction from
2052 	 vector pieces directly.  */
2053       scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2054       poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2055       machine_mode rmode;
2056       if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2057 	  && (convert_optab_handler (vec_init_optab, vmode, rmode)
2058 	      != CODE_FOR_nothing))
2059 	{
2060 	  *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2061 	  return vtype;
2062 	}
2063 
2064       /* Otherwise check whether an integer type of the same piece size exists
2065 	 and whether the vec_init optab supports construction from it directly.  */
2066       if (int_mode_for_size (pbsize, 0).exists (&elmode)
2067 	  && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2068 	  && (convert_optab_handler (vec_init_optab, rmode, elmode)
2069 	      != CODE_FOR_nothing))
2070 	{
2071 	  *ptype = build_nonstandard_integer_type (pbsize, 1);
2072 	  return build_vector_type (*ptype, nelts);
2073 	}
2074     }
2075 
2076   return NULL_TREE;
2077 }
2078 
2079 /* A subroutine of get_load_store_type, with a subset of the same
2080    arguments.  Handle the case where STMT_INFO is part of a grouped load
2081    or store.
2082 
2083    For stores, the statements in the group are all consecutive
2084    and there is no gap at the end.  For loads, the statements in the
2085    group might not be consecutive; there can be gaps between statements
2086    as well as at the end.  */
2087 
2088 static bool
2089 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2090 			   tree vectype, slp_tree slp_node,
2091 			   bool masked_p, vec_load_store_type vls_type,
2092 			   vect_memory_access_type *memory_access_type,
2093 			   dr_alignment_support *alignment_support_scheme,
2094 			   gather_scatter_info *gs_info)
2095 {
2096   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2097   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2098   stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2099   dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2100   unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2101   bool single_element_p = (stmt_info == first_stmt_info
2102 			   && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2103   unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2104   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2105 
2106   /* True if the vectorized statements would access beyond the last
2107      statement in the group.  */
2108   bool overrun_p = false;
2109 
2110   /* True if we can cope with such overrun by peeling for gaps, so that
2111      there is at least one final scalar iteration after the vector loop.  */
2112   bool can_overrun_p = (!masked_p
2113 			&& vls_type == VLS_LOAD
2114 			&& loop_vinfo
2115 			&& !loop->inner);
2116 
2117   /* There can only be a gap at the end of the group if the stride is
2118      known at compile time.  */
2119   gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2120 
2121   /* Stores can't yet have gaps.  */
2122   gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2123 
2124   if (slp_node)
2125     {
2126       /* For SLP vectorization we directly vectorize a subchain
2127 	 without permutation.  */
2128       if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2129 	first_dr_info
2130 	  = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2131       if (STMT_VINFO_STRIDED_P (first_stmt_info))
2132 	{
2133 	  /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2134 	     separated by the stride, until we have a complete vector.
2135 	     Fall back to scalar accesses if that isn't possible.  */
2136 	  if (multiple_p (nunits, group_size))
2137 	    *memory_access_type = VMAT_STRIDED_SLP;
2138 	  else
2139 	    *memory_access_type = VMAT_ELEMENTWISE;
2140 	}
2141       else
2142 	{
2143 	  overrun_p = loop_vinfo && gap != 0;
2144 	  if (overrun_p && vls_type != VLS_LOAD)
2145 	    {
2146 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2147 			       "Grouped store with gaps requires"
2148 			       " non-consecutive accesses\n");
2149 	      return false;
2150 	    }
2151 	  /* An overrun is fine if the trailing elements are smaller
2152 	     than the alignment boundary B.  Every vector access will
2153 	     be a multiple of B and so we are guaranteed to access a
2154 	     non-gap element in the same B-sized block.  */
2155 	  if (overrun_p
2156 	      && gap < (vect_known_alignment_in_bytes (first_dr_info)
2157 			/ vect_get_scalar_dr_size (first_dr_info)))
2158 	    overrun_p = false;
2159 
2160 	  /* If the gap splits the vector in half and the target
2161 	     can do half-vector operations avoid the epilogue peeling
2162 	     by simply loading half of the vector only.  Usually
2163 	     the construction with an upper zero half will be elided.  */
2164 	  dr_alignment_support alignment_support_scheme;
2165 	  tree half_vtype;
2166 	  if (overrun_p
2167 	      && !masked_p
2168 	      && (((alignment_support_scheme
2169 		      = vect_supportable_dr_alignment (vinfo,
2170 						       first_dr_info, false)))
2171 		   == dr_aligned
2172 		  || alignment_support_scheme == dr_unaligned_supported)
2173 	      && known_eq (nunits, (group_size - gap) * 2)
2174 	      && known_eq (nunits, group_size)
2175 	      && (vector_vector_composition_type (vectype, 2, &half_vtype)
2176 		  != NULL_TREE))
2177 	    overrun_p = false;
2178 
2179 	  if (overrun_p && !can_overrun_p)
2180 	    {
2181 	      if (dump_enabled_p ())
2182 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2183 				 "Peeling for outer loop is not supported\n");
2184 	      return false;
2185 	    }
2186 	  int cmp = compare_step_with_zero (vinfo, stmt_info);
2187 	  if (cmp < 0)
2188 	    {
2189 	      if (single_element_p)
2190 		/* ???  The VMAT_CONTIGUOUS_REVERSE code generation is
2191 		   only correct for single element "interleaving" SLP.  */
2192 		*memory_access_type = get_negative_load_store_type
2193 				       (vinfo, stmt_info, vectype, vls_type, 1);
2194 	      else
2195 		{
2196 		  /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2197 		     separated by the stride, until we have a complete vector.
2198 		     Fall back to scalar accesses if that isn't possible.  */
2199 		  if (multiple_p (nunits, group_size))
2200 		    *memory_access_type = VMAT_STRIDED_SLP;
2201 		  else
2202 		    *memory_access_type = VMAT_ELEMENTWISE;
2203 		}
2204 	    }
2205 	  else
2206 	    {
2207 	      gcc_assert (!loop_vinfo || cmp > 0);
2208 	      *memory_access_type = VMAT_CONTIGUOUS;
2209 	    }
2210 	}
2211     }
2212   else
2213     {
2214       /* We can always handle this case using elementwise accesses,
2215 	 but see if something more efficient is available.  */
2216       *memory_access_type = VMAT_ELEMENTWISE;
2217 
2218       /* If there is a gap at the end of the group then these optimizations
2219 	 would access excess elements in the last iteration.  */
2220       bool would_overrun_p = (gap != 0);
2221       /* An overrun is fine if the trailing elements are smaller than the
2222 	 alignment boundary B.  Every vector access will be a multiple of B
2223 	 and so we are guaranteed to access a non-gap element in the
2224 	 same B-sized block.  */
2225       if (would_overrun_p
2226 	  && !masked_p
2227 	  && gap < (vect_known_alignment_in_bytes (first_dr_info)
2228 		    / vect_get_scalar_dr_size (first_dr_info)))
2229 	would_overrun_p = false;
2230 
2231       if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2232 	  && (can_overrun_p || !would_overrun_p)
2233 	  && compare_step_with_zero (vinfo, stmt_info) > 0)
2234 	{
2235 	  /* First cope with the degenerate case of a single-element
2236 	     vector.  */
2237 	  if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2238 	    ;
2239 
2240 	  /* Otherwise try using LOAD/STORE_LANES.  */
2241 	  else if (vls_type == VLS_LOAD
2242 		   ? vect_load_lanes_supported (vectype, group_size, masked_p)
2243 		   : vect_store_lanes_supported (vectype, group_size,
2244 						 masked_p))
2245 	    {
2246 	      *memory_access_type = VMAT_LOAD_STORE_LANES;
2247 	      overrun_p = would_overrun_p;
2248 	    }
2249 
2250 	  /* If that fails, try using permuting loads.  */
2251 	  else if (vls_type == VLS_LOAD
2252 		   ? vect_grouped_load_supported (vectype, single_element_p,
2253 						  group_size)
2254 		   : vect_grouped_store_supported (vectype, group_size))
2255 	    {
2256 	      *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2257 	      overrun_p = would_overrun_p;
2258 	    }
2259 	}
2260 
2261       /* As a last resort, try using a gather load or scatter store.
2262 
2263 	 ??? Although the code can handle all group sizes correctly,
2264 	 it probably isn't a win to use separate strided accesses based
2265 	 on nearby locations.  Or, even if it's a win over scalar code,
2266 	 it might not be a win over vectorizing at a lower VF, if that
2267 	 allows us to use contiguous accesses.  */
2268       if (*memory_access_type == VMAT_ELEMENTWISE
2269 	  && single_element_p
2270 	  && loop_vinfo
2271 	  && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2272 						 masked_p, gs_info))
2273 	*memory_access_type = VMAT_GATHER_SCATTER;
2274     }
2275 
2276   if (*memory_access_type == VMAT_GATHER_SCATTER
2277       || *memory_access_type == VMAT_ELEMENTWISE)
2278     *alignment_support_scheme = dr_unaligned_supported;
2279   else
2280     *alignment_support_scheme
2281       = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
2282 
2283   if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2284     {
2285       /* STMT is the leader of the group. Check the operands of all the
2286 	 stmts of the group.  */
2287       stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2288       while (next_stmt_info)
2289 	{
2290 	  tree op = vect_get_store_rhs (next_stmt_info);
2291 	  enum vect_def_type dt;
2292 	  if (!vect_is_simple_use (op, vinfo, &dt))
2293 	    {
2294 	      if (dump_enabled_p ())
2295 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2296 				 "use not simple.\n");
2297 	      return false;
2298 	    }
2299 	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2300 	}
2301     }
2302 
2303   if (overrun_p)
2304     {
2305       gcc_assert (can_overrun_p);
2306       if (dump_enabled_p ())
2307 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2308 			 "Data access with gaps requires scalar "
2309 			 "epilogue loop\n");
2310       LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2311     }
2312 
2313   return true;
2314 }
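
/* Illustrative summary (a sketch, not a specification): for a non-SLP load
   group of size 2 with no gap and a positive step, the code above prefers
   VMAT_LOAD_STORE_LANES when the target has a 2-lane load (an ld2-style
   instruction), then VMAT_CONTIGUOUS_PERMUTE when grouped loads can be
   emulated with permutes, and keeps VMAT_ELEMENTWISE only as the fallback
   (possibly upgraded to VMAT_GATHER_SCATTER for single-element strided
   groups).  */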
2315 
2316 /* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
2317    if there is a memory access type that the vectorized form can use,
2318    storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
2319    or scatters, fill in GS_INFO accordingly.  In addition
2320    *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2321    the target does not support the alignment scheme.
2322 
2323    SLP says whether we're performing SLP rather than loop vectorization.
2324    MASKED_P is true if the statement is conditional on a vectorized mask.
2325    VECTYPE is the vector type that the vectorized statements will use.
2326    NCOPIES is the number of vector statements that will be needed.  */
2327 
2328 static bool
2329 get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
2330 		     tree vectype, slp_tree slp_node,
2331 		     bool masked_p, vec_load_store_type vls_type,
2332 		     unsigned int ncopies,
2333 		     vect_memory_access_type *memory_access_type,
2334 		     dr_alignment_support *alignment_support_scheme,
2335 		     gather_scatter_info *gs_info)
2336 {
2337   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2338   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2339   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2340     {
2341       *memory_access_type = VMAT_GATHER_SCATTER;
2342       if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2343 	gcc_unreachable ();
2344       else if (!vect_is_simple_use (gs_info->offset, vinfo,
2345 				    &gs_info->offset_dt,
2346 				    &gs_info->offset_vectype))
2347 	{
2348 	  if (dump_enabled_p ())
2349 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2350 			     "%s index use not simple.\n",
2351 			     vls_type == VLS_LOAD ? "gather" : "scatter");
2352 	  return false;
2353 	}
2354       /* Gather-scatter accesses perform only component accesses; alignment
2355 	 is irrelevant for them.  */
2356       *alignment_support_scheme = dr_unaligned_supported;
2357     }
2358   else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2359     {
2360       if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2361 				      masked_p,
2362 				      vls_type, memory_access_type,
2363 				      alignment_support_scheme, gs_info))
2364 	return false;
2365     }
2366   else if (STMT_VINFO_STRIDED_P (stmt_info))
2367     {
2368       gcc_assert (!slp_node);
2369       if (loop_vinfo
2370 	  && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2371 						 masked_p, gs_info))
2372 	*memory_access_type = VMAT_GATHER_SCATTER;
2373       else
2374 	*memory_access_type = VMAT_ELEMENTWISE;
2375       /* Alignment is irrelevant here.  */
2376       *alignment_support_scheme = dr_unaligned_supported;
2377     }
2378   else
2379     {
2380       int cmp = compare_step_with_zero (vinfo, stmt_info);
2381       if (cmp == 0)
2382 	{
2383 	  gcc_assert (vls_type == VLS_LOAD);
2384 	  *memory_access_type = VMAT_INVARIANT;
2385 	  /* Invariant accesses perform only component accesses; alignment
2386 	     is irrelevant for them.  */
2387 	  *alignment_support_scheme = dr_unaligned_supported;
2388 	}
2389       else
2390 	{
2391 	  if (cmp < 0)
2392 	    *memory_access_type = get_negative_load_store_type
2393 	       (vinfo, stmt_info, vectype, vls_type, ncopies);
2394 	  else
2395 	    *memory_access_type = VMAT_CONTIGUOUS;
2396 	  *alignment_support_scheme
2397 	    = vect_supportable_dr_alignment (vinfo,
2398 					     STMT_VINFO_DR_INFO (stmt_info),
2399 					     false);
2400 	}
2401     }
2402 
2403   if ((*memory_access_type == VMAT_ELEMENTWISE
2404        || *memory_access_type == VMAT_STRIDED_SLP)
2405       && !nunits.is_constant ())
2406     {
2407       if (dump_enabled_p ())
2408 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2409 			 "Not using elementwise accesses due to variable "
2410 			 "vectorization factor.\n");
2411       return false;
2412     }
2413 
2414   if (*alignment_support_scheme == dr_unaligned_unsupported)
2415     {
2416       if (dump_enabled_p ())
2417 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2418 			 "unsupported unaligned access\n");
2419       return false;
2420     }
2421 
2422   /* FIXME: At the moment the cost model seems to underestimate the
2423      cost of using elementwise accesses.  This check preserves the
2424      traditional behavior until that can be fixed.  */
2425   stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2426   if (!first_stmt_info)
2427     first_stmt_info = stmt_info;
2428   if (*memory_access_type == VMAT_ELEMENTWISE
2429       && !STMT_VINFO_STRIDED_P (first_stmt_info)
2430       && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2431 	   && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2432 	   && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2433     {
2434       if (dump_enabled_p ())
2435 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2436 			 "not falling back to elementwise accesses\n");
2437       return false;
2438     }
2439   return true;
2440 }
2441 
2442 /* Return true if boolean argument MASK is suitable for vectorizing
2443    conditional operation STMT_INFO.  When returning true, store the type
2444    of the definition in *MASK_DT_OUT and the type of the vectorized mask
2445    in *MASK_VECTYPE_OUT.  */
2446 
2447 static bool
2448 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask,
2449 			vect_def_type *mask_dt_out,
2450 			tree *mask_vectype_out)
2451 {
2452   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2453     {
2454       if (dump_enabled_p ())
2455 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2456 			 "mask argument is not a boolean.\n");
2457       return false;
2458     }
2459 
2460   if (TREE_CODE (mask) != SSA_NAME)
2461     {
2462       if (dump_enabled_p ())
2463 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2464 			 "mask argument is not an SSA name.\n");
2465       return false;
2466     }
2467 
2468   enum vect_def_type mask_dt;
2469   tree mask_vectype;
2470   if (!vect_is_simple_use (mask, vinfo, &mask_dt, &mask_vectype))
2471     {
2472       if (dump_enabled_p ())
2473 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2474 			 "mask use not simple.\n");
2475       return false;
2476     }
2477 
2478   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2479   if (!mask_vectype)
2480     mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2481 
2482   if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2483     {
2484       if (dump_enabled_p ())
2485 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2486 			 "could not find an appropriate vector mask type.\n");
2487       return false;
2488     }
2489 
2490   if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2491 		TYPE_VECTOR_SUBPARTS (vectype)))
2492     {
2493       if (dump_enabled_p ())
2494 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2495 			 "vector mask type %T"
2496 			 " does not match vector data type %T.\n",
2497 			 mask_vectype, vectype);
2498 
2499       return false;
2500     }
2501 
2502   *mask_dt_out = mask_dt;
2503   *mask_vectype_out = mask_vectype;
2504   return true;
2505 }
2506 
2507 /* Return true if stored value RHS is suitable for vectorizing store
2508    statement STMT_INFO.  When returning true, store the type of the
2509    definition in *RHS_DT_OUT, the type of the vectorized store value in
2510    *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */
2511 
2512 static bool
2513 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2514 		      slp_tree slp_node, tree rhs,
2515 		      vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2516 		      vec_load_store_type *vls_type_out)
2517 {
2518   /* In the case this is a store from a constant make sure
2519      native_encode_expr can handle it.  */
2520   if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2521     {
2522       if (dump_enabled_p ())
2523 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2524 			 "cannot encode constant as a byte sequence.\n");
2525       return false;
2526     }
2527 
2528   enum vect_def_type rhs_dt;
2529   tree rhs_vectype;
2530   slp_tree slp_op;
2531   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
2532 			   &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2533     {
2534       if (dump_enabled_p ())
2535 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2536 			 "use not simple.\n");
2537       return false;
2538     }
2539 
2540   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2541   if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2542     {
2543       if (dump_enabled_p ())
2544 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2545 			 "incompatible vector types.\n");
2546       return false;
2547     }
2548 
2549   *rhs_dt_out = rhs_dt;
2550   *rhs_vectype_out = rhs_vectype;
2551   if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2552     *vls_type_out = VLS_STORE_INVARIANT;
2553   else
2554     *vls_type_out = VLS_STORE;
2555   return true;
2556 }
2557 
2558 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2559    Note that we support masks with floating-point type, in which case the
2560    floats are interpreted as a bitmask.  */
2561 
2562 static tree
2563 vect_build_all_ones_mask (vec_info *vinfo,
2564 			  stmt_vec_info stmt_info, tree masktype)
2565 {
2566   if (TREE_CODE (masktype) == INTEGER_TYPE)
2567     return build_int_cst (masktype, -1);
2568   else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2569     {
2570       tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2571       mask = build_vector_from_val (masktype, mask);
2572       return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2573     }
2574   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2575     {
2576       REAL_VALUE_TYPE r;
2577       long tmp[6];
2578       for (int j = 0; j < 6; ++j)
2579 	tmp[j] = -1;
2580       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2581       tree mask = build_real (TREE_TYPE (masktype), r);
2582       mask = build_vector_from_val (masktype, mask);
2583       return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2584     }
2585   gcc_unreachable ();
2586 }
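
/* For a mask type with float elements (e.g. a V4SF mask used as a bitmask
   by some gather built-ins), the code above builds one scalar whose bit
   pattern is all ones via real_from_target and broadcasts it, so each lane
   carries the all-ones bit pattern rather than the value -1.0.  */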
2587 
2588 /* Build an all-zero merge value of type VECTYPE while vectorizing
2589    STMT_INFO as a gather load.  */
2590 
2591 static tree
2592 vect_build_zero_merge_argument (vec_info *vinfo,
2593 				stmt_vec_info stmt_info, tree vectype)
2594 {
2595   tree merge;
2596   if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2597     merge = build_int_cst (TREE_TYPE (vectype), 0);
2598   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2599     {
2600       REAL_VALUE_TYPE r;
2601       long tmp[6];
2602       for (int j = 0; j < 6; ++j)
2603 	tmp[j] = 0;
2604       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2605       merge = build_real (TREE_TYPE (vectype), r);
2606     }
2607   else
2608     gcc_unreachable ();
2609   merge = build_vector_from_val (vectype, merge);
2610   return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2611 }
2612 
2613 /* Build a gather load call while vectorizing STMT_INFO.  Insert new
2614    instructions before GSI and add them to VEC_STMT.  GS_INFO describes
2615    the gather load operation.  If the load is conditional, MASK is the
2616    unvectorized condition and MASK_DT is its definition type, otherwise
2617    MASK is null.  */
2618 
2619 static void
2620 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2621 			      gimple_stmt_iterator *gsi,
2622 			      gimple **vec_stmt,
2623 			      gather_scatter_info *gs_info,
2624 			      tree mask)
2625 {
2626   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2627   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2628   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2629   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2630   int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2631   edge pe = loop_preheader_edge (loop);
2632   enum { NARROW, NONE, WIDEN } modifier;
2633   poly_uint64 gather_off_nunits
2634     = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2635 
2636   tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2637   tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2638   tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2639   tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2640   tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2641   tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2642   tree scaletype = TREE_VALUE (arglist);
2643   tree real_masktype = masktype;
2644   gcc_checking_assert (types_compatible_p (srctype, rettype)
2645 		       && (!mask
2646 			   || TREE_CODE (masktype) == INTEGER_TYPE
2647 			   || types_compatible_p (srctype, masktype)));
2648   if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2649     masktype = truth_type_for (srctype);
2650 
2651   tree mask_halftype = masktype;
2652   tree perm_mask = NULL_TREE;
2653   tree mask_perm_mask = NULL_TREE;
2654   if (known_eq (nunits, gather_off_nunits))
2655     modifier = NONE;
2656   else if (known_eq (nunits * 2, gather_off_nunits))
2657     {
2658       modifier = WIDEN;
2659 
2660       /* Currently widening gathers and scatters are only supported for
2661 	 fixed-length vectors.  */
2662       int count = gather_off_nunits.to_constant ();
2663       vec_perm_builder sel (count, count, 1);
2664       for (int i = 0; i < count; ++i)
2665 	sel.quick_push (i | (count / 2));
2666 
2667       vec_perm_indices indices (sel, 1, count);
2668       perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2669 					      indices);
2670     }
2671   else if (known_eq (nunits, gather_off_nunits * 2))
2672     {
2673       modifier = NARROW;
2674 
2675       /* Currently narrowing gathers and scatters are only supported for
2676 	 fixed-length vectors.  */
2677       int count = nunits.to_constant ();
2678       vec_perm_builder sel (count, count, 1);
2679       sel.quick_grow (count);
2680       for (int i = 0; i < count; ++i)
2681 	sel[i] = i < count / 2 ? i : i + count / 2;
2682       vec_perm_indices indices (sel, 2, count);
2683       perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2684 
2685       ncopies *= 2;
2686 
2687       if (mask && masktype == real_masktype)
2688 	{
2689 	  for (int i = 0; i < count; ++i)
2690 	    sel[i] = i | (count / 2);
2691 	  indices.new_vector (sel, 2, count);
2692 	  mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2693 	}
2694       else if (mask)
2695 	mask_halftype = truth_type_for (gs_info->offset_vectype);
2696     }
2697   else
2698     gcc_unreachable ();
2699 
2700   tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2701   tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2702 
2703   tree ptr = fold_convert (ptrtype, gs_info->base);
2704   if (!is_gimple_min_invariant (ptr))
2705     {
2706       gimple_seq seq;
2707       ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2708       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2709       gcc_assert (!new_bb);
2710     }
2711 
2712   tree scale = build_int_cst (scaletype, gs_info->scale);
2713 
2714   tree vec_oprnd0 = NULL_TREE;
2715   tree vec_mask = NULL_TREE;
2716   tree src_op = NULL_TREE;
2717   tree mask_op = NULL_TREE;
2718   tree prev_res = NULL_TREE;
2719 
2720   if (!mask)
2721     {
2722       src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2723       mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2724     }
2725 
2726   auto_vec<tree> vec_oprnds0;
2727   auto_vec<tree> vec_masks;
2728   vect_get_vec_defs_for_operand (vinfo, stmt_info,
2729 				 modifier == WIDEN ? ncopies / 2 : ncopies,
2730 				 gs_info->offset, &vec_oprnds0);
2731   if (mask)
2732     vect_get_vec_defs_for_operand (vinfo, stmt_info,
2733 				   modifier == NARROW ? ncopies / 2 : ncopies,
2734 				   mask, &vec_masks);
2735   for (int j = 0; j < ncopies; ++j)
2736     {
2737       tree op, var;
2738       if (modifier == WIDEN && (j & 1))
2739 	op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2740 				   perm_mask, stmt_info, gsi);
2741       else
2742 	op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2743 
2744       if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2745 	{
2746 	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2747 				TYPE_VECTOR_SUBPARTS (idxtype)));
2748 	  var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2749 	  op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2750 	  gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2751 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2752 	  op = var;
2753 	}
2754 
2755       if (mask)
2756 	{
2757 	  if (mask_perm_mask && (j & 1))
2758 	    mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2759 					    mask_perm_mask, stmt_info, gsi);
2760 	  else
2761 	    {
2762 	      if (modifier == NARROW)
2763 		{
2764 		  if ((j & 1) == 0)
2765 		    vec_mask = vec_masks[j / 2];
2766 		}
2767 	      else
2768 		vec_mask = vec_masks[j];
2769 
2770 	      mask_op = vec_mask;
2771 	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2772 		{
2773 		  poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2774 		  poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2775 		  gcc_assert (known_eq (sub1, sub2));
2776 		  var = vect_get_new_ssa_name (masktype, vect_simple_var);
2777 		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2778 		  gassign *new_stmt
2779 		    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2780 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2781 		  mask_op = var;
2782 		}
2783 	    }
2784 	  if (modifier == NARROW && masktype != real_masktype)
2785 	    {
2786 	      var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2787 	      gassign *new_stmt
2788 		= gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2789 						    : VEC_UNPACK_LO_EXPR,
2790 				       mask_op);
2791 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2792 	      mask_op = var;
2793 	    }
2794 	  src_op = mask_op;
2795 	}
2796 
2797       tree mask_arg = mask_op;
2798       if (masktype != real_masktype)
2799 	{
2800 	  tree utype, optype = TREE_TYPE (mask_op);
2801 	  if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2802 	    utype = real_masktype;
2803 	  else
2804 	    utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2805 	  var = vect_get_new_ssa_name (utype, vect_scalar_var);
2806 	  mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2807 	  gassign *new_stmt
2808 	    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2809 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2810 	  mask_arg = var;
2811 	  if (!useless_type_conversion_p (real_masktype, utype))
2812 	    {
2813 	      gcc_assert (TYPE_PRECISION (utype)
2814 			  <= TYPE_PRECISION (real_masktype));
2815 	      var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2816 	      new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2817 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2818 	      mask_arg = var;
2819 	    }
2820 	  src_op = build_zero_cst (srctype);
2821 	}
2822       gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2823 					    mask_arg, scale);
2824 
2825       if (!useless_type_conversion_p (vectype, rettype))
2826 	{
2827 	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2828 				TYPE_VECTOR_SUBPARTS (rettype)));
2829 	  op = vect_get_new_ssa_name (rettype, vect_simple_var);
2830 	  gimple_call_set_lhs (new_stmt, op);
2831 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2832 	  var = make_ssa_name (vec_dest);
2833 	  op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2834 	  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2835 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2836 	}
2837       else
2838 	{
2839 	  var = make_ssa_name (vec_dest, new_stmt);
2840 	  gimple_call_set_lhs (new_stmt, var);
2841 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2842 	}
2843 
2844       if (modifier == NARROW)
2845 	{
2846 	  if ((j & 1) == 0)
2847 	    {
2848 	      prev_res = var;
2849 	      continue;
2850 	    }
2851 	  var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2852 				      stmt_info, gsi);
2853 	  new_stmt = SSA_NAME_DEF_STMT (var);
2854 	}
2855 
2856       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2857     }
2858   *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2859 }
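
/* Illustrative sketch (not part of GCC): on a target whose masked-gather
   builtin has a prototype along the lines of

     v4df __builtin_gather (v4df src, const void *base, v4si idx,
                            v4df mask, int scale);

   the loop above emits one such 5-argument call per copy, view-converting
   the index and mask operands to the builtin's operand types when they
   differ from the loop's vector types, and permuting or unpacking them for
   the WIDEN and NARROW cases.  The prototype shown is hypothetical; the
   real declaration comes from GS_INFO->decl.  */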
2860 
2861 /* Prepare the base and offset in GS_INFO for vectorization.
2862    Set *DATAREF_PTR to the loop-invariant base address and fill *VEC_OFFSET
2863    with the NCOPIES vectorized offset operands for STMT_INFO.
2864    STMT_INFO is the statement described by GS_INFO and LOOP is the
2865    containing loop.  */
2866 
2867 static void
2868 vect_get_gather_scatter_ops (vec_info *vinfo,
2869 			     class loop *loop, stmt_vec_info stmt_info,
2870 			     gather_scatter_info *gs_info,
2871 			     tree *dataref_ptr, vec<tree> *vec_offset,
2872 			     unsigned ncopies)
2873 {
2874   gimple_seq stmts = NULL;
2875   *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2876   if (stmts != NULL)
2877     {
2878       basic_block new_bb;
2879       edge pe = loop_preheader_edge (loop);
2880       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2881       gcc_assert (!new_bb);
2882     }
2883   vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, gs_info->offset,
2884 				 vec_offset, gs_info->offset_vectype);
2885 }
2886 
2887 /* Prepare to implement a grouped or strided load or store using
2888    the gather load or scatter store operation described by GS_INFO.
2889    STMT_INFO is the load or store statement.
2890 
2891    Set *DATAREF_BUMP to the amount that should be added to the base
2892    address after each copy of the vectorized statement.  Set *VEC_OFFSET
2893    to an invariant offset vector in which element I has the value
2894    I * DR_STEP / SCALE.  */
2895 
2896 static void
2897 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2898 				 loop_vec_info loop_vinfo,
2899 				 gather_scatter_info *gs_info,
2900 				 tree *dataref_bump, tree *vec_offset)
2901 {
2902   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2903   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2904 
2905   tree bump = size_binop (MULT_EXPR,
2906 			  fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2907 			  size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2908   *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
2909 
2910   /* The offset given in GS_INFO can have pointer type, so use the element
2911      type of the vector instead.  */
2912   tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2913 
2914   /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
2915   tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2916 			  ssize_int (gs_info->scale));
2917   step = fold_convert (offset_type, step);
2918 
2919   /* Create {0, X, X*2, X*3, ...}.  */
2920   tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
2921 			     build_zero_cst (offset_type), step);
2922   *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
2923 }
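
/* Illustrative sketch (not part of GCC), with made-up values: for a strided
   access with DR_STEP = 32 bytes, SCALE = 4 and a four-element offset
   vector, the code above computes X = 32 / 4 = 8 and materializes, in the
   loop preheader,

     dataref_bump = 32 * 4;                  (DR_STEP * TYPE_VECTOR_SUBPARTS)
     vec_offset   = { 0, 8, 16, 24 };        (VEC_SERIES_EXPR <0, 8>)

   so that BASE + VEC_OFFSET[i] * SCALE addresses element i of the first
   copy and adding DATAREF_BUMP moves on to the next copy.  */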
2924 
2925 /* Return the amount that should be added to a vector pointer to move
2926    to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
2927    being vectorized and MEMORY_ACCESS_TYPE describes the type of
2928    vectorization.  */
2929 
2930 static tree
2931 vect_get_data_ptr_increment (vec_info *vinfo,
2932 			     dr_vec_info *dr_info, tree aggr_type,
2933 			     vect_memory_access_type memory_access_type)
2934 {
2935   if (memory_access_type == VMAT_INVARIANT)
2936     return size_zero_node;
2937 
2938   tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2939   tree step = vect_dr_behavior (vinfo, dr_info)->step;
2940   if (tree_int_cst_sgn (step) == -1)
2941     iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2942   return iv_step;
2943 }
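
/* Illustrative sketch (not part of GCC), assuming AGGR_TYPE is a 16-byte
   vector such as V4SI:

     forward scalar step (+4 per int):   increment = TYPE_SIZE_UNIT = 16
     backward scalar step (-4 per int):  increment = -16

   i.e. the data pointer is bumped by one whole aggregate per copy, in the
   direction the scalar accesses move.  */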
2944 
2945 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}.  */
2946 
2947 static bool
2948 vectorizable_bswap (vec_info *vinfo,
2949 		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2950 		    gimple **vec_stmt, slp_tree slp_node,
2951 		    slp_tree *slp_op,
2952 		    tree vectype_in, stmt_vector_for_cost *cost_vec)
2953 {
2954   tree op, vectype;
2955   gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2956   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2957   unsigned ncopies;
2958 
2959   op = gimple_call_arg (stmt, 0);
2960   vectype = STMT_VINFO_VECTYPE (stmt_info);
2961   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2962 
2963   /* Multiple types in SLP are handled by creating the appropriate number of
2964      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
2965      case of SLP.  */
2966   if (slp_node)
2967     ncopies = 1;
2968   else
2969     ncopies = vect_get_num_copies (loop_vinfo, vectype);
2970 
2971   gcc_assert (ncopies >= 1);
2972 
2973   tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2974   if (! char_vectype)
2975     return false;
2976 
2977   poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2978   unsigned word_bytes;
2979   if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
2980     return false;
2981 
2982   /* The encoding uses one stepped pattern for each byte in the word.  */
2983   vec_perm_builder elts (num_bytes, word_bytes, 3);
2984   for (unsigned i = 0; i < 3; ++i)
2985     for (unsigned j = 0; j < word_bytes; ++j)
2986       elts.quick_push ((i + 1) * word_bytes - j - 1);
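  /* Illustrative sketch (not part of GCC): for __builtin_bswap32 on a V4SI
     vector viewed as V16QI, WORD_BYTES == 4 and the stepped encoding above
     expands to the byte permutation

       { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8,  15, 14, 13, 12 }

     i.e. the bytes of each 32-bit word are reversed in place.  */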
2987 
2988   vec_perm_indices indices (elts, 1, num_bytes);
2989   if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2990     return false;
2991 
2992   if (! vec_stmt)
2993     {
2994       if (slp_node
2995 	  && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
2996 	{
2997 	  if (dump_enabled_p ())
2998 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2999 			     "incompatible vector types for invariants\n");
3000 	  return false;
3001 	}
3002 
3003       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3004       DUMP_VECT_SCOPE ("vectorizable_bswap");
3005       record_stmt_cost (cost_vec,
3006 			1, vector_stmt, stmt_info, 0, vect_prologue);
3007       record_stmt_cost (cost_vec,
3008 			slp_node
3009 			? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3010 			vec_perm, stmt_info, 0, vect_body);
3011       return true;
3012     }
3013 
3014   tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3015 
3016   /* Transform.  */
3017   vec<tree> vec_oprnds = vNULL;
3018   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3019 		     op, &vec_oprnds);
3020   /* Arguments are ready.  Create the new vector stmt.  */
3021   unsigned i;
3022   tree vop;
3023   FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3024     {
3025       gimple *new_stmt;
3026       tree tem = make_ssa_name (char_vectype);
3027       new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3028 						   char_vectype, vop));
3029       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3030       tree tem2 = make_ssa_name (char_vectype);
3031       new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3032 				      tem, tem, bswap_vconst);
3033       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3034       tem = make_ssa_name (vectype);
3035       new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3036 						   vectype, tem2));
3037       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3038       if (slp_node)
3039 	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3040       else
3041 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3042     }
3043 
3044   if (!slp_node)
3045     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3046 
3047   vec_oprnds.release ();
3048   return true;
3049 }
3050 
3051 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3052    integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3053    in a single step.  On success, store the binary pack code in
3054    *CONVERT_CODE.  */
3055 
3056 static bool
3057 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3058 			  tree_code *convert_code)
3059 {
3060   if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3061       || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3062     return false;
3063 
3064   tree_code code;
3065   int multi_step_cvt = 0;
3066   auto_vec <tree, 8> interm_types;
3067   if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3068 					&code, &multi_step_cvt, &interm_types)
3069       || multi_step_cvt)
3070     return false;
3071 
3072   *convert_code = code;
3073   return true;
3074 }
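
/* Illustrative sketch (not part of GCC): with VECTYPE_IN = V2DI and
   VECTYPE_OUT = V4SI, supportable_narrowing_operation typically reports a
   single-step VEC_PACK_TRUNC_EXPR, so vectorizable_call can compute two
   V2DI half-results and combine them as

     res_V4SI = VEC_PACK_TRUNC_EXPR <half0_V2DI, half1_V2DI>;

   Targets that would need an intermediate type (MULTI_STEP_CVT != 0) are
   rejected here.  */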
3075 
3076 /* Function vectorizable_call.
3077 
3078    Check if STMT_INFO performs a function call that can be vectorized.
3079    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3080    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3081    Return true if STMT_INFO is vectorizable in this way.  */
3082 
3083 static bool
3084 vectorizable_call (vec_info *vinfo,
3085 		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3086 		   gimple **vec_stmt, slp_tree slp_node,
3087 		   stmt_vector_for_cost *cost_vec)
3088 {
3089   gcall *stmt;
3090   tree vec_dest;
3091   tree scalar_dest;
3092   tree op;
3093   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3094   tree vectype_out, vectype_in;
3095   poly_uint64 nunits_in;
3096   poly_uint64 nunits_out;
3097   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3098   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3099   tree fndecl, new_temp, rhs_type;
3100   enum vect_def_type dt[4]
3101     = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3102 	vect_unknown_def_type };
3103   tree vectypes[ARRAY_SIZE (dt)] = {};
3104   slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3105   int ndts = ARRAY_SIZE (dt);
3106   int ncopies, j;
3107   auto_vec<tree, 8> vargs;
3108   auto_vec<tree, 8> orig_vargs;
3109   enum { NARROW, NONE, WIDEN } modifier;
3110   size_t i, nargs;
3111   tree lhs;
3112 
3113   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3114     return false;
3115 
3116   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3117       && ! vec_stmt)
3118     return false;
3119 
3120   /* Is STMT_INFO a vectorizable call?   */
3121   stmt = dyn_cast <gcall *> (stmt_info->stmt);
3122   if (!stmt)
3123     return false;
3124 
3125   if (gimple_call_internal_p (stmt)
3126       && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3127 	  || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3128     /* Handled by vectorizable_load and vectorizable_store.  */
3129     return false;
3130 
3131   if (gimple_call_lhs (stmt) == NULL_TREE
3132       || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3133     return false;
3134 
3135   gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3136 
3137   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3138 
3139   /* Process function arguments.  */
3140   rhs_type = NULL_TREE;
3141   vectype_in = NULL_TREE;
3142   nargs = gimple_call_num_args (stmt);
3143 
3144   /* Bail out if the function has more than four arguments; we do not have
3145      interesting builtin functions to vectorize with more than two arguments
3146      except for fma, and a call with no arguments is not interesting either.  */
3147   if (nargs == 0 || nargs > 4)
3148     return false;
3149 
3150   /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic.  */
3151   combined_fn cfn = gimple_call_combined_fn (stmt);
3152   if (cfn == CFN_GOMP_SIMD_LANE)
3153     {
3154       nargs = 0;
3155       rhs_type = unsigned_type_node;
3156     }
3157 
3158   int mask_opno = -1;
3159   if (internal_fn_p (cfn))
3160     mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3161 
3162   for (i = 0; i < nargs; i++)
3163     {
3164       if ((int) i == mask_opno)
3165 	{
3166 	  op = gimple_call_arg (stmt, i);
3167 	  if (!vect_check_scalar_mask (vinfo,
3168 				       stmt_info, op, &dt[i], &vectypes[i]))
3169 	    return false;
3170 	  continue;
3171 	}
3172 
3173       if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3174 			       i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3175 	{
3176 	  if (dump_enabled_p ())
3177 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3178 			     "use not simple.\n");
3179 	  return false;
3180 	}
3181 
3182       /* We can only handle calls with arguments of the same type.  */
3183       if (rhs_type
3184 	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3185 	{
3186 	  if (dump_enabled_p ())
3187 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3188                              "argument types differ.\n");
3189 	  return false;
3190 	}
3191       if (!rhs_type)
3192 	rhs_type = TREE_TYPE (op);
3193 
3194       if (!vectype_in)
3195 	vectype_in = vectypes[i];
3196       else if (vectypes[i]
3197 	       && !types_compatible_p (vectypes[i], vectype_in))
3198 	{
3199 	  if (dump_enabled_p ())
3200 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3201                              "argument vector types differ.\n");
3202 	  return false;
3203 	}
3204     }
3205   /* If all arguments are external or constant defs, infer the vector type
3206      from the scalar type.  */
3207   if (!vectype_in)
3208     vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3209   if (vec_stmt)
3210     gcc_assert (vectype_in);
3211   if (!vectype_in)
3212     {
3213       if (dump_enabled_p ())
3214 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3215 			 "no vectype for scalar type %T\n", rhs_type);
3216 
3217       return false;
3218     }
3219   /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3220      just mixtures of nunits.  E.g. DI->SI versions of __builtin_ctz*
3221      are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3222      by a pack of the two vectors into an SI vector.  We would need
3223      separate code to handle direct VnDI->VnSI IFN_CTZs.  */
3224   if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3225     {
3226       if (dump_enabled_p ())
3227 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3228 			 "mismatched vector sizes %T and %T\n",
3229 			 vectype_in, vectype_out);
3230       return false;
3231     }
3232 
3233   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3234       != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3235     {
3236       if (dump_enabled_p ())
3237 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3238 			 "mixed mask and nonmask vector types\n");
3239       return false;
3240     }
3241 
3242   /* FORNOW */
3243   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3244   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3245   if (known_eq (nunits_in * 2, nunits_out))
3246     modifier = NARROW;
3247   else if (known_eq (nunits_out, nunits_in))
3248     modifier = NONE;
3249   else if (known_eq (nunits_out * 2, nunits_in))
3250     modifier = WIDEN;
3251   else
3252     return false;
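
  /* Illustrative sketch (not part of GCC): the classification above compares
     element counts only, e.g.

       vectype_in = V4SF, vectype_out = V4SF  -> NONE
       vectype_in = V2DI, vectype_out = V4SI  -> NARROW (two vector calls
                                                 are packed into one result)
       vectype_in = V4SI, vectype_out = V2DI  -> WIDEN (no transform is
                                                 currently implemented)

     Other element-count ratios are rejected.  */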
3253 
3254   /* We only handle functions that do not read or clobber memory.  */
3255   if (gimple_vuse (stmt))
3256     {
3257       if (dump_enabled_p ())
3258 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3259 			 "function reads from or writes to memory.\n");
3260       return false;
3261     }
3262 
3263   /* For now, we only vectorize functions if a target specific builtin
3264      is available.  TODO -- in some cases, it might be profitable to
3265      insert the calls for pieces of the vector, in order to be able
3266      to vectorize other operations in the loop.  */
3267   fndecl = NULL_TREE;
3268   internal_fn ifn = IFN_LAST;
3269   tree callee = gimple_call_fndecl (stmt);
3270 
3271   /* First try using an internal function.  */
3272   tree_code convert_code = ERROR_MARK;
3273   if (cfn != CFN_LAST
3274       && (modifier == NONE
3275 	  || (modifier == NARROW
3276 	      && simple_integer_narrowing (vectype_out, vectype_in,
3277 					   &convert_code))))
3278     ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3279 					  vectype_in);
3280 
3281   /* If that fails, try asking for a target-specific built-in function.  */
3282   if (ifn == IFN_LAST)
3283     {
3284       if (cfn != CFN_LAST)
3285 	fndecl = targetm.vectorize.builtin_vectorized_function
3286 	  (cfn, vectype_out, vectype_in);
3287       else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3288 	fndecl = targetm.vectorize.builtin_md_vectorized_function
3289 	  (callee, vectype_out, vectype_in);
3290     }
3291 
3292   if (ifn == IFN_LAST && !fndecl)
3293     {
3294       if (cfn == CFN_GOMP_SIMD_LANE
3295 	  && !slp_node
3296 	  && loop_vinfo
3297 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3298 	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3299 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3300 	     == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3301 	{
3302 	  /* We can handle IFN_GOMP_SIMD_LANE by returning a
3303 	     { 0, 1, 2, ... vf - 1 } vector.  */
3304 	  gcc_assert (nargs == 0);
3305 	}
3306       else if (modifier == NONE
3307 	       && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3308 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3309 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3310 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3311 	return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3312 				   slp_op, vectype_in, cost_vec);
3313       else
3314 	{
3315 	  if (dump_enabled_p ())
3316 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3317 			     "function is not vectorizable.\n");
3318 	  return false;
3319 	}
3320     }
3321 
3322   if (slp_node)
3323     ncopies = 1;
3324   else if (modifier == NARROW && ifn == IFN_LAST)
3325     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3326   else
3327     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3328 
3329   /* Sanity check: make sure that at least one copy of the vectorized stmt
3330      needs to be generated.  */
3331   gcc_assert (ncopies >= 1);
3332 
3333   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3334   if (!vec_stmt) /* transformation not required.  */
3335     {
3336       if (slp_node)
3337 	for (i = 0; i < nargs; ++i)
3338 	  if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3339 	    {
3340 	      if (dump_enabled_p ())
3341 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3342 				 "incompatible vector types for invariants\n");
3343 	      return false;
3344 	    }
3345       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3346       DUMP_VECT_SCOPE ("vectorizable_call");
3347       vect_model_simple_cost (vinfo, stmt_info,
3348 			      ncopies, dt, ndts, slp_node, cost_vec);
3349       if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3350 	record_stmt_cost (cost_vec, ncopies / 2,
3351 			  vec_promote_demote, stmt_info, 0, vect_body);
3352 
3353       if (loop_vinfo && mask_opno >= 0)
3354 	{
3355 	  unsigned int nvectors = (slp_node
3356 				   ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3357 				   : ncopies);
3358 	  tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3359 	  vect_record_loop_mask (loop_vinfo, masks, nvectors,
3360 				 vectype_out, scalar_mask);
3361 	}
3362       return true;
3363     }
3364 
3365   /* Transform.  */
3366 
3367   if (dump_enabled_p ())
3368     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3369 
3370   /* Handle def.  */
3371   scalar_dest = gimple_call_lhs (stmt);
3372   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3373 
3374   bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3375 
3376   if (modifier == NONE || ifn != IFN_LAST)
3377     {
3378       tree prev_res = NULL_TREE;
3379       vargs.safe_grow (nargs, true);
3380       orig_vargs.safe_grow (nargs, true);
3381       auto_vec<vec<tree> > vec_defs (nargs);
3382       for (j = 0; j < ncopies; ++j)
3383 	{
3384 	  /* Build argument list for the vectorized call.  */
3385 	  if (slp_node)
3386 	    {
3387 	      vec<tree> vec_oprnds0;
3388 
3389 	      vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3390 	      vec_oprnds0 = vec_defs[0];
3391 
3392 	      /* Arguments are ready.  Create the new vector stmt.  */
3393 	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3394 		{
3395 		  size_t k;
3396 		  for (k = 0; k < nargs; k++)
3397 		    {
3398 		      vec<tree> vec_oprndsk = vec_defs[k];
3399 		      vargs[k] = vec_oprndsk[i];
3400 		    }
3401 		  gimple *new_stmt;
3402 		  if (modifier == NARROW)
3403 		    {
3404 		      /* We don't define any narrowing conditional functions
3405 			 at present.  */
3406 		      gcc_assert (mask_opno < 0);
3407 		      tree half_res = make_ssa_name (vectype_in);
3408 		      gcall *call
3409 			= gimple_build_call_internal_vec (ifn, vargs);
3410 		      gimple_call_set_lhs (call, half_res);
3411 		      gimple_call_set_nothrow (call, true);
3412 		      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3413 		      if ((i & 1) == 0)
3414 			{
3415 			  prev_res = half_res;
3416 			  continue;
3417 			}
3418 		      new_temp = make_ssa_name (vec_dest);
3419 		      new_stmt = gimple_build_assign (new_temp, convert_code,
3420 						      prev_res, half_res);
3421 		      vect_finish_stmt_generation (vinfo, stmt_info,
3422 						   new_stmt, gsi);
3423 		    }
3424 		  else
3425 		    {
3426 		      if (mask_opno >= 0 && masked_loop_p)
3427 			{
3428 			  unsigned int vec_num = vec_oprnds0.length ();
3429 			  /* Always true for SLP.  */
3430 			  gcc_assert (ncopies == 1);
3431 			  tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3432 							  vectype_out, i);
3433 			  vargs[mask_opno] = prepare_load_store_mask
3434 			    (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3435 			}
3436 
3437 		      gcall *call;
3438 		      if (ifn != IFN_LAST)
3439 			call = gimple_build_call_internal_vec (ifn, vargs);
3440 		      else
3441 			call = gimple_build_call_vec (fndecl, vargs);
3442 		      new_temp = make_ssa_name (vec_dest, call);
3443 		      gimple_call_set_lhs (call, new_temp);
3444 		      gimple_call_set_nothrow (call, true);
3445 		      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3446 		      new_stmt = call;
3447 		    }
3448 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3449 		}
3450 	      continue;
3451 	    }
3452 
3453 	  for (i = 0; i < nargs; i++)
3454 	    {
3455 	      op = gimple_call_arg (stmt, i);
3456 	      if (j == 0)
3457 		{
3458 		  vec_defs.quick_push (vNULL);
3459 		  vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3460 						 op, &vec_defs[i],
3461 						 vectypes[i]);
3462 		}
3463 	      orig_vargs[i] = vargs[i] = vec_defs[i][j];
3464 	    }
3465 
3466 	  if (mask_opno >= 0 && masked_loop_p)
3467 	    {
3468 	      tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3469 					      vectype_out, j);
3470 	      vargs[mask_opno]
3471 		= prepare_load_store_mask (TREE_TYPE (mask), mask,
3472 					   vargs[mask_opno], gsi);
3473 	    }
3474 
3475 	  gimple *new_stmt;
3476 	  if (cfn == CFN_GOMP_SIMD_LANE)
3477 	    {
3478 	      tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3479 	      tree new_var
3480 		= vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3481 	      gimple *init_stmt = gimple_build_assign (new_var, cst);
3482 	      vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3483 	      new_temp = make_ssa_name (vec_dest);
3484 	      new_stmt = gimple_build_assign (new_temp, new_var);
3485 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3486 	    }
3487 	  else if (modifier == NARROW)
3488 	    {
3489 	      /* We don't define any narrowing conditional functions at
3490 		 present.  */
3491 	      gcc_assert (mask_opno < 0);
3492 	      tree half_res = make_ssa_name (vectype_in);
3493 	      gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3494 	      gimple_call_set_lhs (call, half_res);
3495 	      gimple_call_set_nothrow (call, true);
3496 	      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3497 	      if ((j & 1) == 0)
3498 		{
3499 		  prev_res = half_res;
3500 		  continue;
3501 		}
3502 	      new_temp = make_ssa_name (vec_dest);
3503 	      new_stmt = gimple_build_assign (new_temp, convert_code,
3504 					      prev_res, half_res);
3505 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3506 	    }
3507 	  else
3508 	    {
3509 	      gcall *call;
3510 	      if (ifn != IFN_LAST)
3511 		call = gimple_build_call_internal_vec (ifn, vargs);
3512 	      else
3513 		call = gimple_build_call_vec (fndecl, vargs);
3514 	      new_temp = make_ssa_name (vec_dest, call);
3515 	      gimple_call_set_lhs (call, new_temp);
3516 	      gimple_call_set_nothrow (call, true);
3517 	      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3518 	      new_stmt = call;
3519 	    }
3520 
3521 	  if (j == (modifier == NARROW ? 1 : 0))
3522 	    *vec_stmt = new_stmt;
3523 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3524 	}
3525       for (i = 0; i < nargs; i++)
3526 	{
3527 	  vec<tree> vec_oprndsi = vec_defs[i];
3528 	  vec_oprndsi.release ();
3529 	}
3530     }
3531   else if (modifier == NARROW)
3532     {
3533       auto_vec<vec<tree> > vec_defs (nargs);
3534       /* We don't define any narrowing conditional functions at present.  */
3535       gcc_assert (mask_opno < 0);
3536       for (j = 0; j < ncopies; ++j)
3537 	{
3538 	  /* Build argument list for the vectorized call.  */
3539 	  if (j == 0)
3540 	    vargs.create (nargs * 2);
3541 	  else
3542 	    vargs.truncate (0);
3543 
3544 	  if (slp_node)
3545 	    {
3546 	      vec<tree> vec_oprnds0;
3547 
3548 	      vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3549 	      vec_oprnds0 = vec_defs[0];
3550 
3551 	      /* Arguments are ready.  Create the new vector stmt.  */
3552 	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3553 		{
3554 		  size_t k;
3555 		  vargs.truncate (0);
3556 		  for (k = 0; k < nargs; k++)
3557 		    {
3558 		      vec<tree> vec_oprndsk = vec_defs[k];
3559 		      vargs.quick_push (vec_oprndsk[i]);
3560 		      vargs.quick_push (vec_oprndsk[i + 1]);
3561 		    }
3562 		  gcall *call;
3563 		  if (ifn != IFN_LAST)
3564 		    call = gimple_build_call_internal_vec (ifn, vargs);
3565 		  else
3566 		    call = gimple_build_call_vec (fndecl, vargs);
3567 		  new_temp = make_ssa_name (vec_dest, call);
3568 		  gimple_call_set_lhs (call, new_temp);
3569 		  gimple_call_set_nothrow (call, true);
3570 		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3571 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3572 		}
3573 	      continue;
3574 	    }
3575 
3576 	  for (i = 0; i < nargs; i++)
3577 	    {
3578 	      op = gimple_call_arg (stmt, i);
3579 	      if (j == 0)
3580 		{
3581 		  vec_defs.quick_push (vNULL);
3582 		  vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3583 						 op, &vec_defs[i], vectypes[i]);
3584 		}
3585 	      vec_oprnd0 = vec_defs[i][2*j];
3586 	      vec_oprnd1 = vec_defs[i][2*j+1];
3587 
3588 	      vargs.quick_push (vec_oprnd0);
3589 	      vargs.quick_push (vec_oprnd1);
3590 	    }
3591 
3592 	  gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3593 	  new_temp = make_ssa_name (vec_dest, new_stmt);
3594 	  gimple_call_set_lhs (new_stmt, new_temp);
3595 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3596 
3597 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3598 	}
3599 
3600       if (!slp_node)
3601 	*vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3602 
3603       for (i = 0; i < nargs; i++)
3604 	{
3605 	  vec<tree> vec_oprndsi = vec_defs[i];
3606 	  vec_oprndsi.release ();
3607 	}
3608     }
3609   else
3610     /* No current target implements this case.  */
3611     return false;
3612 
3613   vargs.release ();
3614 
3615   /* The call in STMT might prevent it from being removed in DCE.
3616      We cannot, however, remove it here, because of the way the SSA name
3617      it defines is mapped to the new definition.  So just replace the
3618      rhs of the statement with something harmless.  */
3619 
3620   if (slp_node)
3621     return true;
3622 
3623   stmt_info = vect_orig_stmt (stmt_info);
3624   lhs = gimple_get_lhs (stmt_info->stmt);
3625 
3626   gassign *new_stmt
3627     = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3628   vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3629 
3630   return true;
3631 }
3632 
3633 
3634 struct simd_call_arg_info
3635 {
3636   tree vectype;
3637   tree op;
3638   HOST_WIDE_INT linear_step;
3639   enum vect_def_type dt;
3640   unsigned int align;
3641   bool simd_lane_linear;
3642 };
3643 
3644 /* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
3645    is linear within simd lane (but not within whole loop), note it in
3646    *ARGINFO.  */
3647 
3648 static void
3649 vect_simd_lane_linear (tree op, class loop *loop,
3650 		       struct simd_call_arg_info *arginfo)
3651 {
3652   gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3653 
3654   if (!is_gimple_assign (def_stmt)
3655       || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3656       || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3657     return;
3658 
3659   tree base = gimple_assign_rhs1 (def_stmt);
3660   HOST_WIDE_INT linear_step = 0;
3661   tree v = gimple_assign_rhs2 (def_stmt);
3662   while (TREE_CODE (v) == SSA_NAME)
3663     {
3664       tree t;
3665       def_stmt = SSA_NAME_DEF_STMT (v);
3666       if (is_gimple_assign (def_stmt))
3667 	switch (gimple_assign_rhs_code (def_stmt))
3668 	  {
3669 	  case PLUS_EXPR:
3670 	    t = gimple_assign_rhs2 (def_stmt);
3671 	    if (linear_step || TREE_CODE (t) != INTEGER_CST)
3672 	      return;
3673 	    base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3674 	    v = gimple_assign_rhs1 (def_stmt);
3675 	    continue;
3676 	  case MULT_EXPR:
3677 	    t = gimple_assign_rhs2 (def_stmt);
3678 	    if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3679 	      return;
3680 	    linear_step = tree_to_shwi (t);
3681 	    v = gimple_assign_rhs1 (def_stmt);
3682 	    continue;
3683 	  CASE_CONVERT:
3684 	    t = gimple_assign_rhs1 (def_stmt);
3685 	    if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3686 		|| (TYPE_PRECISION (TREE_TYPE (v))
3687 		    < TYPE_PRECISION (TREE_TYPE (t))))
3688 	      return;
3689 	    if (!linear_step)
3690 	      linear_step = 1;
3691 	    v = t;
3692 	    continue;
3693 	  default:
3694 	    return;
3695 	  }
3696       else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3697 	       && loop->simduid
3698 	       && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3699 	       && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3700 		   == loop->simduid))
3701 	{
3702 	  if (!linear_step)
3703 	    linear_step = 1;
3704 	  arginfo->linear_step = linear_step;
3705 	  arginfo->op = base;
3706 	  arginfo->simd_lane_linear = true;
3707 	  return;
3708 	}
3709     }
3710 }
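
/* Illustrative sketch (not part of GCC), with hypothetical SSA names: the
   walk above recognizes address computations of the form

     _lane = IFN_GOMP_SIMD_LANE (simduid_1);
     _tmp  = (sizetype) _lane;            <-- CASE_CONVERT
     _off  = _tmp * 4;                    <-- MULT_EXPR by a constant
     ptr_2 = &base + _off;                <-- POINTER_PLUS_EXPR

   and records ARGINFO->op = &base, ARGINFO->linear_step = 4 and
   ARGINFO->simd_lane_linear = true, so the SIMD clone can take the address
   as a linear argument instead of a vector of pointers.  */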
3711 
3712 /* Return the number of elements in vector type VECTYPE, which is associated
3713    with a SIMD clone.  At present these vectors always have a constant
3714    length.  */
3715 
3716 static unsigned HOST_WIDE_INT
3717 simd_clone_subparts (tree vectype)
3718 {
3719   return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3720 }
3721 
3722 /* Function vectorizable_simd_clone_call.
3723 
3724    Check if STMT_INFO performs a function call that can be vectorized
3725    by calling a simd clone of the function.
3726    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3727    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3728    Return true if STMT_INFO is vectorizable in this way.  */
3729 
3730 static bool
3731 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3732 			      gimple_stmt_iterator *gsi,
3733 			      gimple **vec_stmt, slp_tree slp_node,
3734 			      stmt_vector_for_cost *)
3735 {
3736   tree vec_dest;
3737   tree scalar_dest;
3738   tree op, type;
3739   tree vec_oprnd0 = NULL_TREE;
3740   tree vectype;
3741   poly_uint64 nunits;
3742   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3743   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3744   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3745   tree fndecl, new_temp;
3746   int ncopies, j;
3747   auto_vec<simd_call_arg_info> arginfo;
3748   vec<tree> vargs = vNULL;
3749   size_t i, nargs;
3750   tree lhs, rtype, ratype;
3751   vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3752 
3753   /* Is STMT a vectorizable call?   */
3754   gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3755   if (!stmt)
3756     return false;
3757 
3758   fndecl = gimple_call_fndecl (stmt);
3759   if (fndecl == NULL_TREE)
3760     return false;
3761 
3762   struct cgraph_node *node = cgraph_node::get (fndecl);
3763   if (node == NULL || node->simd_clones == NULL)
3764     return false;
3765 
3766   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3767     return false;
3768 
3769   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3770       && ! vec_stmt)
3771     return false;
3772 
3773   if (gimple_call_lhs (stmt)
3774       && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3775     return false;
3776 
3777   gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3778 
3779   vectype = STMT_VINFO_VECTYPE (stmt_info);
3780 
3781   if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3782     return false;
3783 
3784   /* FORNOW */
3785   if (slp_node)
3786     return false;
3787 
3788   /* Process function arguments.  */
3789   nargs = gimple_call_num_args (stmt);
3790 
3791   /* Bail out if the function has zero arguments.  */
3792   if (nargs == 0)
3793     return false;
3794 
3795   arginfo.reserve (nargs, true);
3796 
3797   for (i = 0; i < nargs; i++)
3798     {
3799       simd_call_arg_info thisarginfo;
3800       affine_iv iv;
3801 
3802       thisarginfo.linear_step = 0;
3803       thisarginfo.align = 0;
3804       thisarginfo.op = NULL_TREE;
3805       thisarginfo.simd_lane_linear = false;
3806 
3807       op = gimple_call_arg (stmt, i);
3808       if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3809 			       &thisarginfo.vectype)
3810 	  || thisarginfo.dt == vect_uninitialized_def)
3811 	{
3812 	  if (dump_enabled_p ())
3813 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3814 			     "use not simple.\n");
3815 	  return false;
3816 	}
3817 
3818       if (thisarginfo.dt == vect_constant_def
3819 	  || thisarginfo.dt == vect_external_def)
3820 	gcc_assert (thisarginfo.vectype == NULL_TREE);
3821       else
3822 	{
3823 	  gcc_assert (thisarginfo.vectype != NULL_TREE);
3824 	  if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3825 	    {
3826 	      if (dump_enabled_p ())
3827 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3828 				 "vector mask arguments are not supported\n");
3829 	      return false;
3830 	    }
3831 	}
3832 
3833       /* For linear arguments, the analyze phase should have saved
3834 	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
3835       if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3836 	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3837 	{
3838 	  gcc_assert (vec_stmt);
3839 	  thisarginfo.linear_step
3840 	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3841 	  thisarginfo.op
3842 	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3843 	  thisarginfo.simd_lane_linear
3844 	    = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3845 	       == boolean_true_node);
3846 	  /* If the loop has been peeled for alignment, we need to adjust it.  */
3847 	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3848 	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3849 	  if (n1 != n2 && !thisarginfo.simd_lane_linear)
3850 	    {
3851 	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3852 	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3853 	      tree opt = TREE_TYPE (thisarginfo.op);
3854 	      bias = fold_convert (TREE_TYPE (step), bias);
3855 	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3856 	      thisarginfo.op
3857 		= fold_build2 (POINTER_TYPE_P (opt)
3858 			       ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3859 			       thisarginfo.op, bias);
3860 	    }
3861 	}
3862       else if (!vec_stmt
3863 	       && thisarginfo.dt != vect_constant_def
3864 	       && thisarginfo.dt != vect_external_def
3865 	       && loop_vinfo
3866 	       && TREE_CODE (op) == SSA_NAME
3867 	       && simple_iv (loop, loop_containing_stmt (stmt), op,
3868 			     &iv, false)
3869 	       && tree_fits_shwi_p (iv.step))
3870 	{
3871 	  thisarginfo.linear_step = tree_to_shwi (iv.step);
3872 	  thisarginfo.op = iv.base;
3873 	}
3874       else if ((thisarginfo.dt == vect_constant_def
3875 		|| thisarginfo.dt == vect_external_def)
3876 	       && POINTER_TYPE_P (TREE_TYPE (op)))
3877 	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3878       /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3879 	 linear too.  */
3880       if (POINTER_TYPE_P (TREE_TYPE (op))
3881 	  && !thisarginfo.linear_step
3882 	  && !vec_stmt
3883 	  && thisarginfo.dt != vect_constant_def
3884 	  && thisarginfo.dt != vect_external_def
3885 	  && loop_vinfo
3886 	  && !slp_node
3887 	  && TREE_CODE (op) == SSA_NAME)
3888 	vect_simd_lane_linear (op, loop, &thisarginfo);
3889 
3890       arginfo.quick_push (thisarginfo);
3891     }
3892 
3893   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3894   if (!vf.is_constant ())
3895     {
3896       if (dump_enabled_p ())
3897 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3898 			 "not considering SIMD clones; not yet supported"
3899 			 " for variable-width vectors.\n");
3900       return false;
3901     }
3902 
3903   unsigned int badness = 0;
3904   struct cgraph_node *bestn = NULL;
3905   if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3906     bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3907   else
3908     for (struct cgraph_node *n = node->simd_clones; n != NULL;
3909 	 n = n->simdclone->next_clone)
3910       {
3911 	unsigned int this_badness = 0;
3912 	unsigned int num_calls;
3913 	if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
3914 	    || n->simdclone->nargs != nargs)
3915 	  continue;
3916 	if (num_calls != 1)
3917 	  this_badness += exact_log2 (num_calls) * 4096;
3918 	if (n->simdclone->inbranch)
3919 	  this_badness += 8192;
3920 	int target_badness = targetm.simd_clone.usable (n);
3921 	if (target_badness < 0)
3922 	  continue;
3923 	this_badness += target_badness * 512;
3924 	/* FORNOW: Have to add code to add the mask argument.  */
3925 	if (n->simdclone->inbranch)
3926 	  continue;
3927 	for (i = 0; i < nargs; i++)
3928 	  {
3929 	    switch (n->simdclone->args[i].arg_type)
3930 	      {
3931 	      case SIMD_CLONE_ARG_TYPE_VECTOR:
3932 		if (!useless_type_conversion_p
3933 			(n->simdclone->args[i].orig_type,
3934 			 TREE_TYPE (gimple_call_arg (stmt, i))))
3935 		  i = -1;
3936 		else if (arginfo[i].dt == vect_constant_def
3937 			 || arginfo[i].dt == vect_external_def
3938 			 || arginfo[i].linear_step)
3939 		  this_badness += 64;
3940 		break;
3941 	      case SIMD_CLONE_ARG_TYPE_UNIFORM:
3942 		if (arginfo[i].dt != vect_constant_def
3943 		    && arginfo[i].dt != vect_external_def)
3944 		  i = -1;
3945 		break;
3946 	      case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3947 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3948 		if (arginfo[i].dt == vect_constant_def
3949 		    || arginfo[i].dt == vect_external_def
3950 		    || (arginfo[i].linear_step
3951 			!= n->simdclone->args[i].linear_step))
3952 		  i = -1;
3953 		break;
3954 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3955 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3956 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3957 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3958 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3959 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3960 		/* FORNOW */
3961 		i = -1;
3962 		break;
3963 	      case SIMD_CLONE_ARG_TYPE_MASK:
3964 		gcc_unreachable ();
3965 	      }
3966 	    if (i == (size_t) -1)
3967 	      break;
3968 	    if (n->simdclone->args[i].alignment > arginfo[i].align)
3969 	      {
3970 		i = -1;
3971 		break;
3972 	      }
3973 	    if (arginfo[i].align)
3974 	      this_badness += (exact_log2 (arginfo[i].align)
3975 			       - exact_log2 (n->simdclone->args[i].alignment));
3976 	  }
3977 	if (i == (size_t) -1)
3978 	  continue;
3979 	if (bestn == NULL || this_badness < badness)
3980 	  {
3981 	    bestn = n;
3982 	    badness = this_badness;
3983 	  }
3984       }
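
  /* Illustrative sketch (not part of GCC) of the scoring above: with
     VF == 8, a clone with simdlen == 4 needs NUM_CALLS == 2 and starts at
     exact_log2 (2) * 4096 == 4096; an inbranch clone would add 8192 (and is
     currently skipped anyway); target preferences add TARGET_BADNESS * 512;
     each vector argument that is really uniform or linear adds 64.  The
     candidate with the lowest total badness is chosen.  */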
3985 
3986   if (bestn == NULL)
3987     return false;
3988 
3989   for (i = 0; i < nargs; i++)
3990     if ((arginfo[i].dt == vect_constant_def
3991 	 || arginfo[i].dt == vect_external_def)
3992 	&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3993       {
3994 	tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
3995 	arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
3996 							  slp_node);
3997 	if (arginfo[i].vectype == NULL
3998 	    || !constant_multiple_p (bestn->simdclone->simdlen,
3999 				     simd_clone_subparts (arginfo[i].vectype)))
4000 	  return false;
4001       }
4002 
4003   fndecl = bestn->decl;
4004   nunits = bestn->simdclone->simdlen;
4005   ncopies = vector_unroll_factor (vf, nunits);
4006 
4007   /* If the function isn't const, only allow it in simd loops where the user
4008      has asserted that at least nunits consecutive iterations can be
4009      performed using SIMD instructions.  */
4010   if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4011       && gimple_vuse (stmt))
4012     return false;
4013 
4014   /* Sanity check: make sure that at least one copy of the vectorized stmt
4015      needs to be generated.  */
4016   gcc_assert (ncopies >= 1);
4017 
4018   if (!vec_stmt) /* transformation not required.  */
4019     {
4020       STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4021       for (i = 0; i < nargs; i++)
4022 	if ((bestn->simdclone->args[i].arg_type
4023 	     == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4024 	    || (bestn->simdclone->args[i].arg_type
4025 		== SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4026 	  {
4027 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4028 									+ 1,
4029 								      true);
4030 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4031 	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4032 		       ? size_type_node : TREE_TYPE (arginfo[i].op);
4033 	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
4034 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4035 	    tree sll = arginfo[i].simd_lane_linear
4036 		       ? boolean_true_node : boolean_false_node;
4037 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4038 	  }
4039       STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4040       DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4041 /*      vect_model_simple_cost (vinfo, stmt_info, ncopies,
4042 				dt, slp_node, cost_vec); */
4043       return true;
4044     }
4045 
4046   /* Transform.  */
4047 
4048   if (dump_enabled_p ())
4049     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4050 
4051   /* Handle def.  */
4052   scalar_dest = gimple_call_lhs (stmt);
4053   vec_dest = NULL_TREE;
4054   rtype = NULL_TREE;
4055   ratype = NULL_TREE;
4056   if (scalar_dest)
4057     {
4058       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4059       rtype = TREE_TYPE (TREE_TYPE (fndecl));
4060       if (TREE_CODE (rtype) == ARRAY_TYPE)
4061 	{
4062 	  ratype = rtype;
4063 	  rtype = TREE_TYPE (ratype);
4064 	}
4065     }
4066 
4067   auto_vec<vec<tree> > vec_oprnds;
4068   auto_vec<unsigned> vec_oprnds_i;
4069   vec_oprnds.safe_grow_cleared (nargs, true);
4070   vec_oprnds_i.safe_grow_cleared (nargs, true);
4071   for (j = 0; j < ncopies; ++j)
4072     {
4073       /* Build argument list for the vectorized call.  */
4074       if (j == 0)
4075 	vargs.create (nargs);
4076       else
4077 	vargs.truncate (0);
4078 
4079       for (i = 0; i < nargs; i++)
4080 	{
4081 	  unsigned int k, l, m, o;
4082 	  tree atype;
4083 	  op = gimple_call_arg (stmt, i);
4084 	  switch (bestn->simdclone->args[i].arg_type)
4085 	    {
4086 	    case SIMD_CLONE_ARG_TYPE_VECTOR:
4087 	      atype = bestn->simdclone->args[i].vector_type;
4088 	      o = vector_unroll_factor (nunits,
4089 					simd_clone_subparts (atype));
4090 	      for (m = j * o; m < (j + 1) * o; m++)
4091 		{
4092 		  if (simd_clone_subparts (atype)
4093 		      < simd_clone_subparts (arginfo[i].vectype))
4094 		    {
4095 		      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4096 		      k = (simd_clone_subparts (arginfo[i].vectype)
4097 			   / simd_clone_subparts (atype));
4098 		      gcc_assert ((k & (k - 1)) == 0);
4099 		      if (m == 0)
4100 			{
4101 			  vect_get_vec_defs_for_operand (vinfo, stmt_info,
4102 							 ncopies * o / k, op,
4103 							 &vec_oprnds[i]);
4104 			  vec_oprnds_i[i] = 0;
4105 			  vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4106 			}
4107 		      else
4108 			{
4109 			  vec_oprnd0 = arginfo[i].op;
4110 			  if ((m & (k - 1)) == 0)
4111 			    vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4112 			}
4113 		      arginfo[i].op = vec_oprnd0;
4114 		      vec_oprnd0
4115 			= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4116 				  bitsize_int (prec),
4117 				  bitsize_int ((m & (k - 1)) * prec));
4118 		      gassign *new_stmt
4119 			= gimple_build_assign (make_ssa_name (atype),
4120 					       vec_oprnd0);
4121 		      vect_finish_stmt_generation (vinfo, stmt_info,
4122 						   new_stmt, gsi);
4123 		      vargs.safe_push (gimple_assign_lhs (new_stmt));
4124 		    }
4125 		  else
4126 		    {
4127 		      k = (simd_clone_subparts (atype)
4128 			   / simd_clone_subparts (arginfo[i].vectype));
4129 		      gcc_assert ((k & (k - 1)) == 0);
4130 		      vec<constructor_elt, va_gc> *ctor_elts;
4131 		      if (k != 1)
4132 			vec_alloc (ctor_elts, k);
4133 		      else
4134 			ctor_elts = NULL;
4135 		      for (l = 0; l < k; l++)
4136 			{
4137 			  if (m == 0 && l == 0)
4138 			    {
4139 			      vect_get_vec_defs_for_operand (vinfo, stmt_info,
4140 							     k * o * ncopies,
4141 							     op,
4142 							     &vec_oprnds[i]);
4143 			      vec_oprnds_i[i] = 0;
4144 			      vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4145 			    }
4146 			  else
4147 			    vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4148 			  arginfo[i].op = vec_oprnd0;
4149 			  if (k == 1)
4150 			    break;
4151 			  CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4152 						  vec_oprnd0);
4153 			}
4154 		      if (k == 1)
4155 			if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4156 						       atype))
4157 			  {
4158 			    vec_oprnd0
4159 			      = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4160 			    gassign *new_stmt
4161 			      = gimple_build_assign (make_ssa_name (atype),
4162 						     vec_oprnd0);
4163 			    vect_finish_stmt_generation (vinfo, stmt_info,
4164 							 new_stmt, gsi);
4165 			    vargs.safe_push (gimple_assign_lhs (new_stmt));
4166 			  }
4167 			else
4168 			  vargs.safe_push (vec_oprnd0);
4169 		      else
4170 			{
4171 			  vec_oprnd0 = build_constructor (atype, ctor_elts);
4172 			  gassign *new_stmt
4173 			    = gimple_build_assign (make_ssa_name (atype),
4174 						   vec_oprnd0);
4175 			  vect_finish_stmt_generation (vinfo, stmt_info,
4176 						       new_stmt, gsi);
4177 			  vargs.safe_push (gimple_assign_lhs (new_stmt));
4178 			}
4179 		    }
4180 		}
4181 	      break;
4182 	    case SIMD_CLONE_ARG_TYPE_UNIFORM:
4183 	      vargs.safe_push (op);
4184 	      break;
4185 	    case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4186 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4187 	      if (j == 0)
4188 		{
4189 		  gimple_seq stmts;
4190 		  arginfo[i].op
4191 		    = force_gimple_operand (unshare_expr (arginfo[i].op),
4192 					    &stmts, true, NULL_TREE);
4193 		  if (stmts != NULL)
4194 		    {
4195 		      basic_block new_bb;
4196 		      edge pe = loop_preheader_edge (loop);
4197 		      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4198 		      gcc_assert (!new_bb);
4199 		    }
4200 		  if (arginfo[i].simd_lane_linear)
4201 		    {
4202 		      vargs.safe_push (arginfo[i].op);
4203 		      break;
4204 		    }
4205 		  tree phi_res = copy_ssa_name (op);
4206 		  gphi *new_phi = create_phi_node (phi_res, loop->header);
4207 		  add_phi_arg (new_phi, arginfo[i].op,
4208 			       loop_preheader_edge (loop), UNKNOWN_LOCATION);
4209 		  enum tree_code code
4210 		    = POINTER_TYPE_P (TREE_TYPE (op))
4211 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
4212 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
4213 			      ? sizetype : TREE_TYPE (op);
4214 		  poly_widest_int cst
4215 		    = wi::mul (bestn->simdclone->args[i].linear_step,
4216 			       ncopies * nunits);
4217 		  tree tcst = wide_int_to_tree (type, cst);
4218 		  tree phi_arg = copy_ssa_name (op);
4219 		  gassign *new_stmt
4220 		    = gimple_build_assign (phi_arg, code, phi_res, tcst);
4221 		  gimple_stmt_iterator si = gsi_after_labels (loop->header);
4222 		  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4223 		  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4224 			       UNKNOWN_LOCATION);
4225 		  arginfo[i].op = phi_res;
4226 		  vargs.safe_push (phi_res);
4227 		}
4228 	      else
4229 		{
4230 		  enum tree_code code
4231 		    = POINTER_TYPE_P (TREE_TYPE (op))
4232 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
4233 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
4234 			      ? sizetype : TREE_TYPE (op);
4235 		  poly_widest_int cst
4236 		    = wi::mul (bestn->simdclone->args[i].linear_step,
4237 			       j * nunits);
4238 		  tree tcst = wide_int_to_tree (type, cst);
4239 		  new_temp = make_ssa_name (TREE_TYPE (op));
4240 		  gassign *new_stmt
4241 		    = gimple_build_assign (new_temp, code,
4242 					   arginfo[i].op, tcst);
4243 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4244 		  vargs.safe_push (new_temp);
4245 		}
4246 	      break;
4247 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4248 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4249 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4250 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4251 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4252 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4253 	    default:
4254 	      gcc_unreachable ();
4255 	    }
4256 	}
4257 
4258       gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4259       if (vec_dest)
4260 	{
4261 	  gcc_assert (ratype
4262 		      || known_eq (simd_clone_subparts (rtype), nunits));
4263 	  if (ratype)
4264 	    new_temp = create_tmp_var (ratype);
4265 	  else if (useless_type_conversion_p (vectype, rtype))
4266 	    new_temp = make_ssa_name (vec_dest, new_call);
4267 	  else
4268 	    new_temp = make_ssa_name (rtype, new_call);
4269 	  gimple_call_set_lhs (new_call, new_temp);
4270 	}
4271       vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4272       gimple *new_stmt = new_call;
4273 
4274       if (vec_dest)
4275 	{
4276 	  if (!multiple_p (simd_clone_subparts (vectype), nunits))
4277 	    {
4278 	      unsigned int k, l;
4279 	      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4280 	      poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4281 	      k = vector_unroll_factor (nunits,
4282 					simd_clone_subparts (vectype));
4283 	      gcc_assert ((k & (k - 1)) == 0);
4284 	      for (l = 0; l < k; l++)
4285 		{
4286 		  tree t;
4287 		  if (ratype)
4288 		    {
4289 		      t = build_fold_addr_expr (new_temp);
4290 		      t = build2 (MEM_REF, vectype, t,
4291 				  build_int_cst (TREE_TYPE (t), l * bytes));
4292 		    }
4293 		  else
4294 		    t = build3 (BIT_FIELD_REF, vectype, new_temp,
4295 				bitsize_int (prec), bitsize_int (l * prec));
4296 		  new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4297 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4298 
4299 		  if (j == 0 && l == 0)
4300 		    *vec_stmt = new_stmt;
4301 		  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4302 		}
4303 
4304 	      if (ratype)
4305 		vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4306 	      continue;
4307 	    }
4308 	  else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4309 	    {
4310 	      unsigned int k = (simd_clone_subparts (vectype)
4311 				/ simd_clone_subparts (rtype));
4312 	      gcc_assert ((k & (k - 1)) == 0);
4313 	      if ((j & (k - 1)) == 0)
4314 		vec_alloc (ret_ctor_elts, k);
4315 	      if (ratype)
4316 		{
4317 		  unsigned int m, o;
4318 		  o = vector_unroll_factor (nunits,
4319 					    simd_clone_subparts (rtype));
4320 		  for (m = 0; m < o; m++)
4321 		    {
4322 		      tree tem = build4 (ARRAY_REF, rtype, new_temp,
4323 					 size_int (m), NULL_TREE, NULL_TREE);
4324 		      new_stmt = gimple_build_assign (make_ssa_name (rtype),
4325 						      tem);
4326 		      vect_finish_stmt_generation (vinfo, stmt_info,
4327 						   new_stmt, gsi);
4328 		      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4329 					      gimple_assign_lhs (new_stmt));
4330 		    }
4331 		  vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4332 		}
4333 	      else
4334 		CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4335 	      if ((j & (k - 1)) != k - 1)
4336 		continue;
4337 	      vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4338 	      new_stmt
4339 		= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4340 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4341 
4342 	      if ((unsigned) j == k - 1)
4343 		*vec_stmt = new_stmt;
4344 	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4345 	      continue;
4346 	    }
4347 	  else if (ratype)
4348 	    {
4349 	      tree t = build_fold_addr_expr (new_temp);
4350 	      t = build2 (MEM_REF, vectype, t,
4351 			  build_int_cst (TREE_TYPE (t), 0));
4352 	      new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4353 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4354 	      vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4355 	    }
4356 	  else if (!useless_type_conversion_p (vectype, rtype))
4357 	    {
4358 	      vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4359 	      new_stmt
4360 		= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4361 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4362 	    }
4363 	}
4364 
4365       if (j == 0)
4366 	*vec_stmt = new_stmt;
4367       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4368     }
4369 
4370   for (i = 0; i < nargs; ++i)
4371     {
4372       vec<tree> oprndsi = vec_oprnds[i];
4373       oprndsi.release ();
4374     }
4375   vargs.release ();
4376 
4377   /* The call in STMT might prevent it from being removed in dce.
4378      We however cannot remove it here, due to the way the ssa name
4379      it defines is mapped to the new definition.  So just replace the
4380      rhs of the statement with something harmless.  */
4381 
4382   if (slp_node)
4383     return true;
4384 
4385   gimple *new_stmt;
4386   if (scalar_dest)
4387     {
4388       type = TREE_TYPE (scalar_dest);
4389       lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4390       new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4391     }
4392   else
4393     new_stmt = gimple_build_nop ();
4394   vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4395   unlink_stmt_vdef (stmt);
4396 
4397   return true;
4398 }
4399 
4400 
4401 /* Function vect_gen_widened_results_half
4402 
4403    Create a vector stmt whose code is CODE, whose number of operands is
4404    OP_TYPE and whose result variable is VEC_DEST; its arguments are
4405    VEC_OPRND0 and VEC_OPRND1 (the latter is ignored for unary CODEs).
4406    The new vector stmt is built as an assignment to a fresh SSA name
4407    based on VEC_DEST and is inserted at GSI.
4408    STMT_INFO is the original scalar stmt that we are vectorizing.  */
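/* For illustration (a simplified sketch; the exact tree codes depend on the
   target), a widening multiplication such as

       short a, b;
       int p = (int) a * (int) b;

   is typically vectorized as two stmts, one per half of the widened result:

       vect_lo = VEC_WIDEN_MULT_LO_EXPR <va, vb>;
       vect_hi = VEC_WIDEN_MULT_HI_EXPR <va, vb>;

   and each half is produced by one call to this function, with CODE being
   the LO or HI variant respectively.  */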
4409 
4410 static gimple *
4411 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4412                                tree vec_oprnd0, tree vec_oprnd1, int op_type,
4413 			       tree vec_dest, gimple_stmt_iterator *gsi,
4414 			       stmt_vec_info stmt_info)
4415 {
4416   gimple *new_stmt;
4417   tree new_temp;
4418 
4419   /* Generate half of the widened result:  */
4420   gcc_assert (op_type == TREE_CODE_LENGTH (code));
4421   if (op_type != binary_op)
4422     vec_oprnd1 = NULL;
4423   new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4424   new_temp = make_ssa_name (vec_dest, new_stmt);
4425   gimple_assign_set_lhs (new_stmt, new_temp);
4426   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4427 
4428   return new_stmt;
4429 }
4430 
4431 
4432 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4433    For multi-step conversions store the resulting vectors and call the function
4434    recursively.  */
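/* As an illustrative sketch (variable names here are made up), narrowing

       int i;
       short s = (short) i;

   across two input vectors is done with a packing operation:

       vs = VEC_PACK_TRUNC_EXPR <vi_0, vi_1>;

   For a multi-step narrowing (e.g. int -> short -> char) the intermediate
   results are stored back into VEC_OPRNDS and this function recurses with
   MULTI_STEP_CVT decremented, packing again with VEC_PACK_TRUNC_EXPR.  */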
4435 
4436 static void
4437 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4438 				       int multi_step_cvt,
4439 				       stmt_vec_info stmt_info,
4440 				       vec<tree> vec_dsts,
4441 				       gimple_stmt_iterator *gsi,
4442 				       slp_tree slp_node, enum tree_code code)
4443 {
4444   unsigned int i;
4445   tree vop0, vop1, new_tmp, vec_dest;
4446 
4447   vec_dest = vec_dsts.pop ();
4448 
4449   for (i = 0; i < vec_oprnds->length (); i += 2)
4450     {
4451       /* Create demotion operation.  */
4452       vop0 = (*vec_oprnds)[i];
4453       vop1 = (*vec_oprnds)[i + 1];
4454       gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4455       new_tmp = make_ssa_name (vec_dest, new_stmt);
4456       gimple_assign_set_lhs (new_stmt, new_tmp);
4457       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4458 
4459       if (multi_step_cvt)
4460 	/* Store the resulting vector for next recursive call.  */
4461 	(*vec_oprnds)[i/2] = new_tmp;
4462       else
4463 	{
4464 	  /* This is the last step of the conversion sequence.  Store the
4465 	     vectors in SLP_NODE or in the vector info of the scalar statement
4466 	     (or in the STMT_VINFO_RELATED_STMT chain).  */
4467 	  if (slp_node)
4468 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4469 	  else
4470 	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4471 	}
4472     }
4473 
4474   /* For multi-step demotion operations we first generate demotion operations
4475      from the source type to the intermediate types, and then combine the
4476      results (stored in VEC_OPRNDS) with a further demotion operation to the
4477      destination type.  */
4478   if (multi_step_cvt)
4479     {
4480       /* At each level of recursion we have half of the operands we had at the
4481 	 previous level.  */
4482       vec_oprnds->truncate ((i+1)/2);
4483       vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4484 					     multi_step_cvt - 1,
4485 					     stmt_info, vec_dsts, gsi,
4486 					     slp_node, VEC_PACK_TRUNC_EXPR);
4487     }
4488 
4489   vec_dsts.quick_push (vec_dest);
4490 }
4491 
4492 
4493 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4494    and VEC_OPRNDS1, for a binary operation associated with scalar statement
4495    STMT_INFO.  For multi-step conversions the resulting vectors are stored
4496    back in VEC_OPRNDS0 so that the caller can apply the next step.  */
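/* A simplified example (the unpacking codes are illustrative): widening

       short s;
       int i = (int) s;

   where each input vector holds twice as many lanes as an output vector
   produces two stmts per input vector,

       vi_lo = [vec_unpack_lo_expr] vs;
       vi_hi = [vec_unpack_hi_expr] vs;

   both generated by vect_gen_widened_results_half and pushed onto
   VEC_OPRNDS0 in order for any following conversion step.  */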
4497 
4498 static void
4499 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4500 					vec<tree> *vec_oprnds0,
4501 					vec<tree> *vec_oprnds1,
4502 					stmt_vec_info stmt_info, tree vec_dest,
4503 					gimple_stmt_iterator *gsi,
4504 					enum tree_code code1,
4505 					enum tree_code code2, int op_type)
4506 {
4507   int i;
4508   tree vop0, vop1, new_tmp1, new_tmp2;
4509   gimple *new_stmt1, *new_stmt2;
4510   vec<tree> vec_tmp = vNULL;
4511 
4512   vec_tmp.create (vec_oprnds0->length () * 2);
4513   FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4514     {
4515       if (op_type == binary_op)
4516 	vop1 = (*vec_oprnds1)[i];
4517       else
4518 	vop1 = NULL_TREE;
4519 
4520       /* Generate the two halves of promotion operation.  */
4521       new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4522 						 op_type, vec_dest, gsi,
4523 						 stmt_info);
4524       new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4525 						 op_type, vec_dest, gsi,
4526 						 stmt_info);
4527       if (is_gimple_call (new_stmt1))
4528 	{
4529 	  new_tmp1 = gimple_call_lhs (new_stmt1);
4530 	  new_tmp2 = gimple_call_lhs (new_stmt2);
4531 	}
4532       else
4533 	{
4534 	  new_tmp1 = gimple_assign_lhs (new_stmt1);
4535 	  new_tmp2 = gimple_assign_lhs (new_stmt2);
4536 	}
4537 
4538       /* Store the results for the next step.  */
4539       vec_tmp.quick_push (new_tmp1);
4540       vec_tmp.quick_push (new_tmp2);
4541     }
4542 
4543   vec_oprnds0->release ();
4544   *vec_oprnds0 = vec_tmp;
4545 }
4546 
4547 /* Create vectorized promotion stmts for widening stmts using only half the
4548    potential vector size for input.  */
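/* As a rough sketch (types are illustrative): for

       int a, b;
       long long c = (long long) a + (long long) b;

   recognized as WIDEN_PLUS_EXPR, when the input vector type has the same
   number of lanes as the output vector type (i.e. only half of a full-width
   input vector is used), each input is first widened with a NOP_EXPR
   conversion and the operation is then applied to the widened operands:

       va_w = (vector long long) va;
       vb_w = (vector long long) vb;
       vc   = va_w + vb_w;  */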
4549 static void
4550 vect_create_half_widening_stmts (vec_info *vinfo,
4551 					vec<tree> *vec_oprnds0,
4552 					vec<tree> *vec_oprnds1,
4553 					stmt_vec_info stmt_info, tree vec_dest,
4554 					gimple_stmt_iterator *gsi,
4555 					enum tree_code code1,
4556 					int op_type)
4557 {
4558   int i;
4559   tree vop0, vop1;
4560   gimple *new_stmt1;
4561   gimple *new_stmt2;
4562   gimple *new_stmt3;
4563   vec<tree> vec_tmp = vNULL;
4564 
4565   vec_tmp.create (vec_oprnds0->length ());
4566   FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4567     {
4568       tree new_tmp1, new_tmp2, new_tmp3, out_type;
4569 
4570       gcc_assert (op_type == binary_op);
4571       vop1 = (*vec_oprnds1)[i];
4572 
4573       /* Widen the first vector input.  */
4574       out_type = TREE_TYPE (vec_dest);
4575       new_tmp1 = make_ssa_name (out_type);
4576       new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4577       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4578       if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4579 	{
4580 	  /* Widen the second vector input.  */
4581 	  new_tmp2 = make_ssa_name (out_type);
4582 	  new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4583 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4584 	  /* Perform the operation with both vector inputs widened.  */
4585 	  new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
4586 	}
4587       else
4588 	{
4589 	  /* Perform the operation with the single vector input widened.  */
4590 	  new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
4591 	}
4592 
4593       new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4594       gimple_assign_set_lhs (new_stmt3, new_tmp3);
4595       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4596 
4597       /* Store the results for the next step.  */
4598       vec_tmp.quick_push (new_tmp3);
4599     }
4600 
4601   vec_oprnds0->release ();
4602   *vec_oprnds0 = vec_tmp;
4603 }
4604 
4605 
4606 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4607    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4608    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4609    Return true if STMT_INFO is vectorizable in this way.  */
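/* A simplified overview of the three shapes handled below (not exhaustive;
   types assume 32-bit int and 64-bit long):

       float f = (float) i;    <- NONE:   same number of lanes in and out
       long  l = (long) i;     <- WIDEN:  fewer lanes out than in; unpack
                                          lo/hi, possibly in several steps
       short s = (short) i;    <- NARROW: more lanes out than in; pack
                                          pairs of vectors, possibly in
                                          several steps

   The modifier is chosen by comparing TYPE_VECTOR_SUBPARTS of VECTYPE_IN
   and VECTYPE_OUT.  */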
4610 
4611 static bool
4612 vectorizable_conversion (vec_info *vinfo,
4613 			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4614 			 gimple **vec_stmt, slp_tree slp_node,
4615 			 stmt_vector_for_cost *cost_vec)
4616 {
4617   tree vec_dest;
4618   tree scalar_dest;
4619   tree op0, op1 = NULL_TREE;
4620   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4621   enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4622   enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4623   tree new_temp;
4624   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4625   int ndts = 2;
4626   poly_uint64 nunits_in;
4627   poly_uint64 nunits_out;
4628   tree vectype_out, vectype_in;
4629   int ncopies, i;
4630   tree lhs_type, rhs_type;
4631   enum { NARROW, NONE, WIDEN } modifier;
4632   vec<tree> vec_oprnds0 = vNULL;
4633   vec<tree> vec_oprnds1 = vNULL;
4634   tree vop0;
4635   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4636   int multi_step_cvt = 0;
4637   vec<tree> interm_types = vNULL;
4638   tree intermediate_type, cvt_type = NULL_TREE;
4639   int op_type;
4640   unsigned short fltsz;
4641 
4642   /* Is STMT a vectorizable conversion?   */
4643 
4644   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4645     return false;
4646 
4647   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4648       && ! vec_stmt)
4649     return false;
4650 
4651   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4652   if (!stmt)
4653     return false;
4654 
4655   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4656     return false;
4657 
4658   code = gimple_assign_rhs_code (stmt);
4659   if (!CONVERT_EXPR_CODE_P (code)
4660       && code != FIX_TRUNC_EXPR
4661       && code != FLOAT_EXPR
4662       && code != WIDEN_PLUS_EXPR
4663       && code != WIDEN_MINUS_EXPR
4664       && code != WIDEN_MULT_EXPR
4665       && code != WIDEN_LSHIFT_EXPR)
4666     return false;
4667 
4668   op_type = TREE_CODE_LENGTH (code);
4669 
4670   /* Check types of lhs and rhs.  */
4671   scalar_dest = gimple_assign_lhs (stmt);
4672   lhs_type = TREE_TYPE (scalar_dest);
4673   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4674 
4675   /* Check the operands of the operation.  */
4676   slp_tree slp_op0, slp_op1 = NULL;
4677   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4678 			   0, &op0, &slp_op0, &dt[0], &vectype_in))
4679     {
4680       if (dump_enabled_p ())
4681 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4682 			 "use not simple.\n");
4683       return false;
4684     }
4685 
4686   rhs_type = TREE_TYPE (op0);
4687   if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4688       && !((INTEGRAL_TYPE_P (lhs_type)
4689 	    && INTEGRAL_TYPE_P (rhs_type))
4690 	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
4691 	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
4692     return false;
4693 
4694   if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4695       && ((INTEGRAL_TYPE_P (lhs_type)
4696 	   && !type_has_mode_precision_p (lhs_type))
4697 	  || (INTEGRAL_TYPE_P (rhs_type)
4698 	      && !type_has_mode_precision_p (rhs_type))))
4699     {
4700       if (dump_enabled_p ())
4701 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4702                          "type conversion to/from bit-precision unsupported."
4703                          "\n");
4704       return false;
4705     }
4706 
4707   if (op_type == binary_op)
4708     {
4709       gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
4710 		  || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
4711 
4712       op1 = gimple_assign_rhs2 (stmt);
4713       tree vectype1_in;
4714       if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4715 			       &op1, &slp_op1, &dt[1], &vectype1_in))
4716 	{
4717           if (dump_enabled_p ())
4718             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4719                              "use not simple.\n");
4720 	  return false;
4721 	}
4722       /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4723 	 OP1.  */
4724       if (!vectype_in)
4725 	vectype_in = vectype1_in;
4726     }
4727 
4728   /* If op0 is an external or constant def, infer the vector type
4729      from the scalar type.  */
4730   if (!vectype_in)
4731     vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4732   if (vec_stmt)
4733     gcc_assert (vectype_in);
4734   if (!vectype_in)
4735     {
4736       if (dump_enabled_p ())
4737 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4738 			 "no vectype for scalar type %T\n", rhs_type);
4739 
4740       return false;
4741     }
4742 
4743   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4744       && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4745     {
4746       if (dump_enabled_p ())
4747 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4748 			 "can't convert between boolean and non "
4749 			 "boolean vectors %T\n", rhs_type);
4750 
4751       return false;
4752     }
4753 
4754   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4755   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4756   if (known_eq (nunits_out, nunits_in))
4757     if (code == WIDEN_MINUS_EXPR
4758 	|| code == WIDEN_PLUS_EXPR
4759 	|| code == WIDEN_LSHIFT_EXPR
4760 	|| code == WIDEN_MULT_EXPR)
4761       modifier = WIDEN;
4762     else
4763       modifier = NONE;
4764   else if (multiple_p (nunits_out, nunits_in))
4765     modifier = NARROW;
4766   else
4767     {
4768       gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4769       modifier = WIDEN;
4770     }
4771 
4772   /* Multiple types in SLP are handled by creating the appropriate number of
4773      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
4774      case of SLP.  */
4775   if (slp_node)
4776     ncopies = 1;
4777   else if (modifier == NARROW)
4778     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4779   else
4780     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4781 
4782   /* Sanity check: make sure that at least one copy of the vectorized stmt
4783      needs to be generated.  */
4784   gcc_assert (ncopies >= 1);
4785 
4786   bool found_mode = false;
4787   scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4788   scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4789   opt_scalar_mode rhs_mode_iter;
4790 
4791   /* Supportable by target?  */
4792   switch (modifier)
4793     {
4794     case NONE:
4795       if (code != FIX_TRUNC_EXPR
4796 	  && code != FLOAT_EXPR
4797 	  && !CONVERT_EXPR_CODE_P (code))
4798 	return false;
4799       if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4800 	break;
4801       /* FALLTHRU */
4802     unsupported:
4803       if (dump_enabled_p ())
4804 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4805                          "conversion not supported by target.\n");
4806       return false;
4807 
4808     case WIDEN:
4809       if (known_eq (nunits_in, nunits_out))
4810 	{
4811 	  if (!supportable_half_widening_operation (code, vectype_out,
4812 						   vectype_in, &code1))
4813 	    goto unsupported;
4814 	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
4815 	  break;
4816 	}
4817       if (supportable_widening_operation (vinfo, code, stmt_info,
4818 					       vectype_out, vectype_in, &code1,
4819 					       &code2, &multi_step_cvt,
4820 					       &interm_types))
4821 	{
4822 	  /* A binary widening operation can only be supported directly by
4823 	     the architecture, never via multiple steps.  */
4824 	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
4825 	  break;
4826 	}
4827 
4828       if (code != FLOAT_EXPR
4829 	  || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4830 	goto unsupported;
4831 
4832       fltsz = GET_MODE_SIZE (lhs_mode);
4833       FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4834 	{
4835 	  rhs_mode = rhs_mode_iter.require ();
4836 	  if (GET_MODE_SIZE (rhs_mode) > fltsz)
4837 	    break;
4838 
4839 	  cvt_type
4840 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4841 	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4842 	  if (cvt_type == NULL_TREE)
4843 	    goto unsupported;
4844 
4845 	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
4846 	    {
4847 	      if (!supportable_convert_operation (code, vectype_out,
4848 						  cvt_type, &codecvt1))
4849 		goto unsupported;
4850 	    }
4851 	  else if (!supportable_widening_operation (vinfo, code, stmt_info,
4852 						    vectype_out, cvt_type,
4853 						    &codecvt1, &codecvt2,
4854 						    &multi_step_cvt,
4855 						    &interm_types))
4856 	    continue;
4857 	  else
4858 	    gcc_assert (multi_step_cvt == 0);
4859 
4860 	  if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
4861 					      cvt_type,
4862 					      vectype_in, &code1, &code2,
4863 					      &multi_step_cvt, &interm_types))
4864 	    {
4865 	      found_mode = true;
4866 	      break;
4867 	    }
4868 	}
4869 
4870       if (!found_mode)
4871 	goto unsupported;
4872 
4873       if (GET_MODE_SIZE (rhs_mode) == fltsz)
4874 	codecvt2 = ERROR_MARK;
4875       else
4876 	{
4877 	  multi_step_cvt++;
4878 	  interm_types.safe_push (cvt_type);
4879 	  cvt_type = NULL_TREE;
4880 	}
4881       break;
4882 
4883     case NARROW:
4884       gcc_assert (op_type == unary_op);
4885       if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4886 					   &code1, &multi_step_cvt,
4887 					   &interm_types))
4888 	break;
4889 
4890       if (code != FIX_TRUNC_EXPR
4891 	  || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4892 	goto unsupported;
4893 
4894       cvt_type
4895 	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4896       cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4897       if (cvt_type == NULL_TREE)
4898 	goto unsupported;
4899       if (!supportable_convert_operation (code, cvt_type, vectype_in,
4900 					  &codecvt1))
4901 	goto unsupported;
4902       if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4903 					   &code1, &multi_step_cvt,
4904 					   &interm_types))
4905 	break;
4906       goto unsupported;
4907 
4908     default:
4909       gcc_unreachable ();
4910     }
4911 
4912   if (!vec_stmt)		/* transformation not required.  */
4913     {
4914       if (slp_node
4915 	  && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
4916 	      || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
4917 	{
4918 	  if (dump_enabled_p ())
4919 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4920 			     "incompatible vector types for invariants\n");
4921 	  return false;
4922 	}
4923       DUMP_VECT_SCOPE ("vectorizable_conversion");
4924       if (modifier == NONE)
4925         {
4926 	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4927 	  vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
4928 				  cost_vec);
4929 	}
4930       else if (modifier == NARROW)
4931 	{
4932 	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4933 	  /* The final packing step produces one vector result per copy.  */
4934 	  unsigned int nvectors
4935 	    = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
4936 	  vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4937 					      multi_step_cvt, cost_vec);
4938 	}
4939       else
4940 	{
4941 	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4942 	  /* The initial unpacking step produces two vector results
4943 	     per copy.  MULTI_STEP_CVT is 0 for a single conversion,
4944 	     so >> MULTI_STEP_CVT divides by 2^(number of steps - 1).  */
4945 	  unsigned int nvectors
4946 	    = (slp_node
4947 	       ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
4948 	       : ncopies * 2);
4949 	  vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4950 					      multi_step_cvt, cost_vec);
4951 	}
4952       interm_types.release ();
4953       return true;
4954     }
4955 
4956   /* Transform.  */
4957   if (dump_enabled_p ())
4958     dump_printf_loc (MSG_NOTE, vect_location,
4959                      "transform conversion. ncopies = %d.\n", ncopies);
4960 
4961   if (op_type == binary_op)
4962     {
4963       if (CONSTANT_CLASS_P (op0))
4964 	op0 = fold_convert (TREE_TYPE (op1), op0);
4965       else if (CONSTANT_CLASS_P (op1))
4966 	op1 = fold_convert (TREE_TYPE (op0), op1);
4967     }
4968 
4969   /* In case of multi-step conversion, we first generate conversion operations
4970      to the intermediate types, and then from those types to the final one.
4971      We create vector destinations for the intermediate types (INTERM_TYPES)
4972      received from supportable_*_operation, and store them in the correct
4973      order for later use in vect_create_vectorized_*_stmts ().  */
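  /* E.g. for a two-step widening char -> short -> int (an illustrative
     chain), VEC_DSTS ends up as { int-vector dest, short-vector dest }:
     the final destination is pushed first and the innermost intermediate
     destination last, matching the order in which the promotion loop
     below walks from MULTI_STEP_CVT down to 0.  */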
4974   auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4975   vec_dest = vect_create_destination_var (scalar_dest,
4976 					  (cvt_type && modifier == WIDEN)
4977 					  ? cvt_type : vectype_out);
4978   vec_dsts.quick_push (vec_dest);
4979 
4980   if (multi_step_cvt)
4981     {
4982       for (i = interm_types.length () - 1;
4983 	   interm_types.iterate (i, &intermediate_type); i--)
4984 	{
4985 	  vec_dest = vect_create_destination_var (scalar_dest,
4986 						  intermediate_type);
4987 	  vec_dsts.quick_push (vec_dest);
4988 	}
4989     }
4990 
4991   if (cvt_type)
4992     vec_dest = vect_create_destination_var (scalar_dest,
4993 					    modifier == WIDEN
4994 					    ? vectype_out : cvt_type);
4995 
4996   int ninputs = 1;
4997   if (!slp_node)
4998     {
4999       if (modifier == WIDEN)
5000 	;
5001       else if (modifier == NARROW)
5002 	{
5003 	  if (multi_step_cvt)
5004 	    ninputs = vect_pow2 (multi_step_cvt);
5005 	  ninputs *= 2;
5006 	}
5007     }
5008 
5009   switch (modifier)
5010     {
5011     case NONE:
5012       vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5013 			 op0, &vec_oprnds0);
5014       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5015 	{
5016 	  /* Arguments are ready.  Create the new vector stmt.  */
5017 	  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5018 	  gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5019 	  new_temp = make_ssa_name (vec_dest, new_stmt);
5020 	  gimple_assign_set_lhs (new_stmt, new_temp);
5021 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5022 
5023 	  if (slp_node)
5024 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5025 	  else
5026 	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5027 	}
5028       break;
5029 
5030     case WIDEN:
5031       /* In case the vectorization factor (VF) is bigger than the number
5032 	 of elements that we can fit in a vectype (nunits), we have to
5033 	 generate more than one vector stmt, i.e., we need to "unroll" the
5034 	 vector stmt by a factor of VF/nunits.  */
5035       vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5036 			 op0, &vec_oprnds0,
5037 			 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5038 			 &vec_oprnds1);
5039       if (code == WIDEN_LSHIFT_EXPR)
5040 	{
5041 	  int oprnds_size = vec_oprnds0.length ();
5042 	  vec_oprnds1.create (oprnds_size);
5043 	  for (i = 0; i < oprnds_size; ++i)
5044 	    vec_oprnds1.quick_push (op1);
5045 	}
5046       /* Arguments are ready.  Create the new vector stmts.  */
5047       for (i = multi_step_cvt; i >= 0; i--)
5048 	{
5049 	  tree this_dest = vec_dsts[i];
5050 	  enum tree_code c1 = code1, c2 = code2;
5051 	  if (i == 0 && codecvt2 != ERROR_MARK)
5052 	    {
5053 	      c1 = codecvt1;
5054 	      c2 = codecvt2;
5055 	    }
5056 	  if (known_eq (nunits_out, nunits_in))
5057 	    vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
5058 						    &vec_oprnds1, stmt_info,
5059 						    this_dest, gsi,
5060 						    c1, op_type);
5061 	  else
5062 	    vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5063 						    &vec_oprnds1, stmt_info,
5064 						    this_dest, gsi,
5065 						    c1, c2, op_type);
5066 	}
5067 
5068       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5069 	{
5070 	  gimple *new_stmt;
5071 	  if (cvt_type)
5072 	    {
5073 	      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5074 	      new_temp = make_ssa_name (vec_dest);
5075 	      new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
5076 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5077 	    }
5078 	  else
5079 	    new_stmt = SSA_NAME_DEF_STMT (vop0);
5080 
5081 	  if (slp_node)
5082 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5083 	  else
5084 	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5085 	}
5086       break;
5087 
5088     case NARROW:
5089       /* In case the vectorization factor (VF) is bigger than the number
5090 	 of elements that we can fit in a vectype (nunits), we have to
5091 	 generate more than one vector stmt, i.e., we need to "unroll" the
5092 	 vector stmt by a factor of VF/nunits.  */
5093       vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5094 			 op0, &vec_oprnds0);
5095       /* Arguments are ready.  Create the new vector stmts.  */
5096       if (cvt_type)
5097 	FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5098 	  {
5099 	    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5100 	    new_temp = make_ssa_name (vec_dest);
5101 	    gassign *new_stmt
5102 	      = gimple_build_assign (new_temp, codecvt1, vop0);
5103 	    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5104 	    vec_oprnds0[i] = new_temp;
5105 	  }
5106 
5107       vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5108 					     multi_step_cvt,
5109 					     stmt_info, vec_dsts, gsi,
5110 					     slp_node, code1);
5111       break;
5112     }
5113   if (!slp_node)
5114     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5115 
5116   vec_oprnds0.release ();
5117   vec_oprnds1.release ();
5118   interm_types.release ();
5119 
5120   return true;
5121 }
5122 
5123 /* Return true if we can assume from the scalar form of STMT_INFO that
5124    neither the scalar nor the vector forms will generate code.  STMT_INFO
5125    is known not to involve a data reference.  */
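/* For example (assuming int and unsigned int share the same precision
   and mode):

       unsigned int u = (unsigned int) i;   <- nop conversion, no code
       short s = (short) i;                 <- not a nop, needs packing

   A plain SSA copy or a VIEW_CONVERT_EXPR is likewise treated as
   generating no code.  */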
5126 
5127 bool
5128 vect_nop_conversion_p (stmt_vec_info stmt_info)
5129 {
5130   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5131   if (!stmt)
5132     return false;
5133 
5134   tree lhs = gimple_assign_lhs (stmt);
5135   tree_code code = gimple_assign_rhs_code (stmt);
5136   tree rhs = gimple_assign_rhs1 (stmt);
5137 
5138   if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5139     return true;
5140 
5141   if (CONVERT_EXPR_CODE_P (code))
5142     return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5143 
5144   return false;
5145 }
5146 
5147 /* Function vectorizable_assignment.
5148 
5149    Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5150    If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5151    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5152    Return true if STMT_INFO is vectorizable in this way.  */
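/* Two illustrative cases this function accepts (names are made up):

       b_1 = a_2;                    <- plain SSA copy
       u_3 = (unsigned int) a_2;     <- conversion that keeps the number
                                        of lanes and the vector size

   Each is vectorized as a single vector copy, wrapping the operand in a
   VIEW_CONVERT_EXPR for conversion codes.  */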
5153 
5154 static bool
5155 vectorizable_assignment (vec_info *vinfo,
5156 			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5157 			 gimple **vec_stmt, slp_tree slp_node,
5158 			 stmt_vector_for_cost *cost_vec)
5159 {
5160   tree vec_dest;
5161   tree scalar_dest;
5162   tree op;
5163   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5164   tree new_temp;
5165   enum vect_def_type dt[1] = {vect_unknown_def_type};
5166   int ndts = 1;
5167   int ncopies;
5168   int i;
5169   vec<tree> vec_oprnds = vNULL;
5170   tree vop;
5171   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5172   enum tree_code code;
5173   tree vectype_in;
5174 
5175   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5176     return false;
5177 
5178   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5179       && ! vec_stmt)
5180     return false;
5181 
5182   /* Is vectorizable assignment?  */
5183   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5184   if (!stmt)
5185     return false;
5186 
5187   scalar_dest = gimple_assign_lhs (stmt);
5188   if (TREE_CODE (scalar_dest) != SSA_NAME)
5189     return false;
5190 
5191   if (STMT_VINFO_DATA_REF (stmt_info))
5192     return false;
5193 
5194   code = gimple_assign_rhs_code (stmt);
5195   if (!(gimple_assign_single_p (stmt)
5196 	|| code == PAREN_EXPR
5197 	|| CONVERT_EXPR_CODE_P (code)))
5198     return false;
5199 
5200   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5201   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5202 
5203   /* Multiple types in SLP are handled by creating the appropriate number of
5204      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5205      case of SLP.  */
5206   if (slp_node)
5207     ncopies = 1;
5208   else
5209     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5210 
5211   gcc_assert (ncopies >= 1);
5212 
5213   slp_tree slp_op;
5214   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5215 			   &dt[0], &vectype_in))
5216     {
5217       if (dump_enabled_p ())
5218         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5219                          "use not simple.\n");
5220       return false;
5221     }
5222   if (!vectype_in)
5223     vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5224 
5225   /* We can handle NOP_EXPR conversions that do not change the number
5226      of elements or the vector size.  */
5227   if ((CONVERT_EXPR_CODE_P (code)
5228        || code == VIEW_CONVERT_EXPR)
5229       && (!vectype_in
5230 	  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5231 	  || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5232 		       GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5233     return false;
5234 
5235   if (VECTOR_BOOLEAN_TYPE_P (vectype)
5236       && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5237     {
5238       if (dump_enabled_p ())
5239 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5240 			 "can't convert between boolean and non "
5241 			 "boolean vectors %T\n", TREE_TYPE (op));
5242 
5243       return false;
5244     }
5245 
5246   /* We do not handle bit-precision changes.  */
5247   if ((CONVERT_EXPR_CODE_P (code)
5248        || code == VIEW_CONVERT_EXPR)
5249       && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5250       && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5251 	  || !type_has_mode_precision_p (TREE_TYPE (op)))
5252       /* But a conversion that does not change the bit-pattern is ok.  */
5253       && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5254 	    > TYPE_PRECISION (TREE_TYPE (op)))
5255 	   && TYPE_UNSIGNED (TREE_TYPE (op))))
5256     {
5257       if (dump_enabled_p ())
5258         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5259                          "type conversion to/from bit-precision "
5260                          "unsupported.\n");
5261       return false;
5262     }
5263 
5264   if (!vec_stmt) /* transformation not required.  */
5265     {
5266       if (slp_node
5267 	  && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5268 	{
5269 	  if (dump_enabled_p ())
5270 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5271 			     "incompatible vector types for invariants\n");
5272 	  return false;
5273 	}
5274       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5275       DUMP_VECT_SCOPE ("vectorizable_assignment");
5276       if (!vect_nop_conversion_p (stmt_info))
5277 	vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5278 				cost_vec);
5279       return true;
5280     }
5281 
5282   /* Transform.  */
5283   if (dump_enabled_p ())
5284     dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5285 
5286   /* Handle def.  */
5287   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5288 
5289   /* Handle use.  */
5290   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5291 
5292   /* Arguments are ready.  Create the new vector stmt.  */
5293   FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5294     {
5295       if (CONVERT_EXPR_CODE_P (code)
5296 	  || code == VIEW_CONVERT_EXPR)
5297 	vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5298       gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5299       new_temp = make_ssa_name (vec_dest, new_stmt);
5300       gimple_assign_set_lhs (new_stmt, new_temp);
5301       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5302       if (slp_node)
5303 	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5304       else
5305 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5306     }
5307   if (!slp_node)
5308     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5309 
5310   vec_oprnds.release ();
5311   return true;
5312 }
5313 
5314 
5315 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5316    either as shift by a scalar or by a vector.  */
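/* A hypothetical use (the surrounding code is made up): a pattern
   recognizer that wants to emit  x << 3  on unsigned int lanes could
   guard the transformation with

       if (vect_supportable_shift (vinfo, LSHIFT_EXPR, unsigned_type_node))
         ...

   The check succeeds if the target provides either a vector-shift-by-scalar
   or a vector-shift-by-vector optab handler for the corresponding vector
   mode.  */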
5317 
5318 bool
5319 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5320 {
5321 
5322   machine_mode vec_mode;
5323   optab optab;
5324   int icode;
5325   tree vectype;
5326 
5327   vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5328   if (!vectype)
5329     return false;
5330 
5331   optab = optab_for_tree_code (code, vectype, optab_scalar);
5332   if (!optab
5333       || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5334     {
5335       optab = optab_for_tree_code (code, vectype, optab_vector);
5336       if (!optab
5337           || (optab_handler (optab, TYPE_MODE (vectype))
5338                       == CODE_FOR_nothing))
5339         return false;
5340     }
5341 
5342   vec_mode = TYPE_MODE (vectype);
5343   icode = (int) optab_handler (optab, vec_mode);
5344   if (icode == CODE_FOR_nothing)
5345     return false;
5346 
5347   return true;
5348 }
5349 
5350 
5351 /* Function vectorizable_shift.
5352 
5353    Check if STMT_INFO performs a shift operation that can be vectorized.
5354    If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5355    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5356    Return true if STMT_INFO is vectorizable in this way.  */
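/* Two shapes this function has to distinguish (a simplified sketch):

       x_1 = y_2 >> 3;      <- invariant shift amount; prefer the
                               vector-shifted-by-scalar optab
       x_1 = y_2 >> z_3;    <- amount may differ per lane; requires the
                               vector-shifted-by-vector optab and a
                               vectorized Z

   The analysis below picks the optab accordingly and falls back from one
   form to the other when the target lacks the preferred one.  */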
5357 
5358 static bool
5359 vectorizable_shift (vec_info *vinfo,
5360 		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5361 		    gimple **vec_stmt, slp_tree slp_node,
5362 		    stmt_vector_for_cost *cost_vec)
5363 {
5364   tree vec_dest;
5365   tree scalar_dest;
5366   tree op0, op1 = NULL;
5367   tree vec_oprnd1 = NULL_TREE;
5368   tree vectype;
5369   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5370   enum tree_code code;
5371   machine_mode vec_mode;
5372   tree new_temp;
5373   optab optab;
5374   int icode;
5375   machine_mode optab_op2_mode;
5376   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5377   int ndts = 2;
5378   poly_uint64 nunits_in;
5379   poly_uint64 nunits_out;
5380   tree vectype_out;
5381   tree op1_vectype;
5382   int ncopies;
5383   int i;
5384   vec<tree> vec_oprnds0 = vNULL;
5385   vec<tree> vec_oprnds1 = vNULL;
5386   tree vop0, vop1;
5387   unsigned int k;
5388   bool scalar_shift_arg = true;
5389   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5390   bool incompatible_op1_vectype_p = false;
5391 
5392   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5393     return false;
5394 
5395   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5396       && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5397       && ! vec_stmt)
5398     return false;
5399 
5400   /* Is STMT a vectorizable shift/rotate operation?  */
5401   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5402   if (!stmt)
5403     return false;
5404 
5405   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5406     return false;
5407 
5408   code = gimple_assign_rhs_code (stmt);
5409 
5410   if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5411       || code == RROTATE_EXPR))
5412     return false;
5413 
5414   scalar_dest = gimple_assign_lhs (stmt);
5415   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5416   if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5417     {
5418       if (dump_enabled_p ())
5419         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5420                          "bit-precision shifts not supported.\n");
5421       return false;
5422     }
5423 
5424   slp_tree slp_op0;
5425   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5426 			   0, &op0, &slp_op0, &dt[0], &vectype))
5427     {
5428       if (dump_enabled_p ())
5429         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5430                          "use not simple.\n");
5431       return false;
5432     }
5433   /* If op0 is an external or constant def, infer the vector type
5434      from the scalar type.  */
5435   if (!vectype)
5436     vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5437   if (vec_stmt)
5438     gcc_assert (vectype);
5439   if (!vectype)
5440     {
5441       if (dump_enabled_p ())
5442         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5443                          "no vectype for scalar type\n");
5444       return false;
5445     }
5446 
5447   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5448   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5449   if (maybe_ne (nunits_out, nunits_in))
5450     return false;
5451 
5452   stmt_vec_info op1_def_stmt_info;
5453   slp_tree slp_op1;
5454   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5455 			   &dt[1], &op1_vectype, &op1_def_stmt_info))
5456     {
5457       if (dump_enabled_p ())
5458         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5459                          "use not simple.\n");
5460       return false;
5461     }
5462 
5463   /* Multiple types in SLP are handled by creating the appropriate number of
5464      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5465      case of SLP.  */
5466   if (slp_node)
5467     ncopies = 1;
5468   else
5469     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5470 
5471   gcc_assert (ncopies >= 1);
5472 
5473   /* Determine whether the shift amount is a vector, or scalar.  If the
5474      shift/rotate amount is a vector, use the vector/vector shift optabs.  */
5475 
5476   if ((dt[1] == vect_internal_def
5477        || dt[1] == vect_induction_def
5478        || dt[1] == vect_nested_cycle)
5479       && !slp_node)
5480     scalar_shift_arg = false;
5481   else if (dt[1] == vect_constant_def
5482 	   || dt[1] == vect_external_def
5483 	   || dt[1] == vect_internal_def)
5484     {
5485       /* In SLP, we need to check whether the shift count is the same in
5486 	 all the scalar stmts; in loops, a constant or invariant shift
5487 	 count is always treated as a scalar shift.  */
5488       if (slp_node)
5489 	{
5490 	  vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5491 	  stmt_vec_info slpstmt_info;
5492 
5493 	  FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5494 	    {
5495 	      gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5496 	      if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5497 		scalar_shift_arg = false;
5498 	    }
5499 
5500 	  /* For internal SLP defs we have to make sure we see scalar stmts
5501 	     for all vector elements.
5502 	     ???  For different vectors we could resort to a different
5503 	     scalar shift operand but code-generation below simply always
5504 	     takes the first.  */
5505 	  if (dt[1] == vect_internal_def
5506 	      && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5507 			   stmts.length ()))
5508 	    scalar_shift_arg = false;
5509 	}
5510 
5511       /* If the shift amount is computed by a pattern stmt we cannot
5512          use the scalar amount directly thus give up and use a vector
5513 	 shift.  */
5514       if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5515 	scalar_shift_arg = false;
5516     }
5517   else
5518     {
5519       if (dump_enabled_p ())
5520         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5521                          "operand mode requires invariant argument.\n");
5522       return false;
5523     }
5524 
5525   /* Vector shifted by vector.  */
5526   bool was_scalar_shift_arg = scalar_shift_arg;
5527   if (!scalar_shift_arg)
5528     {
5529       optab = optab_for_tree_code (code, vectype, optab_vector);
5530       if (dump_enabled_p ())
5531         dump_printf_loc (MSG_NOTE, vect_location,
5532                          "vector/vector shift/rotate found.\n");
5533 
5534       if (!op1_vectype)
5535 	op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5536 						   slp_op1);
5537       incompatible_op1_vectype_p
5538 	= (op1_vectype == NULL_TREE
5539 	   || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5540 			TYPE_VECTOR_SUBPARTS (vectype))
5541 	   || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5542       if (incompatible_op1_vectype_p
5543 	  && (!slp_node
5544 	      || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5545 	      || slp_op1->refcnt != 1))
5546 	{
5547 	  if (dump_enabled_p ())
5548 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5549                              "unusable type for last operand in"
5550                              " vector/vector shift/rotate.\n");
5551 	  return false;
5552 	}
5553     }
5554   /* See if the machine has a vector-shifted-by-scalar insn, and if not,
5555      whether it has a vector-shifted-by-vector insn.  */
5556   else
5557     {
5558       optab = optab_for_tree_code (code, vectype, optab_scalar);
5559       if (optab
5560           && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5561         {
5562           if (dump_enabled_p ())
5563             dump_printf_loc (MSG_NOTE, vect_location,
5564                              "vector/scalar shift/rotate found.\n");
5565         }
5566       else
5567         {
5568           optab = optab_for_tree_code (code, vectype, optab_vector);
5569           if (optab
5570                && (optab_handler (optab, TYPE_MODE (vectype))
5571                       != CODE_FOR_nothing))
5572             {
5573 	      scalar_shift_arg = false;
5574 
5575               if (dump_enabled_p ())
5576                 dump_printf_loc (MSG_NOTE, vect_location,
5577                                  "vector/vector shift/rotate found.\n");
5578 
5579 	      if (!op1_vectype)
5580 		op1_vectype = get_vectype_for_scalar_type (vinfo,
5581 							   TREE_TYPE (op1),
5582 							   slp_op1);
5583 
5584               /* Unlike the other binary operators, shifts/rotates take an
5585                  int rhs rather than one of the same type as the lhs, so
5586                  make sure the scalar shift amount has the right type when
5587 		 we are dealing with vectors of long long/long/short/char.  */
5588 	      incompatible_op1_vectype_p
5589 		= (!op1_vectype
5590 		   || !tree_nop_conversion_p (TREE_TYPE (vectype),
5591 					      TREE_TYPE (op1)));
5592 	      if (incompatible_op1_vectype_p
5593 		  && dt[1] == vect_internal_def)
5594 		{
5595 		  if (dump_enabled_p ())
5596 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5597 				     "unusable type for last operand in"
5598 				     " vector/vector shift/rotate.\n");
5599 		  return false;
5600 		}
5601             }
5602         }
5603     }
5604 
5605   /* Supportable by target?  */
5606   if (!optab)
5607     {
5608       if (dump_enabled_p ())
5609         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5610                          "no optab.\n");
5611       return false;
5612     }
5613   vec_mode = TYPE_MODE (vectype);
5614   icode = (int) optab_handler (optab, vec_mode);
5615   if (icode == CODE_FOR_nothing)
5616     {
5617       if (dump_enabled_p ())
5618         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5619                          "op not supported by target.\n");
5620       /* Check only during analysis.  */
5621       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5622 	  || (!vec_stmt
5623 	      && !vect_worthwhile_without_simd_p (vinfo, code)))
5624         return false;
5625       if (dump_enabled_p ())
5626         dump_printf_loc (MSG_NOTE, vect_location,
5627                          "proceeding using word mode.\n");
5628     }
5629 
5630   /* Worthwhile without SIMD support?  Check only during analysis.  */
5631   if (!vec_stmt
5632       && !VECTOR_MODE_P (TYPE_MODE (vectype))
5633       && !vect_worthwhile_without_simd_p (vinfo, code))
5634     {
5635       if (dump_enabled_p ())
5636         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5637                          "not worthwhile without SIMD support.\n");
5638       return false;
5639     }
5640 
5641   if (!vec_stmt) /* transformation not required.  */
5642     {
5643       if (slp_node
5644 	  && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5645 	      || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5646 		  && (!incompatible_op1_vectype_p
5647 		      || dt[1] == vect_constant_def)
5648 		  && !vect_maybe_update_slp_op_vectype
5649 			(slp_op1,
5650 			 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5651 	{
5652 	  if (dump_enabled_p ())
5653 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5654 			     "incompatible vector types for invariants\n");
5655 	  return false;
5656 	}
5657       /* Now adjust the constant shift amount in place.  */
5658       if (slp_node
5659 	  && incompatible_op1_vectype_p
5660 	  && dt[1] == vect_constant_def)
5661 	{
5662 	  for (unsigned i = 0;
5663 	       i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5664 	    {
5665 	      SLP_TREE_SCALAR_OPS (slp_op1)[i]
5666 		= fold_convert (TREE_TYPE (vectype),
5667 				SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5668 	      gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5669 			   == INTEGER_CST));
5670 	    }
5671 	}
5672       STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5673       DUMP_VECT_SCOPE ("vectorizable_shift");
5674       vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5675 			      scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5676       return true;
5677     }
5678 
5679   /* Transform.  */
5680 
5681   if (dump_enabled_p ())
5682     dump_printf_loc (MSG_NOTE, vect_location,
5683                      "transform binary/unary operation.\n");
5684 
5685   if (incompatible_op1_vectype_p && !slp_node)
5686     {
5687       gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5688       op1 = fold_convert (TREE_TYPE (vectype), op1);
5689       if (dt[1] != vect_constant_def)
5690 	op1 = vect_init_vector (vinfo, stmt_info, op1,
5691 				TREE_TYPE (vectype), NULL);
5692     }
5693 
5694   /* Handle def.  */
5695   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5696 
5697   if (scalar_shift_arg && dt[1] != vect_internal_def)
5698     {
5699       /* Vector shl and shr insn patterns can be defined with scalar
5700 	 operand 2 (shift operand).  In this case, use constant or loop
5701 	 invariant op1 directly, without extending it to vector mode
5702 	 first.  */
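      /* E.g. if the insn pattern's operand 2 has a scalar mode (checked
	 just below via insn_data), a loop-invariant count C can be used
	 directly:

	     vect_x = vect_y >> C;

	 otherwise the invariant count is broadcast into a vector by
	 vect_get_vec_defs further down.  */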
5703       optab_op2_mode = insn_data[icode].operand[2].mode;
5704       if (!VECTOR_MODE_P (optab_op2_mode))
5705 	{
5706 	  if (dump_enabled_p ())
5707 	    dump_printf_loc (MSG_NOTE, vect_location,
5708 			     "operand 1 using scalar mode.\n");
5709 	  vec_oprnd1 = op1;
5710 	  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5711 	  vec_oprnds1.quick_push (vec_oprnd1);
5712 	  /* Store vec_oprnd1 for every vector stmt to be created.
5713 	     We check during the analysis that all the shift arguments
5714 	     are the same.
5715 	     TODO: Allow different constants for different vector
5716 	     stmts generated for an SLP instance.  */
5717 	  for (k = 0;
5718 	       k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5719 	    vec_oprnds1.quick_push (vec_oprnd1);
5720 	}
5721     }
5722   else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5723     {
5724       if (was_scalar_shift_arg)
5725 	{
5726 	  /* If the argument was the same in all lanes create
5727 	     the correctly typed vector shift amount directly.  */
5728 	  op1 = fold_convert (TREE_TYPE (vectype), op1);
5729 	  op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5730 				  !loop_vinfo ? gsi : NULL);
5731 	  vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5732 					 !loop_vinfo ? gsi : NULL);
5733 	  vec_oprnds1.create (slp_node->vec_stmts_size);
5734 	  for (k = 0; k < slp_node->vec_stmts_size; k++)
5735 	    vec_oprnds1.quick_push (vec_oprnd1);
5736 	}
5737       else if (dt[1] == vect_constant_def)
5738 	/* The constant shift amount has been adjusted in place.  */
5739 	;
5740       else
5741 	gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5742     }
5743 
5744   /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5745      (a special case for certain kinds of vector shifts); otherwise,
5746      operand 1 should be of a vector type (the usual case).  */
5747   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5748 		     op0, &vec_oprnds0,
5749 		     vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5750 
5751   /* Arguments are ready.  Create the new vector stmt.  */
5752   FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5753     {
5754       /* For internal defs where we need to use a scalar shift arg
5755 	 extract the first lane.  */
5756       if (scalar_shift_arg && dt[1] == vect_internal_def)
5757 	{
5758 	  vop1 = vec_oprnds1[0];
5759 	  new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5760 	  gassign *new_stmt
5761 	    = gimple_build_assign (new_temp,
5762 				   build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5763 					   vop1,
5764 					   TYPE_SIZE (TREE_TYPE (new_temp)),
5765 					   bitsize_zero_node));
5766 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5767 	  vop1 = new_temp;
5768 	}
5769       else
5770 	vop1 = vec_oprnds1[i];
5771       gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5772       new_temp = make_ssa_name (vec_dest, new_stmt);
5773       gimple_assign_set_lhs (new_stmt, new_temp);
5774       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5775       if (slp_node)
5776 	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5777       else
5778 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5779     }
5780 
5781   if (!slp_node)
5782     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5783 
5784   vec_oprnds0.release ();
5785   vec_oprnds1.release ();
5786 
5787   return true;
5788 }
5789 
5790 
5791 /* Function vectorizable_operation.
5792 
5793    Check if STMT_INFO performs a binary, unary or ternary operation that can
5794    be vectorized.
5795    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5796    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5797    Return true if STMT_INFO is vectorizable in this way.  */
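/* For instance (a deliberately simple case), the scalar stmt

       c_1 = a_2 + b_3;

   with four int lanes per vector becomes

       vect_c = vect_a + vect_b;

   one such stmt per copy (or per SLP vector).  POINTER_PLUS_EXPR and
   POINTER_DIFF_EXPR are mapped to plain PLUS_EXPR/MINUS_EXPR below.  */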
5798 
5799 static bool
5800 vectorizable_operation (vec_info *vinfo,
5801 			stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5802 			gimple **vec_stmt, slp_tree slp_node,
5803 			stmt_vector_for_cost *cost_vec)
5804 {
5805   tree vec_dest;
5806   tree scalar_dest;
5807   tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5808   tree vectype;
5809   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5810   enum tree_code code, orig_code;
5811   machine_mode vec_mode;
5812   tree new_temp;
5813   int op_type;
5814   optab optab;
5815   bool target_support_p;
5816   enum vect_def_type dt[3]
5817     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5818   int ndts = 3;
5819   poly_uint64 nunits_in;
5820   poly_uint64 nunits_out;
5821   tree vectype_out;
5822   int ncopies, vec_num;
5823   int i;
5824   vec<tree> vec_oprnds0 = vNULL;
5825   vec<tree> vec_oprnds1 = vNULL;
5826   vec<tree> vec_oprnds2 = vNULL;
5827   tree vop0, vop1, vop2;
5828   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5829 
5830   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5831     return false;
5832 
5833   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5834       && ! vec_stmt)
5835     return false;
5836 
5837   /* Is STMT a vectorizable binary/unary operation?   */
5838   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5839   if (!stmt)
5840     return false;
5841 
5842   /* Loads and stores are handled in vectorizable_{load,store}.  */
5843   if (STMT_VINFO_DATA_REF (stmt_info))
5844     return false;
5845 
5846   orig_code = code = gimple_assign_rhs_code (stmt);
5847 
5848   /* Shifts are handled in vectorizable_shift.  */
5849   if (code == LSHIFT_EXPR
5850       || code == RSHIFT_EXPR
5851       || code == LROTATE_EXPR
5852       || code == RROTATE_EXPR)
5853    return false;
5854 
5855   /* Comparisons are handled in vectorizable_comparison.  */
5856   if (TREE_CODE_CLASS (code) == tcc_comparison)
5857     return false;
5858 
5859   /* Conditions are handled in vectorizable_condition.  */
5860   if (code == COND_EXPR)
5861     return false;
5862 
5863   /* For pointer addition and subtraction, we should use the normal
5864      plus and minus for the vector operation.  */
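  /* For instance (sketch), a POINTER_PLUS_EXPR such as
       q_2 = p_1 + 16;
     simply becomes a plain vector PLUS_EXPR (pointers are vectorized as
     vectors of unsigned integers, see the POINTER_DIFF_EXPR handling
     below); the pointer/integer distinction only matters in the scalar
     IL.  */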
5865   if (code == POINTER_PLUS_EXPR)
5866     code = PLUS_EXPR;
5867   if (code == POINTER_DIFF_EXPR)
5868     code = MINUS_EXPR;
5869 
5870   /* Support only unary, binary or ternary operations.  */
5871   op_type = TREE_CODE_LENGTH (code);
5872   if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5873     {
5874       if (dump_enabled_p ())
5875         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5876                          "num. args = %d (not unary/binary/ternary op).\n",
5877                          op_type);
5878       return false;
5879     }
5880 
5881   scalar_dest = gimple_assign_lhs (stmt);
5882   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5883 
5884   /* Most operations cannot handle bit-precision types without extra
5885      truncations.  */
5886   bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
5887   if (!mask_op_p
5888       && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5889       /* Exceptions are bitwise binary operations.  */
5890       && code != BIT_IOR_EXPR
5891       && code != BIT_XOR_EXPR
5892       && code != BIT_AND_EXPR)
5893     {
5894       if (dump_enabled_p ())
5895         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5896                          "bit-precision arithmetic not supported.\n");
5897       return false;
5898     }
5899 
5900   slp_tree slp_op0;
5901   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5902 			   0, &op0, &slp_op0, &dt[0], &vectype))
5903     {
5904       if (dump_enabled_p ())
5905         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5906                          "use not simple.\n");
5907       return false;
5908     }
5909   /* If op0 is an external or constant def, infer the vector type
5910      from the scalar type.  */
5911   if (!vectype)
5912     {
5913       /* For a boolean type we cannot determine the vectype from an
5914 	 invariant value (we don't know whether it should be a vector
5915 	 of booleans or a vector of integers).  Use the output vectype
5916 	 instead, because operations on booleans don't change the
5917 	 type.  */
5918       if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5919 	{
5920 	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5921 	    {
5922 	      if (dump_enabled_p ())
5923 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5924 				 "not supported operation on bool value.\n");
5925 	      return false;
5926 	    }
5927 	  vectype = vectype_out;
5928 	}
5929       else
5930 	vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
5931 					       slp_node);
5932     }
5933   if (vec_stmt)
5934     gcc_assert (vectype);
5935   if (!vectype)
5936     {
5937       if (dump_enabled_p ())
5938 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5939 			 "no vectype for scalar type %T\n",
5940 			 TREE_TYPE (op0));
5941 
5942       return false;
5943     }
5944 
5945   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5946   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5947   if (maybe_ne (nunits_out, nunits_in))
5948     return false;
5949 
5950   tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
5951   slp_tree slp_op1 = NULL, slp_op2 = NULL;
5952   if (op_type == binary_op || op_type == ternary_op)
5953     {
5954       if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5955 			       1, &op1, &slp_op1, &dt[1], &vectype2))
5956 	{
5957 	  if (dump_enabled_p ())
5958 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5959                              "use not simple.\n");
5960 	  return false;
5961 	}
5962     }
5963   if (op_type == ternary_op)
5964     {
5965       if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5966 			       2, &op2, &slp_op2, &dt[2], &vectype3))
5967 	{
5968 	  if (dump_enabled_p ())
5969 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5970                              "use not simple.\n");
5971 	  return false;
5972 	}
5973     }
5974 
5975   /* Multiple types in SLP are handled by creating the appropriate number of
5976      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5977      case of SLP.  */
5978   if (slp_node)
5979     {
5980       ncopies = 1;
5981       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5982     }
5983   else
5984     {
5985       ncopies = vect_get_num_copies (loop_vinfo, vectype);
5986       vec_num = 1;
5987     }
5988 
5989   gcc_assert (ncopies >= 1);
5990 
5991   /* Reject attempts to combine mask types with nonmask types, e.g. if
5992      we have an AND between a (nonmask) boolean loaded from memory and
5993      a (mask) boolean result of a comparison.
5994 
5995      TODO: We could easily fix these cases up using pattern statements.  */
5996   if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
5997       || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
5998       || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
5999     {
6000       if (dump_enabled_p ())
6001 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6002 			 "mixed mask and nonmask vector types\n");
6003       return false;
6004     }
6005 
6006   /* Supportable by target?  */
6007 
6008   vec_mode = TYPE_MODE (vectype);
6009   if (code == MULT_HIGHPART_EXPR)
6010     target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6011   else
6012     {
6013       optab = optab_for_tree_code (code, vectype, optab_default);
6014       if (!optab)
6015 	{
6016           if (dump_enabled_p ())
6017             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6018                              "no optab.\n");
6019 	  return false;
6020 	}
6021       target_support_p = (optab_handler (optab, vec_mode)
6022 			  != CODE_FOR_nothing);
6023     }
6024 
6025   if (!target_support_p)
6026     {
6027       if (dump_enabled_p ())
6028 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6029                          "op not supported by target.\n");
6030       /* Check only during analysis.  */
6031       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6032 	  || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
6033         return false;
6034       if (dump_enabled_p ())
6035 	dump_printf_loc (MSG_NOTE, vect_location,
6036                          "proceeding using word mode.\n");
6037     }
6038 
6039   /* Worthwhile without SIMD support?  Check only during analysis.  */
6040   if (!VECTOR_MODE_P (vec_mode)
6041       && !vec_stmt
6042       && !vect_worthwhile_without_simd_p (vinfo, code))
6043     {
6044       if (dump_enabled_p ())
6045         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6046                          "not worthwhile without SIMD support.\n");
6047       return false;
6048     }
6049 
6050   int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6051   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6052   internal_fn cond_fn = get_conditional_internal_fn (code);
6053 
6054   if (!vec_stmt) /* transformation not required.  */
6055     {
6056       /* If this operation is part of a reduction, a fully-masked loop
6057 	 should only change the active lanes of the reduction chain,
6058 	 keeping the inactive lanes as-is.  */
6059       if (loop_vinfo
6060 	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6061 	  && reduc_idx >= 0)
6062 	{
6063 	  if (cond_fn == IFN_LAST
6064 	      || !direct_internal_fn_supported_p (cond_fn, vectype,
6065 						  OPTIMIZE_FOR_SPEED))
6066 	    {
6067 	      if (dump_enabled_p ())
6068 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6069 				 "can't use a fully-masked loop because no"
6070 				 " conditional operation is available.\n");
6071 	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6072 	    }
6073 	  else
6074 	    vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6075 				   vectype, NULL);
6076 	}
6077 
6078       /* Put types on constant and invariant SLP children.  */
6079       if (slp_node
6080 	  && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6081 	      || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6082 	      || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6083 	{
6084 	  if (dump_enabled_p ())
6085 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6086 			     "incompatible vector types for invariants\n");
6087 	  return false;
6088 	}
6089 
6090       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6091       DUMP_VECT_SCOPE ("vectorizable_operation");
6092       vect_model_simple_cost (vinfo, stmt_info,
6093 			      ncopies, dt, ndts, slp_node, cost_vec);
6094       return true;
6095     }
6096 
6097   /* Transform.  */
6098 
6099   if (dump_enabled_p ())
6100     dump_printf_loc (MSG_NOTE, vect_location,
6101                      "transform binary/unary operation.\n");
6102 
6103   bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6104 
6105   /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6106      vectors with unsigned elements, but the result is signed.  So, we
6107      need to compute the MINUS_EXPR into a vectype temporary and
6108      VIEW_CONVERT_EXPR the result into the final vectype_out.  */
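  /* E.g. (sketch) for
       _3 = p_1 - q_2;
     we emit roughly
       vect_tmp = vect_p - vect_q;			    <-- MINUS_EXPR in vectype
       vect__3 = VIEW_CONVERT_EXPR<vectype_out> (vect_tmp);
     instead of subtracting directly in the signed result type.  */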
6109   tree vec_cvt_dest = NULL_TREE;
6110   if (orig_code == POINTER_DIFF_EXPR)
6111     {
6112       vec_dest = vect_create_destination_var (scalar_dest, vectype);
6113       vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6114     }
6115   /* Handle def.  */
6116   else
6117     vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6118 
6119   /* In case the vectorization factor (VF) is bigger than the number
6120      of elements that we can fit in a vectype (nunits), we have to generate
6121      more than one vector stmt - i.e., we need to "unroll" the
6122      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
6123      from one copy of the vector stmt to the next, in the field
6124      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
6125      stages to find the correct vector defs to be used when vectorizing
6126      stmts that use the defs of the current stmt.  The example below
6127      illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6128      we need to create 4 vectorized stmts):
6129 
6130      before vectorization:
6131                                 RELATED_STMT    VEC_STMT
6132         S1:     x = memref      -               -
6133         S2:     z = x + 1       -               -
6134 
6135      step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6136              there):
6137                                 RELATED_STMT    VEC_STMT
6138         VS1_0:  vx0 = memref0   VS1_1           -
6139         VS1_1:  vx1 = memref1   VS1_2           -
6140         VS1_2:  vx2 = memref2   VS1_3           -
6141         VS1_3:  vx3 = memref3   -               -
6142         S1:     x = load        -               VS1_0
6143         S2:     z = x + 1       -               -
6144 
6145      step2: vectorize stmt S2 (done here):
6146         To vectorize stmt S2 we first need to find the relevant vector
6147         def for the first operand 'x'.  This is, as usual, obtained from
6148         the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6149         that defines 'x' (S1).  This way we find the stmt VS1_0, and the
6150         relevant vector def 'vx0'.  Having found 'vx0' we can generate
6151         the vector stmt VS2_0, and as usual, record it in the
6152         STMT_VINFO_VEC_STMT of stmt S2.
6153         When creating the second copy (VS2_1), we obtain the relevant vector
6154         def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6155         stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
6156         vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
6157         pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6158         Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
6159         chain of stmts and pointers:
6160                                 RELATED_STMT    VEC_STMT
6161         VS1_0:  vx0 = memref0   VS1_1           -
6162         VS1_1:  vx1 = memref1   VS1_2           -
6163         VS1_2:  vx2 = memref2   VS1_3           -
6164         VS1_3:  vx3 = memref3   -               -
6165         S1:     x = load        -               VS1_0
6166         VS2_0:  vz0 = vx0 + v1  VS2_1           -
6167         VS2_1:  vz1 = vx1 + v1  VS2_2           -
6168         VS2_2:  vz2 = vx2 + v1  VS2_3           -
6169         VS2_3:  vz3 = vx3 + v1  -               -
6170         S2:     z = x + 1       -               VS2_0  */
6171 
6172   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6173 		     op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6174   /* Arguments are ready.  Create the new vector stmt.  */
6175   FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6176     {
6177       gimple *new_stmt = NULL;
6178       vop1 = ((op_type == binary_op || op_type == ternary_op)
6179 	      ? vec_oprnds1[i] : NULL_TREE);
6180       vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6181       if (masked_loop_p && reduc_idx >= 0)
6182 	{
6183 	  /* Perform the operation on active elements only and take
6184 	     inactive elements from the reduction chain input.  */
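	  /* E.g. for a PLUS_EXPR reduction with the reduction chain in
	     operand 1 this emits, roughly,
	       new_temp = .COND_ADD (loop_mask, vop0, vop1, vop1);
	     so that inactive lanes simply pass the reduction input
	     through unchanged.  */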
6185 	  gcc_assert (!vop2);
6186 	  vop2 = reduc_idx == 1 ? vop1 : vop0;
6187 	  tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6188 					  vectype, i);
6189 	  gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6190 						    vop0, vop1, vop2);
6191 	  new_temp = make_ssa_name (vec_dest, call);
6192 	  gimple_call_set_lhs (call, new_temp);
6193 	  gimple_call_set_nothrow (call, true);
6194 	  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6195 	  new_stmt = call;
6196 	}
6197       else
6198 	{
6199 	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6200 	  new_temp = make_ssa_name (vec_dest, new_stmt);
6201 	  gimple_assign_set_lhs (new_stmt, new_temp);
6202 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6203 	  if (vec_cvt_dest)
6204 	    {
6205 	      new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6206 	      new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6207 					      new_temp);
6208 	      new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6209 	      gimple_assign_set_lhs (new_stmt, new_temp);
6210 	      vect_finish_stmt_generation (vinfo, stmt_info,
6211 					   new_stmt, gsi);
6212 	    }
6213 	}
6214       if (slp_node)
6215 	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6216       else
6217 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6218     }
6219 
6220   if (!slp_node)
6221     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6222 
6223   vec_oprnds0.release ();
6224   vec_oprnds1.release ();
6225   vec_oprnds2.release ();
6226 
6227   return true;
6228 }
6229 
6230 /* A helper function to ensure data reference DR_INFO's base alignment.  */
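/* For example (sketch): if the target wants 256-bit aligned vector accesses
   but the base object is a file-scope array with only 64-bit alignment,
   this raises the array's DECL_ALIGN (or its symtab alignment) to the
   target alignment, provided that alignment is a compile-time constant.  */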
6231 
6232 static void
6233 ensure_base_align (dr_vec_info *dr_info)
6234 {
6235   if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6236     return;
6237 
6238   if (dr_info->base_misaligned)
6239     {
6240       tree base_decl = dr_info->base_decl;
6241 
6242       // We should only be able to increase the alignment of a base object if
6243       // we know what its new alignment should be at compile time.
6244       unsigned HOST_WIDE_INT align_base_to =
6245 	DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6246 
6247       if (decl_in_symtab_p (base_decl))
6248 	symtab_node::get (base_decl)->increase_alignment (align_base_to);
6249       else if (DECL_ALIGN (base_decl) < align_base_to)
6250 	{
6251 	  SET_DECL_ALIGN (base_decl, align_base_to);
6252           DECL_USER_ALIGN (base_decl) = 1;
6253 	}
6254       dr_info->base_misaligned = false;
6255     }
6256 }
6257 
6258 
6259 /* Function get_group_alias_ptr_type.
6260 
6261    Return the alias type for the group starting at FIRST_STMT_INFO.  */
6262 
6263 static tree
6264 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6265 {
6266   struct data_reference *first_dr, *next_dr;
6267 
6268   first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6269   stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6270   while (next_stmt_info)
6271     {
6272       next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6273       if (get_alias_set (DR_REF (first_dr))
6274 	  != get_alias_set (DR_REF (next_dr)))
6275 	{
6276 	  if (dump_enabled_p ())
6277 	    dump_printf_loc (MSG_NOTE, vect_location,
6278 			     "conflicting alias set types.\n");
6279 	  return ptr_type_node;
6280 	}
6281       next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6282     }
6283   return reference_alias_ptr_type (DR_REF (first_dr));
6284 }
6285 
6286 
6287 /* Function scan_operand_equal_p.
6288 
6289    Helper function for check_scan_store.  Compare two references
6290    with .GOMP_SIMD_LANE bases.  */
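/* For example (sketch), an ARRAY_REF like
     D.2042[_25]
   and a MEM_REF whose base SSA name is defined as
     _30 = &D.2042 p+ _31;	with  _31 = _25 * 4;
   are considered equal, provided the bases, bit sizes, constant steps and
   (cast-stripped) offsets all match.  */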
6291 
6292 static bool
6293 scan_operand_equal_p (tree ref1, tree ref2)
6294 {
6295   tree ref[2] = { ref1, ref2 };
6296   poly_int64 bitsize[2], bitpos[2];
6297   tree offset[2], base[2];
6298   for (int i = 0; i < 2; ++i)
6299     {
6300       machine_mode mode;
6301       int unsignedp, reversep, volatilep = 0;
6302       base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6303       				     &offset[i], &mode, &unsignedp,
6304       				     &reversep, &volatilep);
6305       if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6306 	return false;
6307       if (TREE_CODE (base[i]) == MEM_REF
6308 	  && offset[i] == NULL_TREE
6309 	  && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6310 	{
6311 	  gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6312 	  if (is_gimple_assign (def_stmt)
6313 	      && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6314 	      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6315 	      && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6316 	    {
6317 	      if (maybe_ne (mem_ref_offset (base[i]), 0))
6318 		return false;
6319 	      base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6320 	      offset[i] = gimple_assign_rhs2 (def_stmt);
6321 	    }
6322 	}
6323     }
6324 
6325   if (!operand_equal_p (base[0], base[1], 0))
6326     return false;
6327   if (maybe_ne (bitsize[0], bitsize[1]))
6328     return false;
6329   if (offset[0] != offset[1])
6330     {
6331       if (!offset[0] || !offset[1])
6332 	return false;
6333       if (!operand_equal_p (offset[0], offset[1], 0))
6334 	{
6335 	  tree step[2];
6336 	  for (int i = 0; i < 2; ++i)
6337 	    {
6338 	      step[i] = integer_one_node;
6339 	      if (TREE_CODE (offset[i]) == SSA_NAME)
6340 		{
6341 		  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6342 		  if (is_gimple_assign (def_stmt)
6343 		      && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6344 		      && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6345 			  == INTEGER_CST))
6346 		    {
6347 		      step[i] = gimple_assign_rhs2 (def_stmt);
6348 		      offset[i] = gimple_assign_rhs1 (def_stmt);
6349 		    }
6350 		}
6351 	      else if (TREE_CODE (offset[i]) == MULT_EXPR)
6352 		{
6353 		  step[i] = TREE_OPERAND (offset[i], 1);
6354 		  offset[i] = TREE_OPERAND (offset[i], 0);
6355 		}
6356 	      tree rhs1 = NULL_TREE;
6357 	      if (TREE_CODE (offset[i]) == SSA_NAME)
6358 		{
6359 		  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6360 		  if (gimple_assign_cast_p (def_stmt))
6361 		    rhs1 = gimple_assign_rhs1 (def_stmt);
6362 		}
6363 	      else if (CONVERT_EXPR_P (offset[i]))
6364 		rhs1 = TREE_OPERAND (offset[i], 0);
6365 	      if (rhs1
6366 		  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6367 		  && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6368 		  && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6369 		      >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6370 		offset[i] = rhs1;
6371 	    }
6372 	  if (!operand_equal_p (offset[0], offset[1], 0)
6373 	      || !operand_equal_p (step[0], step[1], 0))
6374 	    return false;
6375 	}
6376     }
6377   return true;
6378 }
6379 
6380 
6381 enum scan_store_kind {
6382   /* Normal permutation.  */
6383   scan_store_kind_perm,
6384 
6385   /* Whole vector left shift permutation with zero init.  */
6386   scan_store_kind_lshift_zero,
6387 
6388   /* Whole vector left shift permutation and VEC_COND_EXPR.  */
6389   scan_store_kind_lshift_cond
6390 };
6391 
6392 /* Function scan_store_can_perm_p.
6393 
6394    Verify if we can perform the needed permutations or whole vector shifts.
6395    Return -1 on failure, otherwise exact log2 of vectype's nunits.
6396    If USE_WHOLE_VECTOR is nonnull, it is filled with one enum scan_store_kind
6397    per step saying which operation to perform at that step.  */
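/* E.g. for nunits == 8 the selectors checked below are, in order,
   { 0, 8, 9, 10, 11, 12, 13, 14 }, { 0, 1, 8, 9, 10, 11, 12, 13 },
   { 0, 1, 2, 3, 8, 9, 10, 11 } and finally the broadcast { 7, 7, ..., 7 },
   matching the VEC_PERM_EXPRs in the scan examples in check_scan_store.  */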
6398 
6399 static int
6400 scan_store_can_perm_p (tree vectype, tree init,
6401 		       vec<enum scan_store_kind> *use_whole_vector = NULL)
6402 {
6403   enum machine_mode vec_mode = TYPE_MODE (vectype);
6404   unsigned HOST_WIDE_INT nunits;
6405   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6406     return -1;
6407   int units_log2 = exact_log2 (nunits);
6408   if (units_log2 <= 0)
6409     return -1;
6410 
6411   int i;
6412   enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6413   for (i = 0; i <= units_log2; ++i)
6414     {
6415       unsigned HOST_WIDE_INT j, k;
6416       enum scan_store_kind kind = scan_store_kind_perm;
6417       vec_perm_builder sel (nunits, nunits, 1);
6418       sel.quick_grow (nunits);
6419       if (i == units_log2)
6420 	{
6421 	  for (j = 0; j < nunits; ++j)
6422 	    sel[j] = nunits - 1;
6423 	}
6424       else
6425 	{
6426 	  for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6427 	    sel[j] = j;
6428 	  for (k = 0; j < nunits; ++j, ++k)
6429 	    sel[j] = nunits + k;
6430 	}
6431       vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6432       if (!can_vec_perm_const_p (vec_mode, indices))
6433 	{
6434 	  if (i == units_log2)
6435 	    return -1;
6436 
6437 	  if (whole_vector_shift_kind == scan_store_kind_perm)
6438 	    {
6439 	      if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6440 		return -1;
6441 	      whole_vector_shift_kind = scan_store_kind_lshift_zero;
6442 	      /* Whole vector shifts shift in zeros, so if init is an all-zero
6443 		 constant, there is no need to do anything further.  */
6444 	      if ((TREE_CODE (init) != INTEGER_CST
6445 		   && TREE_CODE (init) != REAL_CST)
6446 		  || !initializer_zerop (init))
6447 		{
6448 		  tree masktype = truth_type_for (vectype);
6449 		  if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6450 		    return -1;
6451 		  whole_vector_shift_kind = scan_store_kind_lshift_cond;
6452 		}
6453 	    }
6454 	  kind = whole_vector_shift_kind;
6455 	}
6456       if (use_whole_vector)
6457 	{
6458 	  if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6459 	    use_whole_vector->safe_grow_cleared (i, true);
6460 	  if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6461 	    use_whole_vector->safe_push (kind);
6462 	}
6463     }
6464 
6465   return units_log2;
6466 }
6467 
6468 
6469 /* Function check_scan_store.
6470 
6471    Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */
6472 
6473 static bool
6474 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6475 		  enum vect_def_type rhs_dt, bool slp, tree mask,
6476 		  vect_memory_access_type memory_access_type)
6477 {
6478   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6479   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6480   tree ref_type;
6481 
6482   gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6483   if (slp
6484       || mask
6485       || memory_access_type != VMAT_CONTIGUOUS
6486       || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6487       || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6488       || loop_vinfo == NULL
6489       || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6490       || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6491       || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6492       || !integer_zerop (DR_INIT (dr_info->dr))
6493       || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6494       || !alias_sets_conflict_p (get_alias_set (vectype),
6495 				 get_alias_set (TREE_TYPE (ref_type))))
6496     {
6497       if (dump_enabled_p ())
6498 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6499 			 "unsupported OpenMP scan store.\n");
6500       return false;
6501     }
6502 
6503   /* We need to pattern match code built by OpenMP lowering and simplified
6504      by subsequent optimizations into something we can handle.
6505      #pragma omp simd reduction(inscan,+:r)
6506      for (...)
6507        {
6508 	 r += something ();
6509 	 #pragma omp scan inclusive (r)
6510 	 use (r);
6511        }
6512      shall have body with:
6513        // Initialization for input phase, store the reduction initializer:
6514        _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6515        _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6516        D.2042[_21] = 0;
6517        // Actual input phase:
6518        ...
6519        r.0_5 = D.2042[_20];
6520        _6 = _4 + r.0_5;
6521        D.2042[_20] = _6;
6522        // Initialization for scan phase:
6523        _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6524        _26 = D.2043[_25];
6525        _27 = D.2042[_25];
6526        _28 = _26 + _27;
6527        D.2043[_25] = _28;
6528        D.2042[_25] = _28;
6529        // Actual scan phase:
6530        ...
6531        r.1_8 = D.2042[_20];
6532        ...
6533      The "omp simd array" variable D.2042 holds the privatized copy used
6534      inside of the loop and D.2043 is another one that holds copies of
6535      the current original list item.  The separate GOMP_SIMD_LANE ifn
6536      kinds are there in order to allow optimizing the initializer store
6537      and combiner sequence, e.g. if it is originally some C++ish
6538      user-defined reduction, while still allowing the vectorizer to pattern
6539      recognize it and turn it into the appropriate vectorized scan.
6540 
6541      For exclusive scan, this is slightly different:
6542      #pragma omp simd reduction(inscan,+:r)
6543      for (...)
6544        {
6545 	 use (r);
6546 	 #pragma omp scan exclusive (r)
6547 	 r += something ();
6548        }
6549      shall have body with:
6550        // Initialization for input phase, store the reduction initializer:
6551        _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6552        _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6553        D.2042[_21] = 0;
6554        // Actual input phase:
6555        ...
6556        r.0_5 = D.2042[_20];
6557        _6 = _4 + r.0_5;
6558        D.2042[_20] = _6;
6559        // Initialization for scan phase:
6560        _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6561        _26 = D.2043[_25];
6562        D.2044[_25] = _26;
6563        _27 = D.2042[_25];
6564        _28 = _26 + _27;
6565        D.2043[_25] = _28;
6566        // Actual scan phase:
6567        ...
6568        r.1_8 = D.2044[_20];
6569        ...  */
6570 
6571   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6572     {
6573       /* Match the D.2042[_21] = 0; store above.  Just require that
6574 	 it is a constant or external definition store.  */
6575       if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6576 	{
6577 	 fail_init:
6578 	  if (dump_enabled_p ())
6579 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6580 			     "unsupported OpenMP scan initializer store.\n");
6581 	  return false;
6582 	}
6583 
6584       if (! loop_vinfo->scan_map)
6585 	loop_vinfo->scan_map = new hash_map<tree, tree>;
6586       tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6587       tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6588       if (cached)
6589 	goto fail_init;
6590       cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6591 
6592       /* These stores can be vectorized normally.  */
6593       return true;
6594     }
6595 
6596   if (rhs_dt != vect_internal_def)
6597     {
6598      fail:
6599       if (dump_enabled_p ())
6600 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6601 			 "unsupported OpenMP scan combiner pattern.\n");
6602       return false;
6603     }
6604 
6605   gimple *stmt = STMT_VINFO_STMT (stmt_info);
6606   tree rhs = gimple_assign_rhs1 (stmt);
6607   if (TREE_CODE (rhs) != SSA_NAME)
6608     goto fail;
6609 
6610   gimple *other_store_stmt = NULL;
6611   tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6612   bool inscan_var_store
6613     = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6614 
6615   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6616     {
6617       if (!inscan_var_store)
6618 	{
6619 	  use_operand_p use_p;
6620 	  imm_use_iterator iter;
6621 	  FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6622 	    {
6623 	      gimple *use_stmt = USE_STMT (use_p);
6624 	      if (use_stmt == stmt || is_gimple_debug (use_stmt))
6625 		continue;
6626 	      if (gimple_bb (use_stmt) != gimple_bb (stmt)
6627 		  || !is_gimple_assign (use_stmt)
6628 		  || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6629 		  || other_store_stmt
6630 		  || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6631 		goto fail;
6632 	      other_store_stmt = use_stmt;
6633 	    }
6634 	  if (other_store_stmt == NULL)
6635 	    goto fail;
6636 	  rhs = gimple_assign_lhs (other_store_stmt);
6637 	  if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6638 	    goto fail;
6639 	}
6640     }
6641   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6642     {
6643       use_operand_p use_p;
6644       imm_use_iterator iter;
6645       FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6646 	{
6647 	  gimple *use_stmt = USE_STMT (use_p);
6648 	  if (use_stmt == stmt || is_gimple_debug (use_stmt))
6649 	    continue;
6650 	  if (other_store_stmt)
6651 	    goto fail;
6652 	  other_store_stmt = use_stmt;
6653 	}
6654     }
6655   else
6656     goto fail;
6657 
6658   gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6659   if (gimple_bb (def_stmt) != gimple_bb (stmt)
6660       || !is_gimple_assign (def_stmt)
6661       || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6662     goto fail;
6663 
6664   enum tree_code code = gimple_assign_rhs_code (def_stmt);
6665   /* For pointer addition, we should use the normal plus for the vector
6666      operation.  */
6667   switch (code)
6668     {
6669     case POINTER_PLUS_EXPR:
6670       code = PLUS_EXPR;
6671       break;
6672     case MULT_HIGHPART_EXPR:
6673       goto fail;
6674     default:
6675       break;
6676     }
6677   if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6678     goto fail;
6679 
6680   tree rhs1 = gimple_assign_rhs1 (def_stmt);
6681   tree rhs2 = gimple_assign_rhs2 (def_stmt);
6682   if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6683     goto fail;
6684 
6685   gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6686   gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6687   if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6688       || !gimple_assign_load_p (load1_stmt)
6689       || gimple_bb (load2_stmt) != gimple_bb (stmt)
6690       || !gimple_assign_load_p (load2_stmt))
6691     goto fail;
6692 
6693   stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6694   stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6695   if (load1_stmt_info == NULL
6696       || load2_stmt_info == NULL
6697       || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6698 	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6699       || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6700 	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6701     goto fail;
6702 
6703   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6704     {
6705       dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6706       if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6707 	  || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6708 	goto fail;
6709       tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6710       tree lrhs;
6711       if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6712 	lrhs = rhs1;
6713       else
6714 	lrhs = rhs2;
6715       use_operand_p use_p;
6716       imm_use_iterator iter;
6717       FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6718 	{
6719 	  gimple *use_stmt = USE_STMT (use_p);
6720 	  if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6721 	    continue;
6722 	  if (other_store_stmt)
6723 	    goto fail;
6724 	  other_store_stmt = use_stmt;
6725 	}
6726     }
6727 
6728   if (other_store_stmt == NULL)
6729     goto fail;
6730   if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6731       || !gimple_store_p (other_store_stmt))
6732     goto fail;
6733 
6734   stmt_vec_info other_store_stmt_info
6735     = loop_vinfo->lookup_stmt (other_store_stmt);
6736   if (other_store_stmt_info == NULL
6737       || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6738 	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6739     goto fail;
6740 
6741   gimple *stmt1 = stmt;
6742   gimple *stmt2 = other_store_stmt;
6743   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6744     std::swap (stmt1, stmt2);
6745   if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6746 			    gimple_assign_rhs1 (load2_stmt)))
6747     {
6748       std::swap (rhs1, rhs2);
6749       std::swap (load1_stmt, load2_stmt);
6750       std::swap (load1_stmt_info, load2_stmt_info);
6751     }
6752   if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6753 			     gimple_assign_rhs1 (load1_stmt)))
6754     goto fail;
6755 
6756   tree var3 = NULL_TREE;
6757   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6758       && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6759 				gimple_assign_rhs1 (load2_stmt)))
6760     goto fail;
6761   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6762     {
6763       dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6764       if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6765 	  || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6766 	goto fail;
6767       var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6768       if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6769 	  || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6770 	  || lookup_attribute ("omp simd inscan exclusive",
6771 			       DECL_ATTRIBUTES (var3)))
6772 	goto fail;
6773     }
6774 
6775   dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6776   if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6777       || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6778     goto fail;
6779 
6780   tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6781   tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6782   if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6783       || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6784       || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6785 	 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6786     goto fail;
6787 
6788   if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6789     std::swap (var1, var2);
6790 
6791   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6792     {
6793       if (!lookup_attribute ("omp simd inscan exclusive",
6794 			     DECL_ATTRIBUTES (var1)))
6795 	goto fail;
6796       var1 = var3;
6797     }
6798 
6799   if (loop_vinfo->scan_map == NULL)
6800     goto fail;
6801   tree *init = loop_vinfo->scan_map->get (var1);
6802   if (init == NULL)
6803     goto fail;
6804 
6805   /* The IL is as expected; now check whether we can actually vectorize it.
6806      Inclusive scan:
6807        _26 = D.2043[_25];
6808        _27 = D.2042[_25];
6809        _28 = _26 + _27;
6810        D.2043[_25] = _28;
6811        D.2042[_25] = _28;
6812      should be vectorized as (where _40 is the vectorized rhs
6813      from the D.2042[_21] = 0; store):
6814        _30 = MEM <vector(8) int> [(int *)&D.2043];
6815        _31 = MEM <vector(8) int> [(int *)&D.2042];
6816        _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6817        _33 = _31 + _32;
6818        // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6819        _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6820        _35 = _33 + _34;
6821        // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6822        //         _31[1]+.._31[4], ... _31[4]+.._31[7] };
6823        _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6824        _37 = _35 + _36;
6825        // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6826        //         _31[0]+.._31[4], ... _31[0]+.._31[7] };
6827        _38 = _30 + _37;
6828        _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6829        MEM <vector(8) int> [(int *)&D.2043] = _39;
6830        MEM <vector(8) int> [(int *)&D.2042] = _38;
6831      Exclusive scan:
6832        _26 = D.2043[_25];
6833        D.2044[_25] = _26;
6834        _27 = D.2042[_25];
6835        _28 = _26 + _27;
6836        D.2043[_25] = _28;
6837      should be vectorized as (where _40 is the vectorized rhs
6838      from the D.2042[_21] = 0; store):
6839        _30 = MEM <vector(8) int> [(int *)&D.2043];
6840        _31 = MEM <vector(8) int> [(int *)&D.2042];
6841        _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6842        _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6843        _34 = _32 + _33;
6844        // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6845        //         _31[3]+_31[4], ... _31[5]+.._31[6] };
6846        _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6847        _36 = _34 + _35;
6848        // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6849        //         _31[1]+.._31[4], ... _31[3]+.._31[6] };
6850        _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6851        _38 = _36 + _37;
6852        // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6853        //         _31[0]+.._31[4], ... _31[0]+.._31[6] };
6854        _39 = _30 + _38;
6855        _50 = _31 + _39;
6856        _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6857        MEM <vector(8) int> [(int *)&D.2044] = _39;
6858        MEM <vector(8) int> [(int *)&D.2042] = _51;  */
6859   enum machine_mode vec_mode = TYPE_MODE (vectype);
6860   optab optab = optab_for_tree_code (code, vectype, optab_default);
6861   if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
6862     goto fail;
6863 
6864   int units_log2 = scan_store_can_perm_p (vectype, *init);
6865   if (units_log2 == -1)
6866     goto fail;
6867 
6868   return true;
6869 }
6870 
6871 
6872 /* Function vectorizable_scan_store.
6873 
6874    Helper of vectorizable_store; arguments are as for vectorizable_store.
6875    Handle only the transformation; checking is done in check_scan_store.  */
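/* E.g. for an 8-lane inclusive PLUS scan each step of the inner loop below
   emits, roughly,
     tmp = VEC_PERM_EXPR <init_vec, v, sel_i>;
     v   = v + tmp;
   and the final steps add the carry-in from the previous copy and broadcast
   the last lane to form the next carry-in, as in the IL examples shown in
   check_scan_store.  */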
6876 
6877 static bool
6878 vectorizable_scan_store (vec_info *vinfo,
6879 			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6880 			 gimple **vec_stmt, int ncopies)
6881 {
6882   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6883   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6884   tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
6885   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6886 
6887   if (dump_enabled_p ())
6888     dump_printf_loc (MSG_NOTE, vect_location,
6889 		     "transform scan store. ncopies = %d\n", ncopies);
6890 
6891   gimple *stmt = STMT_VINFO_STMT (stmt_info);
6892   tree rhs = gimple_assign_rhs1 (stmt);
6893   gcc_assert (TREE_CODE (rhs) == SSA_NAME);
6894 
6895   tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6896   bool inscan_var_store
6897     = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6898 
6899   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6900     {
6901       use_operand_p use_p;
6902       imm_use_iterator iter;
6903       FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6904 	{
6905 	  gimple *use_stmt = USE_STMT (use_p);
6906 	  if (use_stmt == stmt || is_gimple_debug (use_stmt))
6907 	    continue;
6908 	  rhs = gimple_assign_lhs (use_stmt);
6909 	  break;
6910 	}
6911     }
6912 
6913   gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6914   enum tree_code code = gimple_assign_rhs_code (def_stmt);
6915   if (code == POINTER_PLUS_EXPR)
6916     code = PLUS_EXPR;
6917   gcc_assert (TREE_CODE_LENGTH (code) == binary_op
6918 	      && commutative_tree_code (code));
6919   tree rhs1 = gimple_assign_rhs1 (def_stmt);
6920   tree rhs2 = gimple_assign_rhs2 (def_stmt);
6921   gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
6922   gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6923   gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6924   stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6925   stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6926   dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6927   dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6928   tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6929   tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6930 
6931   if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6932     {
6933       std::swap (rhs1, rhs2);
6934       std::swap (var1, var2);
6935       std::swap (load1_dr_info, load2_dr_info);
6936     }
6937 
6938   tree *init = loop_vinfo->scan_map->get (var1);
6939   gcc_assert (init);
6940 
6941   unsigned HOST_WIDE_INT nunits;
6942   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6943     gcc_unreachable ();
6944   auto_vec<enum scan_store_kind, 16> use_whole_vector;
6945   int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
6946   gcc_assert (units_log2 > 0);
6947   auto_vec<tree, 16> perms;
6948   perms.quick_grow (units_log2 + 1);
6949   tree zero_vec = NULL_TREE, masktype = NULL_TREE;
6950   for (int i = 0; i <= units_log2; ++i)
6951     {
6952       unsigned HOST_WIDE_INT j, k;
6953       vec_perm_builder sel (nunits, nunits, 1);
6954       sel.quick_grow (nunits);
6955       if (i == units_log2)
6956 	for (j = 0; j < nunits; ++j)
6957 	  sel[j] = nunits - 1;
6958       else
6959 	{
6960 	  for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6961 	    sel[j] = j;
6962 	  for (k = 0; j < nunits; ++j, ++k)
6963 	    sel[j] = nunits + k;
6964 	}
6965       vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6966       if (!use_whole_vector.is_empty ()
6967 	  && use_whole_vector[i] != scan_store_kind_perm)
6968 	{
6969 	  if (zero_vec == NULL_TREE)
6970 	    zero_vec = build_zero_cst (vectype);
6971 	  if (masktype == NULL_TREE
6972 	      && use_whole_vector[i] == scan_store_kind_lshift_cond)
6973 	    masktype = truth_type_for (vectype);
6974 	  perms[i] = vect_gen_perm_mask_any (vectype, indices);
6975 	}
6976       else
6977 	perms[i] = vect_gen_perm_mask_checked (vectype, indices);
6978     }
6979 
6980   tree vec_oprnd1 = NULL_TREE;
6981   tree vec_oprnd2 = NULL_TREE;
6982   tree vec_oprnd3 = NULL_TREE;
6983   tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
6984   tree dataref_offset = build_int_cst (ref_type, 0);
6985   tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
6986 					   vectype, VMAT_CONTIGUOUS);
6987   tree ldataref_ptr = NULL_TREE;
6988   tree orig = NULL_TREE;
6989   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6990     ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
6991   auto_vec<tree> vec_oprnds1;
6992   auto_vec<tree> vec_oprnds2;
6993   auto_vec<tree> vec_oprnds3;
6994   vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
6995 		     *init, &vec_oprnds1,
6996 		     ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
6997 		     rhs2, &vec_oprnds3);
6998   for (int j = 0; j < ncopies; j++)
6999     {
7000       vec_oprnd1 = vec_oprnds1[j];
7001       if (ldataref_ptr == NULL)
7002 	vec_oprnd2 = vec_oprnds2[j];
7003       vec_oprnd3 = vec_oprnds3[j];
7004       if (j == 0)
7005 	orig = vec_oprnd3;
7006       else if (!inscan_var_store)
7007 	dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7008 
7009       if (ldataref_ptr)
7010 	{
7011 	  vec_oprnd2 = make_ssa_name (vectype);
7012 	  tree data_ref = fold_build2 (MEM_REF, vectype,
7013 				       unshare_expr (ldataref_ptr),
7014 				       dataref_offset);
7015 	  vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7016 	  gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7017 	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7018 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7019 	  *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7020 	}
7021 
7022       tree v = vec_oprnd2;
7023       for (int i = 0; i < units_log2; ++i)
7024 	{
7025 	  tree new_temp = make_ssa_name (vectype);
7026 	  gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7027 					   (zero_vec
7028 					    && (use_whole_vector[i]
7029 						!= scan_store_kind_perm))
7030 					   ? zero_vec : vec_oprnd1, v,
7031 					   perms[i]);
7032 	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7033 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7034 	  *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7035 
7036 	  if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7037 	    {
7038 	      /* The whole-vector shift shifted in zeros, but if *init
7039 		 is not initializer_zerop, we need to replace those elements
7040 		 with the corresponding elements from vec_oprnd1.  */
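	      /* E.g. for nunits == 8 and i == 1 the mask built below is
		 { 0, 0, 1, 1, 1, 1, 1, 1 }, so lanes 0 and 1 are taken
		 from vec_oprnd1 rather than from the shifted-in zeros.  */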
7041 	      tree_vector_builder vb (masktype, nunits, 1);
7042 	      for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7043 		vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7044 			       ? boolean_false_node : boolean_true_node);
7045 
7046 	      tree new_temp2 = make_ssa_name (vectype);
7047 	      g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7048 				       new_temp, vec_oprnd1);
7049 	      vect_finish_stmt_generation (vinfo, stmt_info,
7050 					   g, gsi);
7051 	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7052 	      new_temp = new_temp2;
7053 	    }
7054 
7055 	  /* For exclusive scan, perform the perms[i] permutation once
7056 	     more.  */
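	  /* This matches the back-to-back VEC_PERM_EXPRs (_32 and _33) in
	     the exclusive scan IL example in check_scan_store, shifting one
	     extra lane of the initializer in before the first addition.  */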
7057 	  if (i == 0
7058 	      && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7059 	      && v == vec_oprnd2)
7060 	    {
7061 	      v = new_temp;
7062 	      --i;
7063 	      continue;
7064 	    }
7065 
7066 	  tree new_temp2 = make_ssa_name (vectype);
7067 	  g = gimple_build_assign (new_temp2, code, v, new_temp);
7068 	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7069 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7070 
7071 	  v = new_temp2;
7072 	}
7073 
7074       tree new_temp = make_ssa_name (vectype);
7075       gimple *g = gimple_build_assign (new_temp, code, orig, v);
7076       vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7077       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7078 
7079       tree last_perm_arg = new_temp;
7080       /* For exclusive scan, new_temp computed above is the exclusive scan
7081 	 prefix sum.  Turn it into the inclusive prefix sum for the broadcast
7082 	 of the last element into orig.  */
7083       if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7084 	{
7085 	  last_perm_arg = make_ssa_name (vectype);
7086 	  g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7087 	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7088 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7089 	}
7090 
7091       orig = make_ssa_name (vectype);
7092       g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7093 			       last_perm_arg, perms[units_log2]);
7094       vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7095       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7096 
7097       if (!inscan_var_store)
7098 	{
7099 	  tree data_ref = fold_build2 (MEM_REF, vectype,
7100 				       unshare_expr (dataref_ptr),
7101 				       dataref_offset);
7102 	  vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7103 	  g = gimple_build_assign (data_ref, new_temp);
7104 	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7105 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7106 	}
7107     }
7108 
7109   if (inscan_var_store)
7110     for (int j = 0; j < ncopies; j++)
7111       {
7112 	if (j != 0)
7113 	  dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7114 
7115 	tree data_ref = fold_build2 (MEM_REF, vectype,
7116 				     unshare_expr (dataref_ptr),
7117 				     dataref_offset);
7118 	vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7119 	gimple *g = gimple_build_assign (data_ref, orig);
7120 	vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7121 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7122       }
7123   return true;
7124 }
7125 
7126 
7127 /* Function vectorizable_store.
7128 
7129    Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7130    that can be vectorized.
7131    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7132    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7133    Return true if STMT_INFO is vectorizable in this way.  */
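/* For instance (sketch), a contiguous unmasked store in the loop such as
     a[i_7] = x_5;
   becomes, when the vectorization factor matches nunits, a single
     MEM <vector(4) int> [(int *)vectp] = vect_x;
   per copy; grouped, strided, masked and scatter accesses need the more
   involved code paths below.  */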
7134 
7135 static bool
7136 vectorizable_store (vec_info *vinfo,
7137 		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7138 		    gimple **vec_stmt, slp_tree slp_node,
7139 		    stmt_vector_for_cost *cost_vec)
7140 {
7141   tree data_ref;
7142   tree op;
7143   tree vec_oprnd = NULL_TREE;
7144   tree elem_type;
7145   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7146   class loop *loop = NULL;
7147   machine_mode vec_mode;
7148   tree dummy;
7149   enum vect_def_type rhs_dt = vect_unknown_def_type;
7150   enum vect_def_type mask_dt = vect_unknown_def_type;
7151   tree dataref_ptr = NULL_TREE;
7152   tree dataref_offset = NULL_TREE;
7153   gimple *ptr_incr = NULL;
7154   int ncopies;
7155   int j;
7156   stmt_vec_info first_stmt_info;
7157   bool grouped_store;
7158   unsigned int group_size, i;
7159   vec<tree> oprnds = vNULL;
7160   vec<tree> result_chain = vNULL;
7161   tree offset = NULL_TREE;
7162   vec<tree> vec_oprnds = vNULL;
7163   bool slp = (slp_node != NULL);
7164   unsigned int vec_num;
7165   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7166   tree aggr_type;
7167   gather_scatter_info gs_info;
7168   poly_uint64 vf;
7169   vec_load_store_type vls_type;
7170   tree ref_type;
7171 
7172   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7173     return false;
7174 
7175   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7176       && ! vec_stmt)
7177     return false;
7178 
7179   /* Is vectorizable store? */
7180 
7181   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7182   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7183     {
7184       tree scalar_dest = gimple_assign_lhs (assign);
7185       if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7186 	  && is_pattern_stmt_p (stmt_info))
7187 	scalar_dest = TREE_OPERAND (scalar_dest, 0);
7188       if (TREE_CODE (scalar_dest) != ARRAY_REF
7189 	  && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7190 	  && TREE_CODE (scalar_dest) != INDIRECT_REF
7191 	  && TREE_CODE (scalar_dest) != COMPONENT_REF
7192 	  && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7193 	  && TREE_CODE (scalar_dest) != REALPART_EXPR
7194 	  && TREE_CODE (scalar_dest) != MEM_REF)
7195 	return false;
7196     }
7197   else
7198     {
7199       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7200       if (!call || !gimple_call_internal_p (call))
7201 	return false;
7202 
7203       internal_fn ifn = gimple_call_internal_fn (call);
7204       if (!internal_store_fn_p (ifn))
7205 	return false;
7206 
7207       if (slp_node != NULL)
7208 	{
7209 	  if (dump_enabled_p ())
7210 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7211 			     "SLP of masked stores not supported.\n");
7212 	  return false;
7213 	}
7214 
7215       int mask_index = internal_fn_mask_index (ifn);
7216       if (mask_index >= 0)
7217 	{
7218 	  mask = gimple_call_arg (call, mask_index);
7219 	  if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
7220 				       &mask_vectype))
7221 	    return false;
7222 	}
7223     }
7224 
7225   op = vect_get_store_rhs (stmt_info);
7226 
7227   /* Cannot have hybrid store SLP -- that would mean storing to the
7228      same location twice.  */
7229   gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7230 
7231   tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7232   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7233 
7234   if (loop_vinfo)
7235     {
7236       loop = LOOP_VINFO_LOOP (loop_vinfo);
7237       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7238     }
7239   else
7240     vf = 1;
7241 
7242   /* Multiple types in SLP are handled by creating the appropriate number of
7243      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
7244      case of SLP.  */
7245   if (slp)
7246     ncopies = 1;
7247   else
7248     ncopies = vect_get_num_copies (loop_vinfo, vectype);
7249 
7250   gcc_assert (ncopies >= 1);
7251 
7252   /* FORNOW.  This restriction should be relaxed.  */
7253   if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7254     {
7255       if (dump_enabled_p ())
7256 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7257 			 "multiple types in nested loop.\n");
7258       return false;
7259     }
7260 
7261   if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7262 			     op, &rhs_dt, &rhs_vectype, &vls_type))
7263     return false;
7264 
7265   elem_type = TREE_TYPE (vectype);
7266   vec_mode = TYPE_MODE (vectype);
7267 
7268   if (!STMT_VINFO_DATA_REF (stmt_info))
7269     return false;
7270 
7271   vect_memory_access_type memory_access_type;
7272   enum dr_alignment_support alignment_support_scheme;
7273   if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7274 			    ncopies, &memory_access_type,
7275 			    &alignment_support_scheme, &gs_info))
7276     return false;
7277 
7278   if (mask)
7279     {
7280       if (memory_access_type == VMAT_CONTIGUOUS)
7281 	{
7282 	  if (!VECTOR_MODE_P (vec_mode)
7283 	      || !can_vec_mask_load_store_p (vec_mode,
7284 					     TYPE_MODE (mask_vectype), false))
7285 	    return false;
7286 	}
7287       else if (memory_access_type != VMAT_LOAD_STORE_LANES
7288 	       && (memory_access_type != VMAT_GATHER_SCATTER
7289 		   || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7290 	{
7291 	  if (dump_enabled_p ())
7292 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7293 			     "unsupported access type for masked store.\n");
7294 	  return false;
7295 	}
7296     }
7297   else
7298     {
7299       /* FORNOW.  In some cases we can vectorize even if the data type is
7300 	 not supported (e.g. array initialization with 0).  */
7301       if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7302 	return false;
7303     }
7304 
7305   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7306   grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7307 		   && memory_access_type != VMAT_GATHER_SCATTER
7308 		   && (slp || memory_access_type != VMAT_CONTIGUOUS));
7309   if (grouped_store)
7310     {
7311       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7312       first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7313       group_size = DR_GROUP_SIZE (first_stmt_info);
7314     }
7315   else
7316     {
7317       first_stmt_info = stmt_info;
7318       first_dr_info = dr_info;
7319       group_size = vec_num = 1;
7320     }
7321 
7322   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7323     {
7324       if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7325 			     memory_access_type))
7326 	return false;
7327     }
7328 
7329   if (!vec_stmt) /* transformation not required.  */
7330     {
7331       STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7332 
7333       if (loop_vinfo
7334 	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7335 	check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
7336 					      group_size, memory_access_type,
7337 					      &gs_info, mask);
7338 
7339       if (slp_node
7340 	  && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7341 						vectype))
7342 	{
7343 	  if (dump_enabled_p ())
7344 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7345 			     "incompatible vector types for invariants\n");
7346 	  return false;
7347 	}
7348 
7349       if (dump_enabled_p ()
7350 	  && memory_access_type != VMAT_ELEMENTWISE
7351 	  && memory_access_type != VMAT_GATHER_SCATTER
7352 	  && alignment_support_scheme != dr_aligned)
7353 	dump_printf_loc (MSG_NOTE, vect_location,
7354 			 "Vectorizing an unaligned access.\n");
7355 
7356       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7357       vect_model_store_cost (vinfo, stmt_info, ncopies,
7358 			     memory_access_type, vls_type, slp_node, cost_vec);
7359       return true;
7360     }
7361   gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7362 
7363   /* Transform.  */
7364 
7365   ensure_base_align (dr_info);
7366 
7367   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7368     {
7369       tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7370       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7371       tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7372       tree ptr, var, scale, vec_mask;
7373       tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7374       tree mask_halfvectype = mask_vectype;
7375       edge pe = loop_preheader_edge (loop);
7376       gimple_seq seq;
7377       basic_block new_bb;
7378       enum { NARROW, NONE, WIDEN } modifier;
7379       poly_uint64 scatter_off_nunits
7380 	= TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7381 
7382       if (known_eq (nunits, scatter_off_nunits))
7383 	modifier = NONE;
7384       else if (known_eq (nunits * 2, scatter_off_nunits))
7385 	{
7386 	  modifier = WIDEN;
7387 
7388 	  /* Currently gathers and scatters are only supported for
7389 	     fixed-length vectors.  */
7390 	  unsigned int count = scatter_off_nunits.to_constant ();
7391 	  vec_perm_builder sel (count, count, 1);
7392 	  for (i = 0; i < (unsigned int) count; ++i)
7393 	    sel.quick_push (i | (count / 2));
7394 
7395 	  vec_perm_indices indices (sel, 1, count);
7396 	  perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7397 						  indices);
7398 	  gcc_assert (perm_mask != NULL_TREE);
7399 	}
7400       else if (known_eq (nunits, scatter_off_nunits * 2))
7401 	{
7402 	  modifier = NARROW;
7403 
7404 	  /* Currently gathers and scatters are only supported for
7405 	     fixed-length vectors.  */
7406 	  unsigned int count = nunits.to_constant ();
7407 	  vec_perm_builder sel (count, count, 1);
7408 	  for (i = 0; i < (unsigned int) count; ++i)
7409 	    sel.quick_push (i | (count / 2));
7410 
7411 	  vec_perm_indices indices (sel, 2, count);
7412 	  perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7413 	  gcc_assert (perm_mask != NULL_TREE);
7414 	  ncopies *= 2;
7415 
7416 	  if (mask)
7417 	    mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7418 	}
7419       else
7420 	gcc_unreachable ();
7421 
7422       rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7423       ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7424       masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7425       idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7426       srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7427       scaletype = TREE_VALUE (arglist);
7428 
7429       gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7430 			   && TREE_CODE (rettype) == VOID_TYPE);
7431 
7432       ptr = fold_convert (ptrtype, gs_info.base);
7433       if (!is_gimple_min_invariant (ptr))
7434 	{
7435 	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7436 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7437 	  gcc_assert (!new_bb);
7438 	}
7439 
7440       if (mask == NULL_TREE)
7441 	{
7442 	  mask_arg = build_int_cst (masktype, -1);
7443 	  mask_arg = vect_init_vector (vinfo, stmt_info,
7444 				       mask_arg, masktype, NULL);
7445 	}
7446 
7447       scale = build_int_cst (scaletype, gs_info.scale);
7448 
7449       auto_vec<tree> vec_oprnds0;
7450       auto_vec<tree> vec_oprnds1;
7451       auto_vec<tree> vec_masks;
7452       if (mask)
7453 	{
7454 	  tree mask_vectype = truth_type_for (vectype);
7455 	  vect_get_vec_defs_for_operand (vinfo, stmt_info,
7456 					 modifier == NARROW
7457 					 ? ncopies / 2 : ncopies,
7458 					 mask, &vec_masks, mask_vectype);
7459 	}
7460       vect_get_vec_defs_for_operand (vinfo, stmt_info,
7461 				     modifier == WIDEN
7462 				     ? ncopies / 2 : ncopies,
7463 				     gs_info.offset, &vec_oprnds0);
7464       vect_get_vec_defs_for_operand (vinfo, stmt_info,
7465 				     modifier == NARROW
7466 				     ? ncopies / 2 : ncopies,
7467 				     op, &vec_oprnds1);
7468       for (j = 0; j < ncopies; ++j)
7469 	{
7470 	  if (modifier == WIDEN)
7471 	    {
7472 	      if (j & 1)
7473 		op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7474 					   perm_mask, stmt_info, gsi);
7475 	      else
7476 		op = vec_oprnd0 = vec_oprnds0[j / 2];
7477 	      src = vec_oprnd1 = vec_oprnds1[j];
7478 	      if (mask)
7479 		mask_op = vec_mask = vec_masks[j];
7480 	    }
7481 	  else if (modifier == NARROW)
7482 	    {
7483 	      if (j & 1)
7484 		src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7485 					    perm_mask, stmt_info, gsi);
7486 	      else
7487 		src = vec_oprnd1 = vec_oprnds1[j / 2];
7488 	      op = vec_oprnd0 = vec_oprnds0[j];
7489 	      if (mask)
7490 		mask_op = vec_mask = vec_masks[j / 2];
7491 	    }
7492 	  else
7493 	    {
7494 	      op = vec_oprnd0 = vec_oprnds0[j];
7495 	      src = vec_oprnd1 = vec_oprnds1[j];
7496 	      if (mask)
7497 		mask_op = vec_mask = vec_masks[j];
7498 	    }
7499 
7500 	  if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7501 	    {
7502 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7503 				    TYPE_VECTOR_SUBPARTS (srctype)));
7504 	      var = vect_get_new_ssa_name (srctype, vect_simple_var);
7505 	      src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7506 	      gassign *new_stmt
7507 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7508 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7509 	      src = var;
7510 	    }
7511 
7512 	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7513 	    {
7514 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7515 				    TYPE_VECTOR_SUBPARTS (idxtype)));
7516 	      var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7517 	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7518 	      gassign *new_stmt
7519 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7520 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7521 	      op = var;
7522 	    }
7523 
7524 	  if (mask)
7525 	    {
7526 	      tree utype;
7527 	      mask_arg = mask_op;
7528 	      if (modifier == NARROW)
7529 		{
7530 		  var = vect_get_new_ssa_name (mask_halfvectype,
7531 					       vect_simple_var);
7532 		  gassign *new_stmt
7533 		    = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7534 							: VEC_UNPACK_LO_EXPR,
7535 					   mask_op);
7536 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7537 		  mask_arg = var;
7538 		}
7539 	      tree optype = TREE_TYPE (mask_arg);
7540 	      if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7541 		utype = masktype;
7542 	      else
7543 		utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7544 	      var = vect_get_new_ssa_name (utype, vect_scalar_var);
7545 	      mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7546 	      gassign *new_stmt
7547 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7548 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7549 	      mask_arg = var;
7550 	      if (!useless_type_conversion_p (masktype, utype))
7551 		{
7552 		  gcc_assert (TYPE_PRECISION (utype)
7553 			      <= TYPE_PRECISION (masktype));
7554 		  var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7555 		  new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7556 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7557 		  mask_arg = var;
7558 		}
7559 	    }
7560 
7561 	  gcall *new_stmt
7562 	    = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7563 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7564 
7565 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7566 	}
7567       *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7568       return true;
7569     }
7570   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7571     return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7572 
7573   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7574     DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7575 
7576   if (grouped_store)
7577     {
7578       /* FORNOW */
7579       gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7580 
7581       /* We vectorize all the stmts of the interleaving group when we
7582 	 reach the last stmt in the group.  */
7583       if (DR_GROUP_STORE_COUNT (first_stmt_info)
7584 	  < DR_GROUP_SIZE (first_stmt_info)
7585 	  && !slp)
7586 	{
7587 	  *vec_stmt = NULL;
7588 	  return true;
7589 	}
7590 
7591       if (slp)
7592         {
7593           grouped_store = false;
7594           /* VEC_NUM is the number of vect stmts to be created for this
7595              group.  */
7596           vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7597 	  first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7598 	  gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7599 		      == first_stmt_info);
7600 	  first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7601 	  op = vect_get_store_rhs (first_stmt_info);
7602         }
7603       else
7604         /* VEC_NUM is the number of vect stmts to be created for this
7605            group.  */
7606 	vec_num = group_size;
7607 
7608       ref_type = get_group_alias_ptr_type (first_stmt_info);
7609     }
7610   else
7611     ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7612 
7613   if (dump_enabled_p ())
7614     dump_printf_loc (MSG_NOTE, vect_location,
7615                      "transform store. ncopies = %d\n", ncopies);
7616 
7617   if (memory_access_type == VMAT_ELEMENTWISE
7618       || memory_access_type == VMAT_STRIDED_SLP)
7619     {
7620       gimple_stmt_iterator incr_gsi;
7621       bool insert_after;
7622       gimple *incr;
7623       tree offvar;
7624       tree ivstep;
7625       tree running_off;
7626       tree stride_base, stride_step, alias_off;
7627       tree vec_oprnd;
7628       tree dr_offset;
7629       unsigned int g;
7630       /* Checked by get_load_store_type.  */
7631       unsigned int const_nunits = nunits.to_constant ();
7632 
7633       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7634       gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7635 
7636       dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7637       stride_base
7638 	= fold_build_pointer_plus
7639 	    (DR_BASE_ADDRESS (first_dr_info->dr),
7640 	     size_binop (PLUS_EXPR,
7641 			 convert_to_ptrofftype (dr_offset),
7642 			 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7643       stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7644 
7645       /* For a store with loop-invariant (but other than power-of-2)
7646          stride (i.e. not a grouped access) like so:
7647 
7648 	   for (i = 0; i < n; i += stride)
7649 	     array[i] = ...;
7650 
7651 	 we generate a new induction variable and new stores from
7652 	 the components of the (vectorized) rhs:
7653 
7654 	   for (j = 0; ; j += VF*stride)
7655 	     vectemp = ...;
7656 	     tmp1 = vectemp[0];
7657 	     array[j] = tmp1;
7658 	     tmp2 = vectemp[1];
7659 	     array[j + stride] = tmp2;
7660 	     ...
7661          */
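      /* As a concrete sketch (illustrative numbers only), with stride 3 and
	 a four-element vectype the transformed body looks roughly like:

	   for (j = 0; ; j += VF*3)
	     vectemp = ...;
	     array[j]     = vectemp[0];
	     array[j + 3] = vectemp[1];
	     array[j + 6] = vectemp[2];
	     array[j + 9] = vectemp[3];

	 where each element extract below is emitted as a BIT_FIELD_REF and
	 each store as a MEM_REF off the strided induction variable.  */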
7662 
7663       unsigned nstores = const_nunits;
7664       unsigned lnel = 1;
7665       tree ltype = elem_type;
7666       tree lvectype = vectype;
7667       if (slp)
7668 	{
7669 	  if (group_size < const_nunits
7670 	      && const_nunits % group_size == 0)
7671 	    {
7672 	      nstores = const_nunits / group_size;
7673 	      lnel = group_size;
7674 	      ltype = build_vector_type (elem_type, group_size);
7675 	      lvectype = vectype;
7676 
7677 	      /* First check if vec_extract optab doesn't support extraction
7678 		 of vector elts directly.  */
7679 	      scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7680 	      machine_mode vmode;
7681 	      if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7682 		  || !related_vector_mode (TYPE_MODE (vectype), elmode,
7683 					   group_size).exists (&vmode)
7684 		  || (convert_optab_handler (vec_extract_optab,
7685 					     TYPE_MODE (vectype), vmode)
7686 		      == CODE_FOR_nothing))
7687 		{
7688 		  /* Try to avoid emitting an extract of vector elements
7689 		     by performing the extracts using an integer type of the
7690 		     same size, extracting from a vector of those and then
7691 		     re-interpreting it as the original vector type if
7692 		     supported.  */
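		  /* E.g. (a sketch, not tied to a particular target), for a
		     16 x QImode vectype with GROUP_SIZE 4 this uses
		     LSIZE == 32, so the loop below emits four 32-bit integer
		     extracts from a punned four-element integer vector
		     instead of sixteen QImode element extracts.  */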
7693 		  unsigned lsize
7694 		    = group_size * GET_MODE_BITSIZE (elmode);
7695 		  unsigned int lnunits = const_nunits / group_size;
7696 		  /* If we can't construct such a vector fall back to
7697 		     element extracts from the original vector type and
7698 		     element size stores.  */
7699 		  if (int_mode_for_size (lsize, 0).exists (&elmode)
7700 		      && VECTOR_MODE_P (TYPE_MODE (vectype))
7701 		      && related_vector_mode (TYPE_MODE (vectype), elmode,
7702 					      lnunits).exists (&vmode)
7703 		      && (convert_optab_handler (vec_extract_optab,
7704 						 vmode, elmode)
7705 			  != CODE_FOR_nothing))
7706 		    {
7707 		      nstores = lnunits;
7708 		      lnel = group_size;
7709 		      ltype = build_nonstandard_integer_type (lsize, 1);
7710 		      lvectype = build_vector_type (ltype, nstores);
7711 		    }
7712 		  /* Else fall back to vector extraction anyway.
7713 		     Fewer stores are more important than avoiding spilling
7714 		     of the vector we extract from.  Compared to the
7715 		     construction case in vectorizable_load no store-forwarding
7716 		     issue exists here for reasonable archs.  */
7717 		}
7718 	    }
7719 	  else if (group_size >= const_nunits
7720 		   && group_size % const_nunits == 0)
7721 	    {
7722 	      nstores = 1;
7723 	      lnel = const_nunits;
7724 	      ltype = vectype;
7725 	      lvectype = vectype;
7726 	    }
7727 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7728 	  ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7729 	}
7730 
7731       ivstep = stride_step;
7732       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7733 			    build_int_cst (TREE_TYPE (ivstep), vf));
7734 
7735       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7736 
7737       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7738       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7739       create_iv (stride_base, ivstep, NULL,
7740 		 loop, &incr_gsi, insert_after,
7741 		 &offvar, NULL);
7742       incr = gsi_stmt (incr_gsi);
7743 
7744       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7745 
7746       alias_off = build_int_cst (ref_type, 0);
7747       stmt_vec_info next_stmt_info = first_stmt_info;
7748       for (g = 0; g < group_size; g++)
7749 	{
7750 	  running_off = offvar;
7751 	  if (g)
7752 	    {
7753 	      tree size = TYPE_SIZE_UNIT (ltype);
7754 	      tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7755 				      size);
7756 	      tree newoff = copy_ssa_name (running_off, NULL);
7757 	      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7758 					  running_off, pos);
7759 	      vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7760 	      running_off = newoff;
7761 	    }
7762 	  if (!slp)
7763 	    op = vect_get_store_rhs (next_stmt_info);
7764 	  vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
7765 			     op, &vec_oprnds);
7766 	  unsigned int group_el = 0;
7767 	  unsigned HOST_WIDE_INT
7768 	    elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7769 	  for (j = 0; j < ncopies; j++)
7770 	    {
7771 	      vec_oprnd = vec_oprnds[j];
7772 	      /* Pun the vector to extract from if necessary.  */
7773 	      if (lvectype != vectype)
7774 		{
7775 		  tree tem = make_ssa_name (lvectype);
7776 		  gimple *pun
7777 		    = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7778 							lvectype, vec_oprnd));
7779 		  vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
7780 		  vec_oprnd = tem;
7781 		}
7782 	      for (i = 0; i < nstores; i++)
7783 		{
7784 		  tree newref, newoff;
7785 		  gimple *incr, *assign;
7786 		  tree size = TYPE_SIZE (ltype);
7787 		  /* Extract the i'th component.  */
7788 		  tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7789 					  bitsize_int (i), size);
7790 		  tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7791 					   size, pos);
7792 
7793 		  elem = force_gimple_operand_gsi (gsi, elem, true,
7794 						   NULL_TREE, true,
7795 						   GSI_SAME_STMT);
7796 
7797 		  tree this_off = build_int_cst (TREE_TYPE (alias_off),
7798 						 group_el * elsz);
7799 		  newref = build2 (MEM_REF, ltype,
7800 				   running_off, this_off);
7801 		  vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7802 
7803 		  /* And store it to *running_off.  */
7804 		  assign = gimple_build_assign (newref, elem);
7805 		  vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
7806 
7807 		  group_el += lnel;
7808 		  if (! slp
7809 		      || group_el == group_size)
7810 		    {
7811 		      newoff = copy_ssa_name (running_off, NULL);
7812 		      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7813 						  running_off, stride_step);
7814 		      vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7815 
7816 		      running_off = newoff;
7817 		      group_el = 0;
7818 		    }
7819 		  if (g == group_size - 1
7820 		      && !slp)
7821 		    {
7822 		      if (j == 0 && i == 0)
7823 			*vec_stmt = assign;
7824 		      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
7825 		    }
7826 		}
7827 	    }
7828 	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7829 	  vec_oprnds.release ();
7830 	  if (slp)
7831 	    break;
7832 	}
7833 
7834       return true;
7835     }
7836 
7837   auto_vec<tree> dr_chain (group_size);
7838   oprnds.create (group_size);
7839 
7840   /* Gather-scatter accesses perform only component accesses, so alignment
7841      is irrelevant for them.  */
7842   if (memory_access_type == VMAT_GATHER_SCATTER)
7843     alignment_support_scheme = dr_unaligned_supported;
7844   else
7845     alignment_support_scheme
7846       = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
7847 
7848   gcc_assert (alignment_support_scheme);
7849   vec_loop_masks *loop_masks
7850     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7851        ? &LOOP_VINFO_MASKS (loop_vinfo)
7852        : NULL);
7853   vec_loop_lens *loop_lens
7854     = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
7855        ? &LOOP_VINFO_LENS (loop_vinfo)
7856        : NULL);
7857 
7858   /* Shouldn't go with length-based approach if fully masked.  */
7859   gcc_assert (!loop_lens || !loop_masks);
7860 
7861   /* Targets with store-lane instructions must not require explicit
7862      realignment.  vect_supportable_dr_alignment always returns either
7863      dr_aligned or dr_unaligned_supported for masked operations.  */
7864   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7865 	       && !mask
7866 	       && !loop_masks)
7867 	      || alignment_support_scheme == dr_aligned
7868 	      || alignment_support_scheme == dr_unaligned_supported);
7869 
7870   if (memory_access_type == VMAT_CONTIGUOUS_DOWN
7871       || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7872     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7873 
7874   tree bump;
7875   tree vec_offset = NULL_TREE;
7876   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7877     {
7878       aggr_type = NULL_TREE;
7879       bump = NULL_TREE;
7880     }
7881   else if (memory_access_type == VMAT_GATHER_SCATTER)
7882     {
7883       aggr_type = elem_type;
7884       vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
7885 				       &bump, &vec_offset);
7886     }
7887   else
7888     {
7889       if (memory_access_type == VMAT_LOAD_STORE_LANES)
7890 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7891       else
7892 	aggr_type = vectype;
7893       bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
7894 					  memory_access_type);
7895     }
7896 
7897   if (mask)
7898     LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
7899 
7900   /* In case the vectorization factor (VF) is bigger than the number
7901      of elements that we can fit in a vectype (nunits), we have to generate
7902      more than one vector stmt, i.e. we need to "unroll" the
7903      vector stmt by a factor of VF/nunits.  */
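  /* For instance (illustrative numbers only), a vectorization factor of 8
     with a four-element vectype gives NCOPIES == 2, i.e. roughly two vector
     stores are emitted below per scalar store in the original loop.  */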
7904 
7905   /* In case of interleaving (non-unit grouped access):
7906 
7907         S1:  &base + 2 = x2
7908         S2:  &base = x0
7909         S3:  &base + 1 = x1
7910         S4:  &base + 3 = x3
7911 
7912      We create vectorized stores starting from the base address (the access of
7913      the first stmt in the chain, S2 in the above example) when the last store
7914      stmt of the chain (S4) is reached:
7915 
7916         VS1: &base = vx2
7917 	VS2: &base + vec_size*1 = vx0
7918 	VS3: &base + vec_size*2 = vx1
7919 	VS4: &base + vec_size*3 = vx3
7920 
7921      Then permutation statements are generated:
7922 
7923 	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
7924 	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
7925 	...
7926 
7927      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7928      (the order of the data-refs in the output of vect_permute_store_chain
7929      corresponds to the order of scalar stmts in the interleaving chain - see
7930      the documentation of vect_permute_store_chain()).
7931 
7932      In case of both multiple types and interleaving, the above vector stores
7933      and permutation stmts are created for every copy.  The resulting vector
7934      stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7935      corresponding STMT_VINFO_RELATED_STMT for the next copies.
7936   */
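  /* In the transform loop below this shows up as follows: the vectorized
     defs for the group members are collected into DR_CHAIN,
     vect_permute_store_chain then produces RESULT_CHAIN (the VS5/VS6
     permutes above), and the contiguous vector stores are emitted from
     RESULT_CHAIN.  */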
7937 
7938   auto_vec<tree> vec_masks;
7939   tree vec_mask = NULL;
7940   auto_vec<tree> vec_offsets;
7941   auto_vec<vec<tree> > gvec_oprnds;
7942   gvec_oprnds.safe_grow_cleared (group_size, true);
7943   for (j = 0; j < ncopies; j++)
7944     {
7945       gimple *new_stmt;
7946       if (j == 0)
7947 	{
7948           if (slp)
7949             {
7950 	      /* Get vectorized arguments for SLP_NODE.  */
7951 	      vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
7952 				 op, &vec_oprnds);
7953               vec_oprnd = vec_oprnds[0];
7954             }
7955           else
7956             {
7957 	      /* For interleaved stores we collect vectorized defs for all the
7958 		 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
7959 		 used as an input to vect_permute_store_chain().
7960 
7961 		 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
7962 		 and OPRNDS are of size 1.  */
7963 	      stmt_vec_info next_stmt_info = first_stmt_info;
7964 	      for (i = 0; i < group_size; i++)
7965 		{
7966 		  /* Since gaps are not supported for interleaved stores,
7967 		     DR_GROUP_SIZE is the exact number of stmts in the chain.
7968 		     Therefore, NEXT_STMT_INFO can't be NULL.  If there is no
7969 		     interleaving, DR_GROUP_SIZE is 1, and only one iteration
7970 		     of the loop will be executed.  */
7971 		  op = vect_get_store_rhs (next_stmt_info);
7972 		  vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
7973 						 ncopies, op, &gvec_oprnds[i]);
7974 		  vec_oprnd = gvec_oprnds[i][0];
7975 		  dr_chain.quick_push (gvec_oprnds[i][0]);
7976 		  oprnds.quick_push (gvec_oprnds[i][0]);
7977 		  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7978 		}
7979 	      if (mask)
7980 		{
7981 		  vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
7982 						 mask, &vec_masks, mask_vectype);
7983 		  vec_mask = vec_masks[0];
7984 		}
7985 	    }
7986 
7987 	  /* We should have caught mismatched types earlier.  */
7988 	  gcc_assert (useless_type_conversion_p (vectype,
7989 						 TREE_TYPE (vec_oprnd)));
7990 	  bool simd_lane_access_p
7991 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
7992 	  if (simd_lane_access_p
7993 	      && !loop_masks
7994 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
7995 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
7996 	      && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
7997 	      && integer_zerop (DR_INIT (first_dr_info->dr))
7998 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
7999 					get_alias_set (TREE_TYPE (ref_type))))
8000 	    {
8001 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8002 	      dataref_offset = build_int_cst (ref_type, 0);
8003 	    }
8004 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8005 	    {
8006 	      vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
8007 					   &dataref_ptr, &vec_offsets, ncopies);
8008 	      vec_offset = vec_offsets[0];
8009 	    }
8010 	  else
8011 	    dataref_ptr
8012 	      = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8013 					  simd_lane_access_p ? loop : NULL,
8014 					  offset, &dummy, gsi, &ptr_incr,
8015 					  simd_lane_access_p, NULL_TREE, bump);
8016 	}
8017       else
8018 	{
8019 	  /* For interleaved stores we created vectorized defs for all the
8020 	     defs stored in OPRNDS in the previous iteration (previous copy).
8021 	     DR_CHAIN is then used as an input to vect_permute_store_chain().
8022 	     If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8023 	     OPRNDS are of size 1.  */
8024 	  for (i = 0; i < group_size; i++)
8025 	    {
8026 	      vec_oprnd = gvec_oprnds[i][j];
8027 	      dr_chain[i] = gvec_oprnds[i][j];
8028 	      oprnds[i] = gvec_oprnds[i][j];
8029 	    }
8030 	  if (mask)
8031 	    vec_mask = vec_masks[j];
8032 	  if (dataref_offset)
8033 	    dataref_offset
8034 	      = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8035 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8036 	    vec_offset = vec_offsets[j];
8037 	  else
8038 	    dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8039 					   stmt_info, bump);
8040 	}
8041 
8042       if (memory_access_type == VMAT_LOAD_STORE_LANES)
8043 	{
8044 	  tree vec_array;
8045 
8046 	  /* Get an array into which we can store the individual vectors.  */
8047 	  vec_array = create_vector_array (vectype, vec_num);
8048 
8049 	  /* Invalidate the current contents of VEC_ARRAY.  This should
8050 	     become an RTL clobber too, which prevents the vector registers
8051 	     from being upward-exposed.  */
8052 	  vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8053 
8054 	  /* Store the individual vectors into the array.  */
8055 	  for (i = 0; i < vec_num; i++)
8056 	    {
8057 	      vec_oprnd = dr_chain[i];
8058 	      write_vector_array (vinfo, stmt_info,
8059 				  gsi, vec_oprnd, vec_array, i);
8060 	    }
8061 
8062 	  tree final_mask = NULL;
8063 	  if (loop_masks)
8064 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8065 					     vectype, j);
8066 	  if (vec_mask)
8067 	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8068 						  vec_mask, gsi);
8069 
8070 	  gcall *call;
8071 	  if (final_mask)
8072 	    {
8073 	      /* Emit:
8074 		   MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8075 				     VEC_ARRAY).  */
8076 	      unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8077 	      tree alias_ptr = build_int_cst (ref_type, align);
8078 	      call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8079 						 dataref_ptr, alias_ptr,
8080 						 final_mask, vec_array);
8081 	    }
8082 	  else
8083 	    {
8084 	      /* Emit:
8085 		   MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
8086 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8087 	      call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8088 						 vec_array);
8089 	      gimple_call_set_lhs (call, data_ref);
8090 	    }
8091 	  gimple_call_set_nothrow (call, true);
8092 	  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8093 	  new_stmt = call;
8094 
8095 	  /* Record that VEC_ARRAY is now dead.  */
8096 	  vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8097 	}
8098       else
8099 	{
8100 	  new_stmt = NULL;
8101 	  if (grouped_store)
8102 	    {
8103 	      if (j == 0)
8104 		result_chain.create (group_size);
8105 	      /* Permute.  */
8106 	      vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8107 					gsi, &result_chain);
8108 	    }
8109 
8110 	  stmt_vec_info next_stmt_info = first_stmt_info;
8111 	  for (i = 0; i < vec_num; i++)
8112 	    {
8113 	      unsigned misalign;
8114 	      unsigned HOST_WIDE_INT align;
8115 
8116 	      tree final_mask = NULL_TREE;
8117 	      if (loop_masks)
8118 		final_mask = vect_get_loop_mask (gsi, loop_masks,
8119 						 vec_num * ncopies,
8120 						 vectype, vec_num * j + i);
8121 	      if (vec_mask)
8122 		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8123 						      vec_mask, gsi);
8124 
8125 	      if (memory_access_type == VMAT_GATHER_SCATTER)
8126 		{
8127 		  tree scale = size_int (gs_info.scale);
8128 		  gcall *call;
8129 		  if (final_mask)
8130 		    call = gimple_build_call_internal
8131 		      (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8132 		       scale, vec_oprnd, final_mask);
8133 		  else
8134 		    call = gimple_build_call_internal
8135 		      (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8136 		       scale, vec_oprnd);
8137 		  gimple_call_set_nothrow (call, true);
8138 		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8139 		  new_stmt = call;
8140 		  break;
8141 		}
8142 
8143 	      if (i > 0)
8144 		/* Bump the vector pointer.  */
8145 		dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8146 					       gsi, stmt_info, bump);
8147 
8148 	      if (slp)
8149 		vec_oprnd = vec_oprnds[i];
8150 	      else if (grouped_store)
8151 		/* For grouped stores vectorized defs are interleaved in
8152 		   vect_permute_store_chain().  */
8153 		vec_oprnd = result_chain[i];
8154 
8155 	      align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8156 	      if (aligned_access_p (first_dr_info))
8157 		misalign = 0;
8158 	      else if (DR_MISALIGNMENT (first_dr_info) == -1)
8159 		{
8160 		  align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8161 		  misalign = 0;
8162 		}
8163 	      else
8164 		misalign = DR_MISALIGNMENT (first_dr_info);
8165 	      if (dataref_offset == NULL_TREE
8166 		  && TREE_CODE (dataref_ptr) == SSA_NAME)
8167 		set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8168 					misalign);
8169 	      align = least_bit_hwi (misalign | align);
8170 
8171 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8172 		{
8173 		  tree perm_mask = perm_mask_for_reverse (vectype);
8174 		  tree perm_dest = vect_create_destination_var
8175 		    (vect_get_store_rhs (stmt_info), vectype);
8176 		  tree new_temp = make_ssa_name (perm_dest);
8177 
8178 		  /* Generate the permute statement.  */
8179 		  gimple *perm_stmt
8180 		    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8181 					   vec_oprnd, perm_mask);
8182 		  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8183 
8184 		  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8185 		  vec_oprnd = new_temp;
8186 		}
8187 
8188 	      /* Arguments are ready.  Create the new vector stmt.  */
8189 	      if (final_mask)
8190 		{
8191 		  tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8192 		  gcall *call
8193 		    = gimple_build_call_internal (IFN_MASK_STORE, 4,
8194 						  dataref_ptr, ptr,
8195 						  final_mask, vec_oprnd);
8196 		  gimple_call_set_nothrow (call, true);
8197 		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8198 		  new_stmt = call;
8199 		}
8200 	      else if (loop_lens)
8201 		{
8202 		  tree final_len
8203 		    = vect_get_loop_len (loop_vinfo, loop_lens,
8204 					 vec_num * ncopies, vec_num * j + i);
8205 		  tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8206 		  machine_mode vmode = TYPE_MODE (vectype);
8207 		  opt_machine_mode new_ovmode
8208 		    = get_len_load_store_mode (vmode, false);
8209 		  machine_mode new_vmode = new_ovmode.require ();
8210 		  /* Need conversion if it's wrapped with VnQI.  */
8211 		  if (vmode != new_vmode)
8212 		    {
8213 		      tree new_vtype
8214 			= build_vector_type_for_mode (unsigned_intQI_type_node,
8215 						      new_vmode);
8216 		      tree var
8217 			= vect_get_new_ssa_name (new_vtype, vect_simple_var);
8218 		      vec_oprnd
8219 			= build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8220 		      gassign *new_stmt
8221 			= gimple_build_assign (var, VIEW_CONVERT_EXPR,
8222 					       vec_oprnd);
8223 		      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8224 						   gsi);
8225 		      vec_oprnd = var;
8226 		    }
8227 		  gcall *call
8228 		    = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
8229 						  ptr, final_len, vec_oprnd);
8230 		  gimple_call_set_nothrow (call, true);
8231 		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8232 		  new_stmt = call;
8233 		}
8234 	      else
8235 		{
8236 		  data_ref = fold_build2 (MEM_REF, vectype,
8237 					  dataref_ptr,
8238 					  dataref_offset
8239 					  ? dataref_offset
8240 					  : build_int_cst (ref_type, 0));
8241 		  if (aligned_access_p (first_dr_info))
8242 		    ;
8243 		  else
8244 		    TREE_TYPE (data_ref)
8245 		      = build_aligned_type (TREE_TYPE (data_ref),
8246 					    align * BITS_PER_UNIT);
8247 		  vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8248 		  new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8249 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8250 		}
8251 
8252 	      if (slp)
8253 		continue;
8254 
8255 	      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8256 	      if (!next_stmt_info)
8257 		break;
8258 	    }
8259 	}
8260       if (!slp)
8261 	{
8262 	  if (j == 0)
8263 	    *vec_stmt = new_stmt;
8264 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8265 	}
8266     }
8267 
8268   for (i = 0; i < group_size; ++i)
8269     {
8270       vec<tree> oprndsi = gvec_oprnds[i];
8271       oprndsi.release ();
8272     }
8273   oprnds.release ();
8274   result_chain.release ();
8275   vec_oprnds.release ();
8276 
8277   return true;
8278 }
8279 
8280 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8281    VECTOR_CST mask.  No checks are made that the target platform supports the
8282    mask, so callers may wish to test can_vec_perm_const_p separately, or use
8283    vect_gen_perm_mask_checked.  */
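/* For instance (an illustrative sketch), a four-element reversal
   permutation SEL = {3, 2, 1, 0} becomes the ssizetype VECTOR_CST
   { 3, 2, 1, 0 }, suitable as the third operand of a VEC_PERM_EXPR.  */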
8284 
8285 tree
8286 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8287 {
8288   tree mask_type;
8289 
8290   poly_uint64 nunits = sel.length ();
8291   gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8292 
8293   mask_type = build_vector_type (ssizetype, nunits);
8294   return vec_perm_indices_to_tree (mask_type, sel);
8295 }
8296 
8297 /* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
8298    i.e. that the target supports the pattern _for arbitrary input vectors_.  */
8299 
8300 tree
8301 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8302 {
8303   gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8304   return vect_gen_perm_mask_any (vectype, sel);
8305 }
8306 
8307 /* Given vector variables X and Y that were generated for the scalar
8308    STMT_INFO, generate instructions to permute the vector elements of X and Y
8309    using permutation mask MASK_VEC, insert them at *GSI and return the
8310    permuted vector variable.  */
8311 
8312 static tree
8313 permute_vec_elements (vec_info *vinfo,
8314 		      tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8315 		      gimple_stmt_iterator *gsi)
8316 {
8317   tree vectype = TREE_TYPE (x);
8318   tree perm_dest, data_ref;
8319   gimple *perm_stmt;
8320 
8321   tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8322   if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8323     perm_dest = vect_create_destination_var (scalar_dest, vectype);
8324   else
8325     perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8326   data_ref = make_ssa_name (perm_dest);
8327 
8328   /* Generate the permute statement.  */
8329   perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8330   vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8331 
8332   return data_ref;
8333 }
8334 
8335 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8336    inserting them on the loop's preheader edge.  Returns true if we
8337    were successful in doing so (and thus STMT_INFO can then be moved),
8338    otherwise returns false.  */
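/* For example (a hypothetical sketch), given an invariant load whose
   address is computed inside the loop from loop-invariant operands only:

       _1 = &a_6(D)->x;
       v_2 = *_1;

   the definition of _1 can be moved to the preheader so the load itself
   becomes hoistable.  If a use's definition is a PHI or itself depends on
   another in-loop definition, we give up and return false.  */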
8339 
8340 static bool
8341 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8342 {
8343   ssa_op_iter i;
8344   tree op;
8345   bool any = false;
8346 
8347   FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8348     {
8349       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8350       if (!gimple_nop_p (def_stmt)
8351 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8352 	{
8353 	  /* Make sure we don't need to recurse.  While we could do
8354 	     so in simple cases when there are more complex use webs
8355 	     we don't have an easy way to preserve stmt order to fulfil
8356 	     dependencies within them.  */
8357 	  tree op2;
8358 	  ssa_op_iter i2;
8359 	  if (gimple_code (def_stmt) == GIMPLE_PHI)
8360 	    return false;
8361 	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8362 	    {
8363 	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8364 	      if (!gimple_nop_p (def_stmt2)
8365 		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8366 		return false;
8367 	    }
8368 	  any = true;
8369 	}
8370     }
8371 
8372   if (!any)
8373     return true;
8374 
8375   FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8376     {
8377       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8378       if (!gimple_nop_p (def_stmt)
8379 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8380 	{
8381 	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8382 	  gsi_remove (&gsi, false);
8383 	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8384 	}
8385     }
8386 
8387   return true;
8388 }
8389 
8390 /* vectorizable_load.
8391 
8392    Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8393    that can be vectorized.
8394    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8395    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8396    Return true if STMT_INFO is vectorizable in this way.  */
8397 
8398 static bool
8399 vectorizable_load (vec_info *vinfo,
8400 		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8401 		   gimple **vec_stmt, slp_tree slp_node,
8402 		   stmt_vector_for_cost *cost_vec)
8403 {
8404   tree scalar_dest;
8405   tree vec_dest = NULL;
8406   tree data_ref = NULL;
8407   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8408   class loop *loop = NULL;
8409   class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8410   bool nested_in_vect_loop = false;
8411   tree elem_type;
8412   tree new_temp;
8413   machine_mode mode;
8414   tree dummy;
8415   tree dataref_ptr = NULL_TREE;
8416   tree dataref_offset = NULL_TREE;
8417   gimple *ptr_incr = NULL;
8418   int ncopies;
8419   int i, j;
8420   unsigned int group_size;
8421   poly_uint64 group_gap_adj;
8422   tree msq = NULL_TREE, lsq;
8423   tree offset = NULL_TREE;
8424   tree byte_offset = NULL_TREE;
8425   tree realignment_token = NULL_TREE;
8426   gphi *phi = NULL;
8427   vec<tree> dr_chain = vNULL;
8428   bool grouped_load = false;
8429   stmt_vec_info first_stmt_info;
8430   stmt_vec_info first_stmt_info_for_drptr = NULL;
8431   bool compute_in_loop = false;
8432   class loop *at_loop;
8433   int vec_num;
8434   bool slp = (slp_node != NULL);
8435   bool slp_perm = false;
8436   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8437   poly_uint64 vf;
8438   tree aggr_type;
8439   gather_scatter_info gs_info;
8440   tree ref_type;
8441   enum vect_def_type mask_dt = vect_unknown_def_type;
8442 
8443   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8444     return false;
8445 
8446   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8447       && ! vec_stmt)
8448     return false;
8449 
8450   if (!STMT_VINFO_DATA_REF (stmt_info))
8451     return false;
8452 
8453   /* ???  Alignment analysis for SLP looks at SLP_TREE_SCALAR_STMTS[0]
8454      for unpermuted loads, but we get passed SLP_TREE_REPRESENTATIVE,
8455      which can be different when reduction chains were re-ordered.
8456      Now that we know this is a dataref, reset stmt_info back to
8457      SLP_TREE_SCALAR_STMTS[0].  Once we are SLP-only, things should be
8458      refactored in a way to maintain the dr_vec_info pointer for the
8459      relevant access explicitly.  */
8460   stmt_vec_info orig_stmt_info = stmt_info;
8461   if (slp_node)
8462     stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8463 
8464   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8465   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8466     {
8467       scalar_dest = gimple_assign_lhs (assign);
8468       if (TREE_CODE (scalar_dest) != SSA_NAME)
8469 	return false;
8470 
8471       tree_code code = gimple_assign_rhs_code (assign);
8472       if (code != ARRAY_REF
8473 	  && code != BIT_FIELD_REF
8474 	  && code != INDIRECT_REF
8475 	  && code != COMPONENT_REF
8476 	  && code != IMAGPART_EXPR
8477 	  && code != REALPART_EXPR
8478 	  && code != MEM_REF
8479 	  && TREE_CODE_CLASS (code) != tcc_declaration)
8480 	return false;
8481     }
8482   else
8483     {
8484       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8485       if (!call || !gimple_call_internal_p (call))
8486 	return false;
8487 
8488       internal_fn ifn = gimple_call_internal_fn (call);
8489       if (!internal_load_fn_p (ifn))
8490 	return false;
8491 
8492       scalar_dest = gimple_call_lhs (call);
8493       if (!scalar_dest)
8494 	return false;
8495 
8496       int mask_index = internal_fn_mask_index (ifn);
8497       if (mask_index >= 0)
8498 	{
8499 	  mask = gimple_call_arg (call, mask_index);
8500 	  if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
8501 				       &mask_vectype))
8502 	    return false;
8503 	}
8504     }
8505 
8506   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8507   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8508 
8509   if (loop_vinfo)
8510     {
8511       loop = LOOP_VINFO_LOOP (loop_vinfo);
8512       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8513       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8514     }
8515   else
8516     vf = 1;
8517 
8518   /* Multiple types in SLP are handled by creating the appropriate number of
8519      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
8520      case of SLP.  */
8521   if (slp)
8522     ncopies = 1;
8523   else
8524     ncopies = vect_get_num_copies (loop_vinfo, vectype);
8525 
8526   gcc_assert (ncopies >= 1);
8527 
8528   /* FORNOW. This restriction should be relaxed.  */
8529   if (nested_in_vect_loop && ncopies > 1)
8530     {
8531       if (dump_enabled_p ())
8532         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8533                          "multiple types in nested loop.\n");
8534       return false;
8535     }
8536 
8537   /* Invalidate assumptions made by dependence analysis when vectorization
8538      on the unrolled body effectively re-orders stmts.  */
8539   if (ncopies > 1
8540       && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8541       && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8542 		   STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8543     {
8544       if (dump_enabled_p ())
8545 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8546 			 "cannot perform implicit CSE when unrolling "
8547 			 "with negative dependence distance\n");
8548       return false;
8549     }
8550 
8551   elem_type = TREE_TYPE (vectype);
8552   mode = TYPE_MODE (vectype);
8553 
8554   /* FORNOW.  In some cases we can vectorize even if the data type is
8555      not supported (e.g. data copies).  */
8556   if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8557     {
8558       if (dump_enabled_p ())
8559         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8560                          "Aligned load, but unsupported type.\n");
8561       return false;
8562     }
8563 
8564   /* Check if the load is a part of an interleaving chain.  */
8565   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8566     {
8567       grouped_load = true;
8568       /* FORNOW */
8569       gcc_assert (!nested_in_vect_loop);
8570       gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8571 
8572       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8573       group_size = DR_GROUP_SIZE (first_stmt_info);
8574 
8575       /* Refuse non-SLP vectorization of SLP-only groups.  */
8576       if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8577 	{
8578 	  if (dump_enabled_p ())
8579 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8580 			     "cannot vectorize load in non-SLP mode.\n");
8581 	  return false;
8582 	}
8583 
8584       if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8585 	{
8586 	  slp_perm = true;
8587 
8588 	  if (!loop_vinfo)
8589 	    {
8590 	      /* In BB vectorization we may not actually use a loaded vector
8591 		 accessing elements in excess of DR_GROUP_SIZE.  */
8592 	      stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8593 	      group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8594 	      unsigned HOST_WIDE_INT nunits;
8595 	      unsigned j, k, maxk = 0;
8596 	      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8597 		if (k > maxk)
8598 		  maxk = k;
8599 	      tree vectype = STMT_VINFO_VECTYPE (group_info);
8600 	      if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8601 		  || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8602 		{
8603 		  if (dump_enabled_p ())
8604 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8605 				     "BB vectorization with gaps at the end of "
8606 				     "a load is not supported\n");
8607 		  return false;
8608 		}
8609 	    }
8610 
8611 	  auto_vec<tree> tem;
8612 	  unsigned n_perms;
8613 	  if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8614 					     true, &n_perms))
8615 	    {
8616 	      if (dump_enabled_p ())
8617 		dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8618 				 vect_location,
8619 				 "unsupported load permutation\n");
8620 	      return false;
8621 	    }
8622 	}
8623 
8624       /* Invalidate assumptions made by dependence analysis when vectorization
8625 	 on the unrolled body effectively re-orders stmts.  */
8626       if (!PURE_SLP_STMT (stmt_info)
8627 	  && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8628 	  && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8629 		       STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8630 	{
8631 	  if (dump_enabled_p ())
8632 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8633 			     "cannot perform implicit CSE when performing "
8634 			     "group loads with negative dependence distance\n");
8635 	  return false;
8636 	}
8637     }
8638   else
8639     group_size = 1;
8640 
8641   vect_memory_access_type memory_access_type;
8642   enum dr_alignment_support alignment_support_scheme;
8643   if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8644 			    ncopies, &memory_access_type,
8645 			    &alignment_support_scheme, &gs_info))
8646     return false;
8647 
8648   if (mask)
8649     {
8650       if (memory_access_type == VMAT_CONTIGUOUS)
8651 	{
8652 	  machine_mode vec_mode = TYPE_MODE (vectype);
8653 	  if (!VECTOR_MODE_P (vec_mode)
8654 	      || !can_vec_mask_load_store_p (vec_mode,
8655 					     TYPE_MODE (mask_vectype), true))
8656 	    return false;
8657 	}
8658       else if (memory_access_type != VMAT_LOAD_STORE_LANES
8659 	       && memory_access_type != VMAT_GATHER_SCATTER)
8660 	{
8661 	  if (dump_enabled_p ())
8662 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8663 			     "unsupported access type for masked load.\n");
8664 	  return false;
8665 	}
8666     }
8667 
8668   if (!vec_stmt) /* transformation not required.  */
8669     {
8670       if (slp_node
8671 	  && mask
8672 	  && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8673 						mask_vectype))
8674 	{
8675 	  if (dump_enabled_p ())
8676 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8677 			     "incompatible vector types for invariants\n");
8678 	  return false;
8679 	}
8680 
8681       if (!slp)
8682 	STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8683 
8684       if (loop_vinfo
8685 	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8686 	check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
8687 					      group_size, memory_access_type,
8688 					      &gs_info, mask);
8689 
8690       if (dump_enabled_p ()
8691 	  && memory_access_type != VMAT_ELEMENTWISE
8692 	  && memory_access_type != VMAT_GATHER_SCATTER
8693 	  && alignment_support_scheme != dr_aligned)
8694 	dump_printf_loc (MSG_NOTE, vect_location,
8695 			 "Vectorizing an unaligned access.\n");
8696 
8697       STMT_VINFO_TYPE (orig_stmt_info) = load_vec_info_type;
8698       vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8699 			    slp_node, cost_vec);
8700       return true;
8701     }
8702 
8703   if (!slp)
8704     gcc_assert (memory_access_type
8705 		== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8706 
8707   if (dump_enabled_p ())
8708     dump_printf_loc (MSG_NOTE, vect_location,
8709                      "transform load. ncopies = %d\n", ncopies);
8710 
8711   /* Transform.  */
8712 
8713   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8714   ensure_base_align (dr_info);
8715 
8716   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8717     {
8718       vect_build_gather_load_calls (vinfo,
8719 				    stmt_info, gsi, vec_stmt, &gs_info, mask);
8720       return true;
8721     }
8722 
8723   if (memory_access_type == VMAT_INVARIANT)
8724     {
8725       gcc_assert (!grouped_load && !mask && !bb_vinfo);
8726       /* If we have versioned for aliasing or the loop doesn't
8727 	 have any data dependencies that would preclude this,
8728 	 then we are sure this is a loop invariant load and
8729 	 thus we can insert it on the preheader edge.  */
8730       bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8731 		      && !nested_in_vect_loop
8732 		      && hoist_defs_of_uses (stmt_info, loop));
8733       if (hoist_p)
8734 	{
8735 	  gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8736 	  if (dump_enabled_p ())
8737 	    dump_printf_loc (MSG_NOTE, vect_location,
8738 			     "hoisting out of the vectorized loop: %G", stmt);
8739 	  scalar_dest = copy_ssa_name (scalar_dest);
8740 	  tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8741 	  gsi_insert_on_edge_immediate
8742 	    (loop_preheader_edge (loop),
8743 	     gimple_build_assign (scalar_dest, rhs));
8744 	}
8745       /* These copies are all equivalent, but currently the representation
8746 	 requires a separate STMT_VINFO_VEC_STMT for each one.  */
8747       gimple_stmt_iterator gsi2 = *gsi;
8748       gsi_next (&gsi2);
8749       for (j = 0; j < ncopies; j++)
8750 	{
8751 	  if (hoist_p)
8752 	    new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8753 					 vectype, NULL);
8754 	  else
8755 	    new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8756 					 vectype, &gsi2);
8757 	  gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8758 	  if (slp)
8759 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8760 	  else
8761 	    {
8762 	      if (j == 0)
8763 		*vec_stmt = new_stmt;
8764 	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8765 	    }
8766 	}
8767       return true;
8768     }
8769 
8770   if (memory_access_type == VMAT_ELEMENTWISE
8771       || memory_access_type == VMAT_STRIDED_SLP)
8772     {
8773       gimple_stmt_iterator incr_gsi;
8774       bool insert_after;
8775       tree offvar;
8776       tree ivstep;
8777       tree running_off;
8778       vec<constructor_elt, va_gc> *v = NULL;
8779       tree stride_base, stride_step, alias_off;
8780       /* Checked by get_load_store_type.  */
8781       unsigned int const_nunits = nunits.to_constant ();
8782       unsigned HOST_WIDE_INT cst_offset = 0;
8783       tree dr_offset;
8784 
8785       gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
8786       gcc_assert (!nested_in_vect_loop);
8787 
8788       if (grouped_load)
8789 	{
8790 	  first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8791 	  first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8792 	}
8793       else
8794 	{
8795 	  first_stmt_info = stmt_info;
8796 	  first_dr_info = dr_info;
8797 	}
8798       if (slp && grouped_load)
8799 	{
8800 	  group_size = DR_GROUP_SIZE (first_stmt_info);
8801 	  ref_type = get_group_alias_ptr_type (first_stmt_info);
8802 	}
8803       else
8804 	{
8805 	  if (grouped_load)
8806 	    cst_offset
8807 	      = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8808 		 * vect_get_place_in_interleaving_chain (stmt_info,
8809 							 first_stmt_info));
8810 	  group_size = 1;
8811 	  ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8812 	}
8813 
8814       dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8815       stride_base
8816 	= fold_build_pointer_plus
8817 	    (DR_BASE_ADDRESS (first_dr_info->dr),
8818 	     size_binop (PLUS_EXPR,
8819 			 convert_to_ptrofftype (dr_offset),
8820 			 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8821       stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8822 
8823       /* For a load with a loop-invariant (but non-power-of-2)
8824          stride, i.e. not a grouped access, like so:
8825 
8826 	   for (i = 0; i < n; i += stride)
8827 	     ... = array[i];
8828 
8829 	 we generate a new induction variable and new accesses to
8830 	 form a new vector (or vectors, depending on ncopies):
8831 
8832 	   for (j = 0; ; j += VF*stride)
8833 	     tmp1 = array[j];
8834 	     tmp2 = array[j + stride];
8835 	     ...
8836 	     vectemp = {tmp1, tmp2, ...}
8837          */
8838 
8839       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8840 			    build_int_cst (TREE_TYPE (stride_step), vf));
8841 
8842       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8843 
8844       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8845       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8846       create_iv (stride_base, ivstep, NULL,
8847 		 loop, &incr_gsi, insert_after,
8848 		 &offvar, NULL);
8849 
8850       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8851 
8852       running_off = offvar;
8853       alias_off = build_int_cst (ref_type, 0);
8854       int nloads = const_nunits;
8855       int lnel = 1;
8856       tree ltype = TREE_TYPE (vectype);
8857       tree lvectype = vectype;
8858       auto_vec<tree> dr_chain;
8859       if (memory_access_type == VMAT_STRIDED_SLP)
8860 	{
8861 	  if (group_size < const_nunits)
8862 	    {
8863 	      /* First check if vec_init optab supports construction from vector
8864 		 elts directly.  Otherwise avoid emitting a constructor of
8865 		 vector elements by performing the loads using an integer type
8866 		 of the same size, constructing a vector of those and then
8867 		 re-interpreting it as the original vector type.  This avoids a
8868 		 huge runtime penalty due to the general inability to perform
8869 		 store forwarding from smaller stores to a larger load.  */
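	      /* As an illustrative example (sizes chosen for exposition
		 only): with a V8QI vectype and GROUP_SIZE == 2, and if
		 the target allows it, we would issue four HImode loads,
		 build a V4HI from them and VIEW_CONVERT_EXPR the result
		 to V8QI, instead of building a V8QI constructor from
		 eight QImode loads.  */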
8870 	      tree ptype;
8871 	      tree vtype
8872 		= vector_vector_composition_type (vectype,
8873 						  const_nunits / group_size,
8874 						  &ptype);
8875 	      if (vtype != NULL_TREE)
8876 		{
8877 		  nloads = const_nunits / group_size;
8878 		  lnel = group_size;
8879 		  lvectype = vtype;
8880 		  ltype = ptype;
8881 		}
8882 	    }
8883 	  else
8884 	    {
8885 	      nloads = 1;
8886 	      lnel = const_nunits;
8887 	      ltype = vectype;
8888 	    }
8889 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
8890 	}
8891       /* For a single-element vectype load the whole vector(1) scalar_type at once.  */
8892       else if (nloads == 1)
8893 	ltype = vectype;
8894 
8895       if (slp)
8896 	{
8897 	  /* For SLP permutation support we need to load the whole group,
8898 	     not only the number of vector stmts the permutation result
8899 	     fits in.  */
8900 	  if (slp_perm)
8901 	    {
8902 	      /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8903 		 variable VF.  */
8904 	      unsigned int const_vf = vf.to_constant ();
8905 	      ncopies = CEIL (group_size * const_vf, const_nunits);
8906 	      dr_chain.create (ncopies);
8907 	    }
8908 	  else
8909 	    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8910 	}
8911       unsigned int group_el = 0;
8912       unsigned HOST_WIDE_INT
8913 	elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8914       for (j = 0; j < ncopies; j++)
8915 	{
8916 	  if (nloads > 1)
8917 	    vec_alloc (v, nloads);
8918 	  gimple *new_stmt = NULL;
8919 	  for (i = 0; i < nloads; i++)
8920 	    {
8921 	      tree this_off = build_int_cst (TREE_TYPE (alias_off),
8922 					     group_el * elsz + cst_offset);
8923 	      tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
8924 	      vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8925 	      new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
8926 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8927 	      if (nloads > 1)
8928 		CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
8929 					gimple_assign_lhs (new_stmt));
8930 
8931 	      group_el += lnel;
8932 	      if (! slp
8933 		  || group_el == group_size)
8934 		{
8935 		  tree newoff = copy_ssa_name (running_off);
8936 		  gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8937 						      running_off, stride_step);
8938 		  vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8939 
8940 		  running_off = newoff;
8941 		  group_el = 0;
8942 		}
8943 	    }
8944 	  if (nloads > 1)
8945 	    {
8946 	      tree vec_inv = build_constructor (lvectype, v);
8947 	      new_temp = vect_init_vector (vinfo, stmt_info,
8948 					   vec_inv, lvectype, gsi);
8949 	      new_stmt = SSA_NAME_DEF_STMT (new_temp);
8950 	      if (lvectype != vectype)
8951 		{
8952 		  new_stmt = gimple_build_assign (make_ssa_name (vectype),
8953 						  VIEW_CONVERT_EXPR,
8954 						  build1 (VIEW_CONVERT_EXPR,
8955 							  vectype, new_temp));
8956 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8957 		}
8958 	    }
8959 
8960 	  if (slp)
8961 	    {
8962 	      if (slp_perm)
8963 		dr_chain.quick_push (gimple_assign_lhs (new_stmt));
8964 	      else
8965 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8966 	    }
8967 	  else
8968 	    {
8969 	      if (j == 0)
8970 		*vec_stmt = new_stmt;
8971 	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8972 	    }
8973 	}
8974       if (slp_perm)
8975 	{
8976 	  unsigned n_perms;
8977 	  vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
8978 					false, &n_perms);
8979 	}
8980       return true;
8981     }
8982 
8983   if (memory_access_type == VMAT_GATHER_SCATTER
8984       || (!slp && memory_access_type == VMAT_CONTIGUOUS))
8985     grouped_load = false;
8986 
8987   if (grouped_load)
8988     {
8989       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8990       group_size = DR_GROUP_SIZE (first_stmt_info);
8991       /* For SLP vectorization we directly vectorize a subchain
8992          without permutation.  */
8993       if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8994 	first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8995       /* For BB vectorization always use the first stmt to base
8996 	 the data ref pointer on.  */
8997       if (bb_vinfo)
8998 	first_stmt_info_for_drptr
8999 	  = vect_find_first_scalar_stmt_in_slp (slp_node);
9000 
9001       /* Check if the chain of loads is already vectorized.  */
9002       if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9003 	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9004 	     ???  But we can only do so if there is exactly one
9005 	     as we have no way to get at the rest.  Leave the CSE
9006 	     opportunity alone.
9007 	     ???  With the group load eventually participating
9008 	     in multiple different permutations (having multiple
9009 	     slp nodes which refer to the same group) the CSE
9010 	     is even wrong code.  See PR56270.  */
9011 	  && !slp)
9012 	{
9013 	  *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9014 	  return true;
9015 	}
9016       first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9017       group_gap_adj = 0;
9018 
9019       /* VEC_NUM is the number of vect stmts to be created for this group.  */
9020       if (slp)
9021 	{
9022 	  grouped_load = false;
9023 	  /* If an SLP permutation is from N elements to N elements,
9024 	     and if one vector holds a whole number of N, we can load
9025 	     the inputs to the permutation in the same way as an
9026 	     unpermuted sequence.  In other cases we need to load the
9027 	     whole group, not only the number of vector stmts the
9028 	     permutation result fits in.  */
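	  /* A worked example (numbers purely illustrative): with
	     GROUP_SIZE == 3, SLP_TREE_LANES == 3, VF == 4 and
	     NUNITS == 4 the permuted case needs CEIL (3 * 4, 4) == 3
	     vectors and group_gap_adj is 4 * 3 - 4 * 3 == 0, while an
	     unpermuted subchain using 2 lanes of a group of 3 gets
	     group_gap_adj == 3 - 2 == 1, the excess element being
	     skipped later by bumping the data pointer.  */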
9029 	  unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9030 	  if (slp_perm
9031 	      && (group_size != scalar_lanes
9032 		  || !multiple_p (nunits, group_size)))
9033 	    {
9034 	      /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9035 		 variable VF; see vect_transform_slp_perm_load.  */
9036 	      unsigned int const_vf = vf.to_constant ();
9037 	      unsigned int const_nunits = nunits.to_constant ();
9038 	      vec_num = CEIL (group_size * const_vf, const_nunits);
9039 	      group_gap_adj = vf * group_size - nunits * vec_num;
9040 	    }
9041 	  else
9042 	    {
9043 	      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9044 	      group_gap_adj
9045 		= group_size - scalar_lanes;
9046 	    }
9047     	}
9048       else
9049 	vec_num = group_size;
9050 
9051       ref_type = get_group_alias_ptr_type (first_stmt_info);
9052     }
9053   else
9054     {
9055       first_stmt_info = stmt_info;
9056       first_dr_info = dr_info;
9057       group_size = vec_num = 1;
9058       group_gap_adj = 0;
9059       ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9060     }
9061 
9062   gcc_assert (alignment_support_scheme);
9063   vec_loop_masks *loop_masks
9064     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9065        ? &LOOP_VINFO_MASKS (loop_vinfo)
9066        : NULL);
9067   vec_loop_lens *loop_lens
9068     = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
9069        ? &LOOP_VINFO_LENS (loop_vinfo)
9070        : NULL);
9071 
9072   /* We should not use the length-based approach if the loop is fully masked.  */
9073   gcc_assert (!loop_lens || !loop_masks);
9074 
9075   /* Targets with store-lane instructions must not require explicit
9076      realignment.  vect_supportable_dr_alignment always returns either
9077      dr_aligned or dr_unaligned_supported for masked operations.  */
9078   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9079 	       && !mask
9080 	       && !loop_masks)
9081 	      || alignment_support_scheme == dr_aligned
9082 	      || alignment_support_scheme == dr_unaligned_supported);
9083 
9084   /* In case the vectorization factor (VF) is bigger than the number
9085      of elements that we can fit in a vectype (nunits), we have to generate
9086      more than one vector stmt - i.e., we need to "unroll" the
9087      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
9088      from one copy of the vector stmt to the next, in the field
9089      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
9090      stages to find the correct vector defs to be used when vectorizing
9091      stmts that use the defs of the current stmt.  The example below
9092      illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9093      need to create 4 vectorized stmts):
9094 
9095      before vectorization:
9096                                 RELATED_STMT    VEC_STMT
9097         S1:     x = memref      -               -
9098         S2:     z = x + 1       -               -
9099 
9100      step 1: vectorize stmt S1:
9101         We first create the vector stmt VS1_0, and, as usual, record a
9102         pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9103         Next, we create the vector stmt VS1_1, and record a pointer to
9104         it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9105         Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
9106         stmts and pointers:
9107                                 RELATED_STMT    VEC_STMT
9108         VS1_0:  vx0 = memref0   VS1_1           -
9109         VS1_1:  vx1 = memref1   VS1_2           -
9110         VS1_2:  vx2 = memref2   VS1_3           -
9111         VS1_3:  vx3 = memref3   -               -
9112         S1:     x = load        -               VS1_0
9113         S2:     z = x + 1       -               -
9114   */
9115 
9116   /* In case of interleaving (non-unit grouped access):
9117 
9118      S1:  x2 = &base + 2
9119      S2:  x0 = &base
9120      S3:  x1 = &base + 1
9121      S4:  x3 = &base + 3
9122 
9123      Vectorized loads are created in the order of memory accesses
9124      starting from the access of the first stmt of the chain:
9125 
9126      VS1: vx0 = &base
9127      VS2: vx1 = &base + vec_size*1
9128      VS3: vx3 = &base + vec_size*2
9129      VS4: vx4 = &base + vec_size*3
9130 
9131      Then permutation statements are generated:
9132 
9133      VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9134      VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9135        ...
9136 
9137      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9138      (the order of the data-refs in the output of vect_permute_load_chain
9139      corresponds to the order of scalar stmts in the interleaving chain - see
9140      the documentation of vect_permute_load_chain()).
9141      The generation of permutation stmts and recording them in
9142      STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9143 
9144      In case of both multiple types and interleaving, the vector loads and
9145      permutation stmts above are created for every copy.  The result vector
9146      stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9147      corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
9148 
9149   /* If the data reference is aligned (dr_aligned) or potentially unaligned
9150      on a target that supports unaligned accesses (dr_unaligned_supported)
9151      we generate the following code:
9152          p = initial_addr;
9153          indx = 0;
9154          loop {
9155 	   p = p + indx * vectype_size;
9156            vec_dest = *(p);
9157            indx = indx + 1;
9158          }
9159 
9160      Otherwise, the data reference is potentially unaligned on a target that
9161      does not support unaligned accesses (dr_explicit_realign_optimized) -
9162      then generate the following code, in which the data in each iteration is
9163      obtained by two vector loads, one from the previous iteration, and one
9164      from the current iteration:
9165          p1 = initial_addr;
9166          msq_init = *(floor(p1))
9167          p2 = initial_addr + VS - 1;
9168          realignment_token = call target_builtin;
9169          indx = 0;
9170          loop {
9171            p2 = p2 + indx * vectype_size
9172            lsq = *(floor(p2))
9173            vec_dest = realign_load (msq, lsq, realignment_token)
9174            indx = indx + 1;
9175            msq = lsq;
9176          }   */
9177 
9178   /* If the misalignment remains the same throughout the execution of the
9179      loop, we can create the init_addr and permutation mask at the loop
9180      preheader.  Otherwise, it needs to be created inside the loop.
9181      This can only occur when vectorizing memory accesses in the inner-loop
9182      nested within an outer-loop that is being vectorized.  */
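  /* A sketch of the nested case (illustrative only):

       for (i = 0; i < N; i++)		<-- vectorized outer loop
	 for (j = 0; j < M; j++)
	   ... = a[i][j];

     If the row stride of A is not a multiple of the vector size, the
     misalignment of &a[i][0] changes from one outer iteration to the
     next, so the realignment data cannot be computed once in the
     preheader and COMPUTE_IN_LOOP is set below.  */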
9183 
9184   if (nested_in_vect_loop
9185       && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9186 		      GET_MODE_SIZE (TYPE_MODE (vectype))))
9187     {
9188       gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9189       compute_in_loop = true;
9190     }
9191 
9192   bool diff_first_stmt_info
9193     = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9194 
9195   if ((alignment_support_scheme == dr_explicit_realign_optimized
9196        || alignment_support_scheme == dr_explicit_realign)
9197       && !compute_in_loop)
9198     {
9199       /* If we have a different first_stmt_info we can't set up realignment
9200 	 here, since we can't guarantee that the first_stmt_info DR has been
9201 	 initialized yet.  Instead use the first_stmt_info_for_drptr DR and
9202 	 bump it by the distance from the first_stmt_info DR, as done below.  */
9203       if (!diff_first_stmt_info)
9204 	msq = vect_setup_realignment (vinfo,
9205 				      first_stmt_info, gsi, &realignment_token,
9206 				      alignment_support_scheme, NULL_TREE,
9207 				      &at_loop);
9208       if (alignment_support_scheme == dr_explicit_realign_optimized)
9209 	{
9210 	  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9211 	  byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9212 				    size_one_node);
9213 	  gcc_assert (!first_stmt_info_for_drptr);
9214 	}
9215     }
9216   else
9217     at_loop = loop;
9218 
9219   if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9220     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9221 
9222   tree bump;
9223   tree vec_offset = NULL_TREE;
9224   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9225     {
9226       aggr_type = NULL_TREE;
9227       bump = NULL_TREE;
9228     }
9229   else if (memory_access_type == VMAT_GATHER_SCATTER)
9230     {
9231       aggr_type = elem_type;
9232       vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9233 				       &bump, &vec_offset);
9234     }
9235   else
9236     {
9237       if (memory_access_type == VMAT_LOAD_STORE_LANES)
9238 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9239       else
9240 	aggr_type = vectype;
9241       bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9242 					  memory_access_type);
9243     }
9244 
9245   vec<tree> vec_offsets = vNULL;
9246   auto_vec<tree> vec_masks;
9247   if (mask)
9248     vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
9249 		       mask, &vec_masks, mask_vectype, NULL_TREE);
9250   tree vec_mask = NULL_TREE;
9251   poly_uint64 group_elt = 0;
9252   for (j = 0; j < ncopies; j++)
9253     {
9254       /* 1. Create the vector or array pointer update chain.  */
9255       if (j == 0)
9256 	{
9257 	  bool simd_lane_access_p
9258 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9259 	  if (simd_lane_access_p
9260 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9261 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9262 	      && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9263 	      && integer_zerop (DR_INIT (first_dr_info->dr))
9264 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
9265 					get_alias_set (TREE_TYPE (ref_type)))
9266 	      && (alignment_support_scheme == dr_aligned
9267 		  || alignment_support_scheme == dr_unaligned_supported))
9268 	    {
9269 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9270 	      dataref_offset = build_int_cst (ref_type, 0);
9271 	    }
9272 	  else if (diff_first_stmt_info)
9273 	    {
9274 	      dataref_ptr
9275 		= vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9276 					    aggr_type, at_loop, offset, &dummy,
9277 					    gsi, &ptr_incr, simd_lane_access_p,
9278 					    byte_offset, bump);
9279 	      /* Adjust the pointer by the difference to first_stmt.  */
9280 	      data_reference_p ptrdr
9281 		= STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9282 	      tree diff
9283 		= fold_convert (sizetype,
9284 				size_binop (MINUS_EXPR,
9285 					    DR_INIT (first_dr_info->dr),
9286 					    DR_INIT (ptrdr)));
9287 	      dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9288 					     stmt_info, diff);
9289 	      if (alignment_support_scheme == dr_explicit_realign)
9290 		{
9291 		  msq = vect_setup_realignment (vinfo,
9292 						first_stmt_info_for_drptr, gsi,
9293 						&realignment_token,
9294 						alignment_support_scheme,
9295 						dataref_ptr, &at_loop);
9296 		  gcc_assert (!compute_in_loop);
9297 		}
9298 	    }
9299 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9300 	    {
9301 	      vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
9302 					   &dataref_ptr, &vec_offsets, ncopies);
9303 	      vec_offset = vec_offsets[0];
9304 	    }
9305 	  else
9306 	    dataref_ptr
9307 	      = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9308 					  at_loop,
9309 					  offset, &dummy, gsi, &ptr_incr,
9310 					  simd_lane_access_p,
9311 					  byte_offset, bump);
9312 	  if (mask)
9313 	    vec_mask = vec_masks[0];
9314 	}
9315       else
9316 	{
9317 	  if (dataref_offset)
9318 	    dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9319 					      bump);
9320 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9321 	    vec_offset = vec_offsets[j];
9322 	  else
9323 	    dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9324 					   stmt_info, bump);
9325 	  if (mask)
9326 	    vec_mask = vec_masks[j];
9327 	}
9328 
9329       if (grouped_load || slp_perm)
9330 	dr_chain.create (vec_num);
9331 
9332       gimple *new_stmt = NULL;
9333       if (memory_access_type == VMAT_LOAD_STORE_LANES)
9334 	{
9335 	  tree vec_array;
9336 
9337 	  vec_array = create_vector_array (vectype, vec_num);
9338 
9339 	  tree final_mask = NULL_TREE;
9340 	  if (loop_masks)
9341 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9342 					     vectype, j);
9343 	  if (vec_mask)
9344 	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9345 						  vec_mask, gsi);
9346 
9347 	  gcall *call;
9348 	  if (final_mask)
9349 	    {
9350 	      /* Emit:
9351 		   VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9352 		                                VEC_MASK).  */
9353 	      unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9354 	      tree alias_ptr = build_int_cst (ref_type, align);
9355 	      call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9356 						 dataref_ptr, alias_ptr,
9357 						 final_mask);
9358 	    }
9359 	  else
9360 	    {
9361 	      /* Emit:
9362 		   VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
9363 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9364 	      call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9365 	    }
9366 	  gimple_call_set_lhs (call, vec_array);
9367 	  gimple_call_set_nothrow (call, true);
9368 	  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9369 	  new_stmt = call;
9370 
9371 	  /* Extract each vector into an SSA_NAME.  */
9372 	  for (i = 0; i < vec_num; i++)
9373 	    {
9374 	      new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9375 					    vec_array, i);
9376 	      dr_chain.quick_push (new_temp);
9377 	    }
9378 
9379 	  /* Record the mapping between SSA_NAMEs and statements.  */
9380 	  vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9381 
9382 	  /* Record that VEC_ARRAY is now dead.  */
9383 	  vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9384 	}
9385       else
9386 	{
9387 	  for (i = 0; i < vec_num; i++)
9388 	    {
9389 	      tree final_mask = NULL_TREE;
9390 	      if (loop_masks
9391 		  && memory_access_type != VMAT_INVARIANT)
9392 		final_mask = vect_get_loop_mask (gsi, loop_masks,
9393 						 vec_num * ncopies,
9394 						 vectype, vec_num * j + i);
9395 	      if (vec_mask)
9396 		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9397 						      vec_mask, gsi);
9398 
9399 	      if (i > 0)
9400 		dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9401 					       gsi, stmt_info, bump);
9402 
9403 	      /* 2. Create the vector-load in the loop.  */
9404 	      switch (alignment_support_scheme)
9405 		{
9406 		case dr_aligned:
9407 		case dr_unaligned_supported:
9408 		  {
9409 		    unsigned int misalign;
9410 		    unsigned HOST_WIDE_INT align;
9411 
9412 		    if (memory_access_type == VMAT_GATHER_SCATTER)
9413 		      {
9414 			tree zero = build_zero_cst (vectype);
9415 			tree scale = size_int (gs_info.scale);
9416 			gcall *call;
9417 			if (final_mask)
9418 			  call = gimple_build_call_internal
9419 			    (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9420 			     vec_offset, scale, zero, final_mask);
9421 			else
9422 			  call = gimple_build_call_internal
9423 			    (IFN_GATHER_LOAD, 4, dataref_ptr,
9424 			     vec_offset, scale, zero);
9425 			gimple_call_set_nothrow (call, true);
9426 			new_stmt = call;
9427 			data_ref = NULL_TREE;
9428 			break;
9429 		      }
9430 
9431 		    align =
9432 		      known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9433 		    if (alignment_support_scheme == dr_aligned)
9434 		      {
9435 			gcc_assert (aligned_access_p (first_dr_info));
9436 			misalign = 0;
9437 		      }
9438 		    else if (DR_MISALIGNMENT (first_dr_info) == -1)
9439 		      {
9440 			align = dr_alignment
9441 			  (vect_dr_behavior (vinfo, first_dr_info));
9442 			misalign = 0;
9443 		      }
9444 		    else
9445 		      misalign = DR_MISALIGNMENT (first_dr_info);
9446 		    if (dataref_offset == NULL_TREE
9447 			&& TREE_CODE (dataref_ptr) == SSA_NAME)
9448 		      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9449 					      align, misalign);
9450 		    align = least_bit_hwi (misalign | align);
9451 
9452 		    if (final_mask)
9453 		      {
9454 			tree ptr = build_int_cst (ref_type,
9455 						  align * BITS_PER_UNIT);
9456 			gcall *call
9457 			  = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9458 							dataref_ptr, ptr,
9459 							final_mask);
9460 			gimple_call_set_nothrow (call, true);
9461 			new_stmt = call;
9462 			data_ref = NULL_TREE;
9463 		      }
9464 		    else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9465 		      {
9466 			tree final_len
9467 			  = vect_get_loop_len (loop_vinfo, loop_lens,
9468 					       vec_num * ncopies,
9469 					       vec_num * j + i);
9470 			tree ptr = build_int_cst (ref_type,
9471 						  align * BITS_PER_UNIT);
9472 			gcall *call
9473 			  = gimple_build_call_internal (IFN_LEN_LOAD, 3,
9474 							dataref_ptr, ptr,
9475 							final_len);
9476 			gimple_call_set_nothrow (call, true);
9477 			new_stmt = call;
9478 			data_ref = NULL_TREE;
9479 
9480 			/* Need a conversion back if the load was done in a VnQI mode.  */
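			/* E.g. (illustrative) a V4SF len load on a target
			   that only provides LEN_LOAD with QImode elements
			   is emitted as a V16QI load followed by a
			   VIEW_CONVERT_EXPR back to V4SF.  */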
9481 			machine_mode vmode = TYPE_MODE (vectype);
9482 			opt_machine_mode new_ovmode
9483 			  = get_len_load_store_mode (vmode, true);
9484 			machine_mode new_vmode = new_ovmode.require ();
9485 			if (vmode != new_vmode)
9486 			  {
9487 			    tree qi_type = unsigned_intQI_type_node;
9488 			    tree new_vtype
9489 			      = build_vector_type_for_mode (qi_type, new_vmode);
9490 			    tree var = vect_get_new_ssa_name (new_vtype,
9491 							      vect_simple_var);
9492 			    gimple_set_lhs (call, var);
9493 			    vect_finish_stmt_generation (vinfo, stmt_info, call,
9494 							 gsi);
9495 			    tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9496 			    new_stmt
9497 			      = gimple_build_assign (vec_dest,
9498 						     VIEW_CONVERT_EXPR, op);
9499 			  }
9500 		      }
9501 		    else
9502 		      {
9503 			tree ltype = vectype;
9504 			tree new_vtype = NULL_TREE;
9505 			unsigned HOST_WIDE_INT gap
9506 			  = DR_GROUP_GAP (first_stmt_info);
9507 			unsigned int vect_align
9508 			  = vect_known_alignment_in_bytes (first_dr_info);
9509 			unsigned int scalar_dr_size
9510 			  = vect_get_scalar_dr_size (first_dr_info);
9511 			/* If there's no peeling for gaps but we have a gap
9512 			   with slp loads then load the lower half of the
9513 			   vector only.  See get_group_load_store_type for
9514 			   when we apply this optimization.  */
9515 			if (slp
9516 			    && loop_vinfo
9517 			    && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9518 			    && gap != 0
9519 			    && known_eq (nunits, (group_size - gap) * 2)
9520 			    && known_eq (nunits, group_size)
9521 			    && gap >= (vect_align / scalar_dr_size))
9522 			  {
9523 			    tree half_vtype;
9524 			    new_vtype
9525 			      = vector_vector_composition_type (vectype, 2,
9526 								&half_vtype);
9527 			    if (new_vtype != NULL_TREE)
9528 			      ltype = half_vtype;
9529 			  }
9530 			tree offset
9531 			  = (dataref_offset ? dataref_offset
9532 					    : build_int_cst (ref_type, 0));
9533 			if (ltype != vectype
9534 			    && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9535 			  {
9536 			    unsigned HOST_WIDE_INT gap_offset
9537 			      = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9538 			    tree gapcst = build_int_cst (ref_type, gap_offset);
9539 			    offset = size_binop (PLUS_EXPR, offset, gapcst);
9540 			  }
9541 			data_ref
9542 			  = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9543 			if (alignment_support_scheme == dr_aligned)
9544 			  ;
9545 			else
9546 			  TREE_TYPE (data_ref)
9547 			    = build_aligned_type (TREE_TYPE (data_ref),
9548 						  align * BITS_PER_UNIT);
9549 			if (ltype != vectype)
9550 			  {
9551 			    vect_copy_ref_info (data_ref,
9552 						DR_REF (first_dr_info->dr));
9553 			    tree tem = make_ssa_name (ltype);
9554 			    new_stmt = gimple_build_assign (tem, data_ref);
9555 			    vect_finish_stmt_generation (vinfo, stmt_info,
9556 							 new_stmt, gsi);
9557 			    data_ref = NULL;
9558 			    vec<constructor_elt, va_gc> *v;
9559 			    vec_alloc (v, 2);
9560 			    if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9561 			      {
9562 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9563 							build_zero_cst (ltype));
9564 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9565 			      }
9566 			    else
9567 			      {
9568 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9569 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9570 							build_zero_cst (ltype));
9571 			      }
9572 			    gcc_assert (new_vtype != NULL_TREE);
9573 			    if (new_vtype == vectype)
9574 			      new_stmt = gimple_build_assign (
9575 				vec_dest, build_constructor (vectype, v));
9576 			    else
9577 			      {
9578 				tree new_vname = make_ssa_name (new_vtype);
9579 				new_stmt = gimple_build_assign (
9580 				  new_vname, build_constructor (new_vtype, v));
9581 				vect_finish_stmt_generation (vinfo, stmt_info,
9582 							     new_stmt, gsi);
9583 				new_stmt = gimple_build_assign (
9584 				  vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9585 						    new_vname));
9586 			      }
9587 			  }
9588 		      }
9589 		    break;
9590 		  }
9591 		case dr_explicit_realign:
9592 		  {
9593 		    tree ptr, bump;
9594 
9595 		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9596 
9597 		    if (compute_in_loop)
9598 		      msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9599 						    &realignment_token,
9600 						    dr_explicit_realign,
9601 						    dataref_ptr, NULL);
9602 
9603 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
9604 		      ptr = copy_ssa_name (dataref_ptr);
9605 		    else
9606 		      ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9607 		    // For explicit realign the target alignment should be
9608 		    // known at compile time.
9609 		    unsigned HOST_WIDE_INT align =
9610 		      DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9611 		    new_stmt = gimple_build_assign
9612 				 (ptr, BIT_AND_EXPR, dataref_ptr,
9613 				  build_int_cst
9614 				  (TREE_TYPE (dataref_ptr),
9615 				   -(HOST_WIDE_INT) align));
9616 		    vect_finish_stmt_generation (vinfo, stmt_info,
9617 						 new_stmt, gsi);
9618 		    data_ref
9619 		      = build2 (MEM_REF, vectype, ptr,
9620 				build_int_cst (ref_type, 0));
9621 		    vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9622 		    vec_dest = vect_create_destination_var (scalar_dest,
9623 							    vectype);
9624 		    new_stmt = gimple_build_assign (vec_dest, data_ref);
9625 		    new_temp = make_ssa_name (vec_dest, new_stmt);
9626 		    gimple_assign_set_lhs (new_stmt, new_temp);
9627 		    gimple_move_vops (new_stmt, stmt_info->stmt);
9628 		    vect_finish_stmt_generation (vinfo, stmt_info,
9629 						 new_stmt, gsi);
9630 		    msq = new_temp;
9631 
9632 		    bump = size_binop (MULT_EXPR, vs,
9633 				       TYPE_SIZE_UNIT (elem_type));
9634 		    bump = size_binop (MINUS_EXPR, bump, size_one_node);
9635 		    ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9636 					   stmt_info, bump);
9637 		    new_stmt = gimple_build_assign
9638 				 (NULL_TREE, BIT_AND_EXPR, ptr,
9639 				  build_int_cst
9640 				  (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9641 		    ptr = copy_ssa_name (ptr, new_stmt);
9642 		    gimple_assign_set_lhs (new_stmt, ptr);
9643 		    vect_finish_stmt_generation (vinfo, stmt_info,
9644 						 new_stmt, gsi);
9645 		    data_ref
9646 		      = build2 (MEM_REF, vectype, ptr,
9647 				build_int_cst (ref_type, 0));
9648 		    break;
9649 		  }
9650 		case dr_explicit_realign_optimized:
9651 		  {
9652 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
9653 		      new_temp = copy_ssa_name (dataref_ptr);
9654 		    else
9655 		      new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9656 		    // We should only be doing this if we know the target
9657 		    // alignment at compile time.
9658 		    unsigned HOST_WIDE_INT align =
9659 		      DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9660 		    new_stmt = gimple_build_assign
9661 		      (new_temp, BIT_AND_EXPR, dataref_ptr,
9662 		       build_int_cst (TREE_TYPE (dataref_ptr),
9663 				     -(HOST_WIDE_INT) align));
9664 		    vect_finish_stmt_generation (vinfo, stmt_info,
9665 						 new_stmt, gsi);
9666 		    data_ref
9667 		      = build2 (MEM_REF, vectype, new_temp,
9668 				build_int_cst (ref_type, 0));
9669 		    break;
9670 		  }
9671 		default:
9672 		  gcc_unreachable ();
9673 		}
9674 	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
9675 	      /* DATA_REF is null if we've already built the statement.  */
9676 	      if (data_ref)
9677 		{
9678 		  vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9679 		  new_stmt = gimple_build_assign (vec_dest, data_ref);
9680 		}
9681 	      new_temp = make_ssa_name (vec_dest, new_stmt);
9682 	      gimple_set_lhs (new_stmt, new_temp);
9683 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9684 
9685 	      /* 3. Handle explicit realignment if necessary/supported.
9686 		 Create in loop:
9687 		   vec_dest = realign_load (msq, lsq, realignment_token)  */
9688 	      if (alignment_support_scheme == dr_explicit_realign_optimized
9689 		  || alignment_support_scheme == dr_explicit_realign)
9690 		{
9691 		  lsq = gimple_assign_lhs (new_stmt);
9692 		  if (!realignment_token)
9693 		    realignment_token = dataref_ptr;
9694 		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
9695 		  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9696 						  msq, lsq, realignment_token);
9697 		  new_temp = make_ssa_name (vec_dest, new_stmt);
9698 		  gimple_assign_set_lhs (new_stmt, new_temp);
9699 		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9700 
9701 		  if (alignment_support_scheme == dr_explicit_realign_optimized)
9702 		    {
9703 		      gcc_assert (phi);
9704 		      if (i == vec_num - 1 && j == ncopies - 1)
9705 			add_phi_arg (phi, lsq,
9706 				     loop_latch_edge (containing_loop),
9707 				     UNKNOWN_LOCATION);
9708 		      msq = lsq;
9709 		    }
9710 		}
9711 
9712 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9713 		{
9714 		  tree perm_mask = perm_mask_for_reverse (vectype);
9715 		  new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9716 						   perm_mask, stmt_info, gsi);
9717 		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
9718 		}
9719 
9720 	      /* Collect vector loads and later create their permutation in
9721 		 vect_transform_grouped_load ().  */
9722 	      if (grouped_load || slp_perm)
9723 		dr_chain.quick_push (new_temp);
9724 
9725 	      /* Store vector loads in the corresponding SLP_NODE.  */
9726 	      if (slp && !slp_perm)
9727 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9728 
9729 	      /* With an SLP permutation we load the gaps as well; without
9730 	         one we need to skip the gaps once we have fully loaded
9731 		 all elements.  group_gap_adj is DR_GROUP_SIZE here.  */
9732 	      group_elt += nunits;
9733 	      if (maybe_ne (group_gap_adj, 0U)
9734 		  && !slp_perm
9735 		  && known_eq (group_elt, group_size - group_gap_adj))
9736 		{
9737 		  poly_wide_int bump_val
9738 		    = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9739 		       * group_gap_adj);
9740 		  if (tree_int_cst_sgn
9741 			(vect_dr_behavior (vinfo, dr_info)->step) == -1)
9742 		    bump_val = -bump_val;
9743 		  tree bump = wide_int_to_tree (sizetype, bump_val);
9744 		  dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9745 						 gsi, stmt_info, bump);
9746 		  group_elt = 0;
9747 		}
9748 	    }
9749 	  /* Bump the vector pointer to account for a gap or for excess
9750 	     elements loaded for a permuted SLP load.  */
9751 	  if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9752 	    {
9753 	      poly_wide_int bump_val
9754 		= (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9755 		   * group_gap_adj);
9756 	      if (tree_int_cst_sgn
9757 		    (vect_dr_behavior (vinfo, dr_info)->step) == -1)
9758 		bump_val = -bump_val;
9759 	      tree bump = wide_int_to_tree (sizetype, bump_val);
9760 	      dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9761 					     stmt_info, bump);
9762 	    }
9763 	}
9764 
9765       if (slp && !slp_perm)
9766 	continue;
9767 
9768       if (slp_perm)
9769         {
9770 	  unsigned n_perms;
9771 	  bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
9772 						  gsi, vf, false, &n_perms);
9773 	  gcc_assert (ok);
9774         }
9775       else
9776         {
9777           if (grouped_load)
9778   	    {
9779 	      if (memory_access_type != VMAT_LOAD_STORE_LANES)
9780 		vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
9781 					     group_size, gsi);
9782 	      *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9783 	    }
9784           else
9785 	    {
9786 	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9787 	    }
9788         }
9789       dr_chain.release ();
9790     }
9791   if (!slp)
9792     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9793 
9794   return true;
9795 }
9796 
9797 /* Function vect_is_simple_cond.
9798 
9799    Input:
9800    LOOP - the loop that is being vectorized.
9801    COND - Condition that is checked for simple use.
9802 
9803    Output:
9804    *COMP_VECTYPE - the vector type for the comparison.
9805    *DTS - The def types for the arguments of the comparison
9806 
9807    Returns whether a COND can be vectorized.  Checks whether
9808    condition operands are supportable using vect_is_simple_use.  */
9809 
9810 static bool
9811 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
9812 		     slp_tree slp_node, tree *comp_vectype,
9813 		     enum vect_def_type *dts, tree vectype)
9814 {
9815   tree lhs, rhs;
9816   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9817   slp_tree slp_op;
9818 
9819   /* Mask case.  */
9820   if (TREE_CODE (cond) == SSA_NAME
9821       && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9822     {
9823       if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
9824 			       &slp_op, &dts[0], comp_vectype)
9825 	  || !*comp_vectype
9826 	  || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9827 	return false;
9828       return true;
9829     }
9830 
9831   if (!COMPARISON_CLASS_P (cond))
9832     return false;
9833 
9834   lhs = TREE_OPERAND (cond, 0);
9835   rhs = TREE_OPERAND (cond, 1);
9836 
9837   if (TREE_CODE (lhs) == SSA_NAME)
9838     {
9839       if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
9840 			       &lhs, &slp_op, &dts[0], &vectype1))
9841 	return false;
9842     }
9843   else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9844 	   || TREE_CODE (lhs) == FIXED_CST)
9845     dts[0] = vect_constant_def;
9846   else
9847     return false;
9848 
9849   if (TREE_CODE (rhs) == SSA_NAME)
9850     {
9851       if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
9852 			       &rhs, &slp_op, &dts[1], &vectype2))
9853 	return false;
9854     }
9855   else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9856 	   || TREE_CODE (rhs) == FIXED_CST)
9857     dts[1] = vect_constant_def;
9858   else
9859     return false;
9860 
9861   if (vectype1 && vectype2
9862       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9863 		   TYPE_VECTOR_SUBPARTS (vectype2)))
9864     return false;
9865 
9866   *comp_vectype = vectype1 ? vectype1 : vectype2;
9867   /* Invariant comparison.  */
9868   if (! *comp_vectype)
9869     {
9870       tree scalar_type = TREE_TYPE (lhs);
9871       if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9872 	*comp_vectype = truth_type_for (vectype);
9873       else
9874 	{
9875 	  /* If we can widen the comparison to match vectype do so.  */
9876 	  if (INTEGRAL_TYPE_P (scalar_type)
9877 	      && !slp_node
9878 	      && tree_int_cst_lt (TYPE_SIZE (scalar_type),
9879 				  TYPE_SIZE (TREE_TYPE (vectype))))
9880 	    scalar_type = build_nonstandard_integer_type
9881 	      (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
9882 	  *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
9883 						       slp_node);
9884 	}
9885     }
9886 
9887   return true;
9888 }
9889 
9890 /* vectorizable_condition.
9891 
9892    Check if STMT_INFO is a conditional modify expression that can be
9893    vectorized.  If VEC_STMT is also passed, vectorize STMT_INFO: create a
9894    vectorized stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT,
9895    and insert it at GSI.
9896 
9897    When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9898 
9899    Return true if STMT_INFO is vectorizable in this way.  */
9900 
9901 static bool
9902 vectorizable_condition (vec_info *vinfo,
9903 			stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9904 			gimple **vec_stmt,
9905 			slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9906 {
9907   tree scalar_dest = NULL_TREE;
9908   tree vec_dest = NULL_TREE;
9909   tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
9910   tree then_clause, else_clause;
9911   tree comp_vectype = NULL_TREE;
9912   tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
9913   tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
9914   tree vec_compare;
9915   tree new_temp;
9916   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9917   enum vect_def_type dts[4]
9918     = {vect_unknown_def_type, vect_unknown_def_type,
9919        vect_unknown_def_type, vect_unknown_def_type};
9920   int ndts = 4;
9921   int ncopies;
9922   int vec_num;
9923   enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9924   int i;
9925   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9926   vec<tree> vec_oprnds0 = vNULL;
9927   vec<tree> vec_oprnds1 = vNULL;
9928   vec<tree> vec_oprnds2 = vNULL;
9929   vec<tree> vec_oprnds3 = vNULL;
9930   tree vec_cmp_type;
9931   bool masked = false;
9932 
9933   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9934     return false;
9935 
9936   /* Is vectorizable conditional operation?  */
9937   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9938   if (!stmt)
9939     return false;
9940 
9941   code = gimple_assign_rhs_code (stmt);
9942   if (code != COND_EXPR)
9943     return false;
9944 
9945   stmt_vec_info reduc_info = NULL;
9946   int reduc_index = -1;
9947   vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
9948   bool for_reduction
9949     = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
9950   if (for_reduction)
9951     {
9952       if (STMT_SLP_TYPE (stmt_info))
9953 	return false;
9954       reduc_info = info_for_reduction (vinfo, stmt_info);
9955       reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
9956       reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
9957       gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
9958 		  || reduc_index != -1);
9959     }
9960   else
9961     {
9962       if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9963 	return false;
9964     }
9965 
9966   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9967   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9968 
9969   if (slp_node)
9970     {
9971       ncopies = 1;
9972       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9973     }
9974   else
9975     {
9976       ncopies = vect_get_num_copies (loop_vinfo, vectype);
9977       vec_num = 1;
9978     }
9979 
9980   gcc_assert (ncopies >= 1);
9981   if (for_reduction && ncopies > 1)
9982     return false; /* FORNOW */
9983 
9984   cond_expr = gimple_assign_rhs1 (stmt);
9985 
9986   if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
9987 			    &comp_vectype, &dts[0], vectype)
9988       || !comp_vectype)
9989     return false;
9990 
9991   unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
9992   slp_tree then_slp_node, else_slp_node;
9993   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
9994 			   &then_clause, &then_slp_node, &dts[2], &vectype1))
9995     return false;
9996   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
9997 			   &else_clause, &else_slp_node, &dts[3], &vectype2))
9998     return false;
9999 
10000   if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10001     return false;
10002 
10003   if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10004     return false;
10005 
10006   masked = !COMPARISON_CLASS_P (cond_expr);
10007   vec_cmp_type = truth_type_for (comp_vectype);
10008 
10009   if (vec_cmp_type == NULL_TREE)
10010     return false;
10011 
10012   cond_code = TREE_CODE (cond_expr);
10013   if (!masked)
10014     {
10015       cond_expr0 = TREE_OPERAND (cond_expr, 0);
10016       cond_expr1 = TREE_OPERAND (cond_expr, 1);
10017     }
10018 
10019   /* For conditional reductions, the "then" value needs to be the candidate
10020      value calculated by this iteration while the "else" value needs to be
10021      the result carried over from previous iterations.  If the COND_EXPR
10022      is the other way around, we need to swap it.  */
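  /* For instance (an illustrative sketch), a conditional reduction
     written as

       res = a[i] < val ? res : a[i];

     carries the previous result in the "then" position, so the
     comparison is inverted (or the mask inverted in the masked case)
     and the clauses are swapped so that the candidate a[i] becomes
     the "then" value.  */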
10023   bool must_invert_cmp_result = false;
10024   if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10025     {
10026       if (masked)
10027 	must_invert_cmp_result = true;
10028       else
10029 	{
10030 	  bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10031 	  tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10032 	  if (new_code == ERROR_MARK)
10033 	    must_invert_cmp_result = true;
10034 	  else
10035 	    {
10036 	      cond_code = new_code;
10037 	      /* Make sure we don't accidentally use the old condition.  */
10038 	      cond_expr = NULL_TREE;
10039 	    }
10040 	}
10041       std::swap (then_clause, else_clause);
10042     }
10043 
10044   if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10045     {
10046       /* Boolean values may have another representation in vectors
10047 	 and therefore we prefer bit operations over comparison for
10048 	 them (which also works for scalar masks).  We store opcodes
10049 	 to use in bitop1 and bitop2.  The statement is vectorized as
10050 	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10051 	 depending on bitop1 and bitop2 arity.  */
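      /* For instance (illustrative), for boolean operands A and B,
	 A > B is realized as A & ~B (bitop1 = ~, bitop2 = &), and
	 A == B as ~(A ^ B) (bitop1 = ^, bitop2 = ~, implemented by
	 swapping the then/else clauses below).  */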
10052       switch (cond_code)
10053 	{
10054 	case GT_EXPR:
10055 	  bitop1 = BIT_NOT_EXPR;
10056 	  bitop2 = BIT_AND_EXPR;
10057 	  break;
10058 	case GE_EXPR:
10059 	  bitop1 = BIT_NOT_EXPR;
10060 	  bitop2 = BIT_IOR_EXPR;
10061 	  break;
10062 	case LT_EXPR:
10063 	  bitop1 = BIT_NOT_EXPR;
10064 	  bitop2 = BIT_AND_EXPR;
10065 	  std::swap (cond_expr0, cond_expr1);
10066 	  break;
10067 	case LE_EXPR:
10068 	  bitop1 = BIT_NOT_EXPR;
10069 	  bitop2 = BIT_IOR_EXPR;
10070 	  std::swap (cond_expr0, cond_expr1);
10071 	  break;
10072 	case NE_EXPR:
10073 	  bitop1 = BIT_XOR_EXPR;
10074 	  break;
10075 	case EQ_EXPR:
10076 	  bitop1 = BIT_XOR_EXPR;
10077 	  bitop2 = BIT_NOT_EXPR;
10078 	  break;
10079 	default:
10080 	  return false;
10081 	}
10082       cond_code = SSA_NAME;
10083     }
10084 
10085   if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10086       && reduction_type == EXTRACT_LAST_REDUCTION
10087       && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10088     {
10089       if (dump_enabled_p ())
10090 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10091 			 "reduction comparison operation not supported.\n");
10092       return false;
10093     }
10094 
10095   if (!vec_stmt)
10096     {
10097       if (bitop1 != NOP_EXPR)
10098 	{
10099 	  machine_mode mode = TYPE_MODE (comp_vectype);
10100 	  optab optab;
10101 
10102 	  optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10103 	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10104 	    return false;
10105 
10106 	  if (bitop2 != NOP_EXPR)
10107 	    {
10108 	      optab = optab_for_tree_code (bitop2, comp_vectype,
10109 					   optab_default);
10110 	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10111 		return false;
10112 	    }
10113 	}
10114 
10115       vect_cost_for_stmt kind = vector_stmt;
10116       if (reduction_type == EXTRACT_LAST_REDUCTION)
10117 	/* Count one reduction-like operation per vector.  */
10118 	kind = vec_to_scalar;
10119       else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10120 	return false;
10121 
10122       if (slp_node
10123 	  && (!vect_maybe_update_slp_op_vectype
10124 		 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10125 	      || (op_adjust == 1
10126 		  && !vect_maybe_update_slp_op_vectype
10127 			(SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10128 	      || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10129 	      || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10130 	{
10131 	  if (dump_enabled_p ())
10132 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10133 			     "incompatible vector types for invariants\n");
10134 	  return false;
10135 	}
10136 
10137       if (loop_vinfo && for_reduction
10138 	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10139 	{
10140 	  if (reduction_type == EXTRACT_LAST_REDUCTION)
10141 	    vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10142 				   ncopies * vec_num, vectype, NULL);
10143 	  /* Extra inactive lanes should be safe for vect_nested_cycle.  */
10144 	  else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10145 	    {
10146 	      if (dump_enabled_p ())
10147 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10148 				 "conditional reduction prevents the use"
10149 				 " of partial vectors.\n");
10150 	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10151 	    }
10152 	}
10153 
10154       STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10155       vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10156 			      cost_vec, kind);
10157       return true;
10158     }
10159 
10160   /* Transform.  */
10161 
10162   /* Handle def.  */
10163   scalar_dest = gimple_assign_lhs (stmt);
10164   if (reduction_type != EXTRACT_LAST_REDUCTION)
10165     vec_dest = vect_create_destination_var (scalar_dest, vectype);
10166 
10167   bool swap_cond_operands = false;
10168 
10169   /* See whether another part of the vectorized code applies a loop
10170      mask to the condition, or to its inverse.  */
10171 
10172   vec_loop_masks *masks = NULL;
10173   if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10174     {
10175       if (reduction_type == EXTRACT_LAST_REDUCTION)
10176 	masks = &LOOP_VINFO_MASKS (loop_vinfo);
10177       else
10178 	{
10179 	  scalar_cond_masked_key cond (cond_expr, ncopies);
10180 	  if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10181 	    masks = &LOOP_VINFO_MASKS (loop_vinfo);
10182 	  else
10183 	    {
10184 	      bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10185 	      cond.code = invert_tree_comparison (cond.code, honor_nans);
10186 	      if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10187 		{
10188 		  masks = &LOOP_VINFO_MASKS (loop_vinfo);
10189 		  cond_code = cond.code;
10190 		  swap_cond_operands = true;
10191 		}
10192 	    }
10193 	}
10194     }
10195 
10196   /* Handle cond expr.  */
10197   if (masked)
10198     vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10199 		       cond_expr, &vec_oprnds0, comp_vectype,
10200 		       then_clause, &vec_oprnds2, vectype,
10201 		       reduction_type != EXTRACT_LAST_REDUCTION
10202 		       ? else_clause : NULL, &vec_oprnds3, vectype);
10203   else
10204     vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10205 		       cond_expr0, &vec_oprnds0, comp_vectype,
10206 		       cond_expr1, &vec_oprnds1, comp_vectype,
10207 		       then_clause, &vec_oprnds2, vectype,
10208 		       reduction_type != EXTRACT_LAST_REDUCTION
10209 		       ? else_clause : NULL, &vec_oprnds3, vectype);
10210 
10211   /* Arguments are ready.  Create the new vector stmt.  */
10212   FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10213     {
10214       vec_then_clause = vec_oprnds2[i];
10215       if (reduction_type != EXTRACT_LAST_REDUCTION)
10216 	vec_else_clause = vec_oprnds3[i];
10217 
10218       if (swap_cond_operands)
10219 	std::swap (vec_then_clause, vec_else_clause);
10220 
10221       if (masked)
10222 	vec_compare = vec_cond_lhs;
10223       else
10224 	{
10225 	  vec_cond_rhs = vec_oprnds1[i];
10226 	  if (bitop1 == NOP_EXPR)
10227 	    {
10228 	      gimple_seq stmts = NULL;
10229 	      vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10230 					   vec_cond_lhs, vec_cond_rhs);
10231 	      gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10232 	    }
10233 	  else
10234 	    {
10235 	      new_temp = make_ssa_name (vec_cmp_type);
10236 	      gassign *new_stmt;
10237 	      if (bitop1 == BIT_NOT_EXPR)
10238 		new_stmt = gimple_build_assign (new_temp, bitop1,
10239 						vec_cond_rhs);
10240 	      else
10241 		new_stmt
10242 		  = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10243 					 vec_cond_rhs);
10244 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10245 	      if (bitop2 == NOP_EXPR)
10246 		vec_compare = new_temp;
10247 	      else if (bitop2 == BIT_NOT_EXPR)
10248 		{
10249 		  /* Instead of doing ~x ? y : z do x ? z : y.  */
10250 		  vec_compare = new_temp;
10251 		  std::swap (vec_then_clause, vec_else_clause);
10252 		}
10253 	      else
10254 		{
10255 		  vec_compare = make_ssa_name (vec_cmp_type);
10256 		  new_stmt
10257 		    = gimple_build_assign (vec_compare, bitop2,
10258 					   vec_cond_lhs, new_temp);
10259 		  vect_finish_stmt_generation (vinfo, stmt_info,
10260 					       new_stmt, gsi);
10261 		}
10262 	    }
10263 	}
10264 
10265       /* If we decided to apply a loop mask to the result of the vector
10266 	 comparison, AND the comparison with the mask now.  Later passes
10267 	 should then be able to reuse the AND results between multiple
10268 	 vector statements.
10269 
10270 	 For example:
10271 	 for (int i = 0; i < 100; ++i)
10272 	 x[i] = y[i] ? z[i] : 10;
10273 
10274 	 results in following optimized GIMPLE:
10275 
10276 	 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10277 	 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10278 	 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10279 	 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10280 	 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10281 	 vect_iftmp.11_47, { 10, ... }>;
10282 
10283 	 instead of using masked and unmasked forms of
10284 	 vec != { 0, ... } (masked in the MASK_LOAD,
10285 	 unmasked in the VEC_COND_EXPR).  */
10286 
10287       /* Force vec_compare to be an SSA_NAME rather than a comparison,
10288 	 in cases where that's necessary.  */
10289 
10290       if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10291 	{
10292 	  if (!is_gimple_val (vec_compare))
10293 	    {
10294 	      tree vec_compare_name = make_ssa_name (vec_cmp_type);
10295 	      gassign *new_stmt = gimple_build_assign (vec_compare_name,
10296 						       vec_compare);
10297 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10298 	      vec_compare = vec_compare_name;
10299 	    }
10300 
10301 	  if (must_invert_cmp_result)
10302 	    {
10303 	      tree vec_compare_name = make_ssa_name (vec_cmp_type);
10304 	      gassign *new_stmt = gimple_build_assign (vec_compare_name,
10305 						       BIT_NOT_EXPR,
10306 						       vec_compare);
10307 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10308 	      vec_compare = vec_compare_name;
10309 	    }
10310 
10311 	  if (masks)
10312 	    {
10313 	      tree loop_mask
10314 		= vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10315 				      vectype, i);
10316 	      tree tmp2 = make_ssa_name (vec_cmp_type);
10317 	      gassign *g
10318 		= gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10319 				       loop_mask);
10320 	      vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10321 	      vec_compare = tmp2;
10322 	    }
10323 	}
10324 
10325       gimple *new_stmt;
10326       if (reduction_type == EXTRACT_LAST_REDUCTION)
10327 	{
10328 	  gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10329 	  tree lhs = gimple_get_lhs (old_stmt);
10330 	  new_stmt = gimple_build_call_internal
10331 	      (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10332 	       vec_then_clause);
10333 	  gimple_call_set_lhs (new_stmt, lhs);
10334 	  SSA_NAME_DEF_STMT (lhs) = new_stmt;
10335 	  if (old_stmt == gsi_stmt (*gsi))
10336 	    vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10337 	  else
10338 	    {
10339 	      /* In this case we're moving the definition to later in the
10340 		 block.  That doesn't matter because the only uses of the
10341 		 lhs are in phi statements.  */
10342 	      gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10343 	      gsi_remove (&old_gsi, true);
10344 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10345 	    }
10346 	}
10347       else
10348 	{
10349 	  new_temp = make_ssa_name (vec_dest);
10350 	  new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10351 					  vec_then_clause, vec_else_clause);
10352 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10353 	}
10354       if (slp_node)
10355 	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10356       else
10357 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10358     }
10359 
10360   if (!slp_node)
10361     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10362 
10363   vec_oprnds0.release ();
10364   vec_oprnds1.release ();
10365   vec_oprnds2.release ();
10366   vec_oprnds3.release ();
10367 
10368   return true;
10369 }
10370 
10371 /* vectorizable_comparison.
10372 
10373    Check if STMT_INFO is a comparison expression that can be vectorized.
10374    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10375    comparison, put it in VEC_STMT, and insert it at GSI.
10376 
10377    Return true if STMT_INFO is vectorizable in this way.  */
10378 
10379 static bool
10380 vectorizable_comparison (vec_info *vinfo,
10381 			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10382 			 gimple **vec_stmt,
10383 			 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10384 {
10385   tree lhs, rhs1, rhs2;
10386   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10387   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10388   tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10389   tree new_temp;
10390   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10391   enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10392   int ndts = 2;
10393   poly_uint64 nunits;
10394   int ncopies;
10395   enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10396   int i;
10397   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10398   vec<tree> vec_oprnds0 = vNULL;
10399   vec<tree> vec_oprnds1 = vNULL;
10400   tree mask_type;
10401   tree mask;
10402 
10403   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10404     return false;
10405 
10406   if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10407     return false;
10408 
10409   mask_type = vectype;
10410   nunits = TYPE_VECTOR_SUBPARTS (vectype);
10411 
10412   if (slp_node)
10413     ncopies = 1;
10414   else
10415     ncopies = vect_get_num_copies (loop_vinfo, vectype);
10416 
10417   gcc_assert (ncopies >= 1);
10418   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10419     return false;
10420 
10421   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10422   if (!stmt)
10423     return false;
10424 
10425   code = gimple_assign_rhs_code (stmt);
10426 
10427   if (TREE_CODE_CLASS (code) != tcc_comparison)
10428     return false;
10429 
10430   slp_tree slp_rhs1, slp_rhs2;
10431   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10432 			   0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10433     return false;
10434 
10435   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10436 			   1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10437     return false;
10438 
10439   if (vectype1 && vectype2
10440       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10441 		   TYPE_VECTOR_SUBPARTS (vectype2)))
10442     return false;
10443 
10444   vectype = vectype1 ? vectype1 : vectype2;
10445 
10446   /* Invariant comparison.  */
10447   if (!vectype)
10448     {
10449       if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10450 	vectype = mask_type;
10451       else
10452 	vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10453 					       slp_node);
10454       if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10455 	return false;
10456     }
10457   else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10458     return false;
10459 
10460   /* Can't compare mask and non-mask types.  */
10461   if (vectype1 && vectype2
10462       && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10463     return false;
10464 
10465   /* Boolean values may have another representation in vectors
10466      and therefore we prefer bit operations over comparison for
10467      them (which also works for scalar masks).  We store opcodes
10468      to use in bitop1 and bitop2.  The statement is vectorized as
10469        BITOP2 (rhs1 BITOP1 rhs2) or
10470        rhs1 BITOP2 (BITOP1 rhs2)
10471      depending on bitop1 and bitop2 arity.  */
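  /* Concretely, given the mapping set up below, boolean operands A and B
     are compared as:
       A >  B   ->  A & ~B
       A >= B   ->  A | ~B
       A <  B   ->  B & ~A   (operands swapped)
       A <= B   ->  B | ~A   (operands swapped)
       A == B   ->  ~(A ^ B)
       A != B   ->  A ^ B.  */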
10472   bool swap_p = false;
10473   if (VECTOR_BOOLEAN_TYPE_P (vectype))
10474     {
10475       if (code == GT_EXPR)
10476 	{
10477 	  bitop1 = BIT_NOT_EXPR;
10478 	  bitop2 = BIT_AND_EXPR;
10479 	}
10480       else if (code == GE_EXPR)
10481 	{
10482 	  bitop1 = BIT_NOT_EXPR;
10483 	  bitop2 = BIT_IOR_EXPR;
10484 	}
10485       else if (code == LT_EXPR)
10486 	{
10487 	  bitop1 = BIT_NOT_EXPR;
10488 	  bitop2 = BIT_AND_EXPR;
10489 	  swap_p = true;
10490 	}
10491       else if (code == LE_EXPR)
10492 	{
10493 	  bitop1 = BIT_NOT_EXPR;
10494 	  bitop2 = BIT_IOR_EXPR;
10495 	  swap_p = true;
10496 	}
10497       else
10498 	{
10499 	  bitop1 = BIT_XOR_EXPR;
10500 	  if (code == EQ_EXPR)
10501 	    bitop2 = BIT_NOT_EXPR;
10502 	}
10503     }
10504 
10505   if (!vec_stmt)
10506     {
10507       if (bitop1 == NOP_EXPR)
10508 	{
10509 	  if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10510 	    return false;
10511 	}
10512       else
10513 	{
10514 	  machine_mode mode = TYPE_MODE (vectype);
10515 	  optab optab;
10516 
10517 	  optab = optab_for_tree_code (bitop1, vectype, optab_default);
10518 	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10519 	    return false;
10520 
10521 	  if (bitop2 != NOP_EXPR)
10522 	    {
10523 	      optab = optab_for_tree_code (bitop2, vectype, optab_default);
10524 	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10525 		return false;
10526 	    }
10527 	}
10528 
10529       /* Put types on constant and invariant SLP children.  */
10530       if (slp_node
10531 	  && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10532 	      || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10533 	{
10534 	  if (dump_enabled_p ())
10535 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10536 			     "incompatible vector types for invariants\n");
10537 	  return false;
10538 	}
10539 
10540       STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10541       vect_model_simple_cost (vinfo, stmt_info,
10542 			      ncopies * (1 + (bitop2 != NOP_EXPR)),
10543 			      dts, ndts, slp_node, cost_vec);
10544       return true;
10545     }
10546 
10547   /* Transform.  */
10548 
10549   /* Handle def.  */
10550   lhs = gimple_assign_lhs (stmt);
10551   mask = vect_create_destination_var (lhs, mask_type);
10552 
10553   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10554 		     rhs1, &vec_oprnds0, vectype,
10555 		     rhs2, &vec_oprnds1, vectype);
10556   if (swap_p)
10557     std::swap (vec_oprnds0, vec_oprnds1);
10558 
10559   /* Arguments are ready.  Create the new vector stmt.  */
10560   FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10561     {
10562       gimple *new_stmt;
10563       vec_rhs2 = vec_oprnds1[i];
10564 
10565       new_temp = make_ssa_name (mask);
10566       if (bitop1 == NOP_EXPR)
10567 	{
10568 	  new_stmt = gimple_build_assign (new_temp, code,
10569 					  vec_rhs1, vec_rhs2);
10570 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10571 	}
10572       else
10573 	{
10574 	  if (bitop1 == BIT_NOT_EXPR)
10575 	    new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10576 	  else
10577 	    new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10578 					    vec_rhs2);
10579 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10580 	  if (bitop2 != NOP_EXPR)
10581 	    {
10582 	      tree res = make_ssa_name (mask);
10583 	      if (bitop2 == BIT_NOT_EXPR)
10584 		new_stmt = gimple_build_assign (res, bitop2, new_temp);
10585 	      else
10586 		new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10587 						new_temp);
10588 	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10589 	    }
10590 	}
10591       if (slp_node)
10592 	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10593       else
10594 	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10595     }
10596 
10597   if (!slp_node)
10598     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10599 
10600   vec_oprnds0.release ();
10601   vec_oprnds1.release ();
10602 
10603   return true;
10604 }
10605 
10606 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10607    can handle all live statements in the node.  Otherwise return true
10608    if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10609    GSI and VEC_STMT_P are as for vectorizable_live_operation.  */
10610 
10611 static bool
10612 can_vectorize_live_stmts (vec_info *vinfo,
10613 			  stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10614 			  slp_tree slp_node, slp_instance slp_node_instance,
10615 			  bool vec_stmt_p,
10616 			  stmt_vector_for_cost *cost_vec)
10617 {
10618   if (slp_node)
10619     {
10620       stmt_vec_info slp_stmt_info;
10621       unsigned int i;
10622       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10623 	{
10624 	  if (STMT_VINFO_LIVE_P (slp_stmt_info)
10625 	      && !vectorizable_live_operation (vinfo,
10626 					       slp_stmt_info, gsi, slp_node,
10627 					       slp_node_instance, i,
10628 					       vec_stmt_p, cost_vec))
10629 	    return false;
10630 	}
10631     }
10632   else if (STMT_VINFO_LIVE_P (stmt_info)
10633 	   && !vectorizable_live_operation (vinfo, stmt_info, gsi,
10634 					    slp_node, slp_node_instance, -1,
10635 					    vec_stmt_p, cost_vec))
10636     return false;
10637 
10638   return true;
10639 }
10640 
10641 /* Make sure the statement is vectorizable.  */
10642 
10643 opt_result
10644 vect_analyze_stmt (vec_info *vinfo,
10645 		   stmt_vec_info stmt_info, bool *need_to_vectorize,
10646 		   slp_tree node, slp_instance node_instance,
10647 		   stmt_vector_for_cost *cost_vec)
10648 {
10649   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10650   enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10651   bool ok;
10652   gimple_seq pattern_def_seq;
10653 
10654   if (dump_enabled_p ())
10655     dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10656 		     stmt_info->stmt);
10657 
10658   if (gimple_has_volatile_ops (stmt_info->stmt))
10659     return opt_result::failure_at (stmt_info->stmt,
10660 				   "not vectorized:"
10661 				   " stmt has volatile operands: %G\n",
10662 				   stmt_info->stmt);
10663 
10664   if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10665       && node == NULL
10666       && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10667     {
10668       gimple_stmt_iterator si;
10669 
10670       for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10671 	{
10672 	  stmt_vec_info pattern_def_stmt_info
10673 	    = vinfo->lookup_stmt (gsi_stmt (si));
10674 	  if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10675 	      || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10676 	    {
10677 	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
10678 	      if (dump_enabled_p ())
10679 		dump_printf_loc (MSG_NOTE, vect_location,
10680 				 "==> examining pattern def statement: %G",
10681 				 pattern_def_stmt_info->stmt);
10682 
10683 	      opt_result res
10684 		= vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10685 				     need_to_vectorize, node, node_instance,
10686 				     cost_vec);
10687 	      if (!res)
10688 		return res;
10689 	    }
10690 	}
10691     }
10692 
10693   /* Skip stmts that do not need to be vectorized. In loops this is expected
10694      to include:
10695      - the COND_EXPR which is the loop exit condition
10696      - any LABEL_EXPRs in the loop
10697      - computations that are used only for array indexing or loop control.
10698      In basic blocks we only analyze statements that are part of some SLP
10699      instance; therefore, all the statements are relevant.
10700 
10701      A pattern statement needs to be analyzed instead of the original statement
10702      if the original statement is not relevant.  Otherwise, we analyze both
10703      statements.  In basic blocks we are called from some SLP instance
10704      traversal and don't analyze pattern stmts instead; the pattern stmts
10705      will already be part of some SLP instance.  */
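  /* For example, in a simple loop such as
       for (i = 0; i < n; i++)
	 a[i] = b[i] + 1;
     the increment of I and the comparison in the loop exit test are used
     only for array indexing and loop control, so they are marked
     irrelevant and skipped here.  */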
10706 
10707   stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10708   if (!STMT_VINFO_RELEVANT_P (stmt_info)
10709       && !STMT_VINFO_LIVE_P (stmt_info))
10710     {
10711       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10712 	  && pattern_stmt_info
10713 	  && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10714 	      || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10715         {
10716           /* Analyze PATTERN_STMT instead of the original stmt.  */
10717 	  stmt_info = pattern_stmt_info;
10718           if (dump_enabled_p ())
10719 	    dump_printf_loc (MSG_NOTE, vect_location,
10720 			     "==> examining pattern statement: %G",
10721 			     stmt_info->stmt);
10722         }
10723       else
10724         {
10725           if (dump_enabled_p ())
10726             dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10727 
10728           return opt_result::success ();
10729         }
10730     }
10731   else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10732 	   && node == NULL
10733 	   && pattern_stmt_info
10734 	   && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10735 	       || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10736     {
10737       /* Analyze PATTERN_STMT too.  */
10738       if (dump_enabled_p ())
10739 	dump_printf_loc (MSG_NOTE, vect_location,
10740 			 "==> examining pattern statement: %G",
10741 			 pattern_stmt_info->stmt);
10742 
10743       opt_result res
10744 	= vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
10745 			     node_instance, cost_vec);
10746       if (!res)
10747 	return res;
10748    }
10749 
10750   switch (STMT_VINFO_DEF_TYPE (stmt_info))
10751     {
10752       case vect_internal_def:
10753         break;
10754 
10755       case vect_reduction_def:
10756       case vect_nested_cycle:
10757          gcc_assert (!bb_vinfo
10758 		     && (relevance == vect_used_in_outer
10759 			 || relevance == vect_used_in_outer_by_reduction
10760 			 || relevance == vect_used_by_reduction
10761 			 || relevance == vect_unused_in_scope
10762 			 || relevance == vect_used_only_live));
10763          break;
10764 
10765       case vect_induction_def:
10766 	gcc_assert (!bb_vinfo);
10767 	break;
10768 
10769       case vect_constant_def:
10770       case vect_external_def:
10771       case vect_unknown_def_type:
10772       default:
10773         gcc_unreachable ();
10774     }
10775 
10776   if (STMT_VINFO_RELEVANT_P (stmt_info))
10777     {
10778       tree type = gimple_expr_type (stmt_info->stmt);
10779       gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
10780       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10781       gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10782 		  || (call && gimple_call_lhs (call) == NULL_TREE));
10783       *need_to_vectorize = true;
10784     }
10785 
10786   if (PURE_SLP_STMT (stmt_info) && !node)
10787     {
10788       if (dump_enabled_p ())
10789 	dump_printf_loc (MSG_NOTE, vect_location,
10790 			 "handled only by SLP analysis\n");
10791       return opt_result::success ();
10792     }
10793 
10794   ok = true;
10795   if (!bb_vinfo
10796       && (STMT_VINFO_RELEVANT_P (stmt_info)
10797 	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10798     /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10799        -mveclibabi= takes preference over library functions with
10800        the simd attribute.  */
10801     ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10802 	  || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
10803 					   cost_vec)
10804 	  || vectorizable_conversion (vinfo, stmt_info,
10805 				      NULL, NULL, node, cost_vec)
10806 	  || vectorizable_operation (vinfo, stmt_info,
10807 				     NULL, NULL, node, cost_vec)
10808 	  || vectorizable_assignment (vinfo, stmt_info,
10809 				      NULL, NULL, node, cost_vec)
10810 	  || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10811 	  || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10812 	  || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10813 				     node, node_instance, cost_vec)
10814 	  || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
10815 				     NULL, node, cost_vec)
10816 	  || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10817 	  || vectorizable_condition (vinfo, stmt_info,
10818 				     NULL, NULL, node, cost_vec)
10819 	  || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10820 				      cost_vec)
10821 	  || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10822 				  stmt_info, NULL, node));
10823   else
10824     {
10825       if (bb_vinfo)
10826 	ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10827 	      || vectorizable_simd_clone_call (vinfo, stmt_info,
10828 					       NULL, NULL, node, cost_vec)
10829 	      || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
10830 					  cost_vec)
10831 	      || vectorizable_shift (vinfo, stmt_info,
10832 				     NULL, NULL, node, cost_vec)
10833 	      || vectorizable_operation (vinfo, stmt_info,
10834 					 NULL, NULL, node, cost_vec)
10835 	      || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
10836 					  cost_vec)
10837 	      || vectorizable_load (vinfo, stmt_info,
10838 				    NULL, NULL, node, cost_vec)
10839 	      || vectorizable_store (vinfo, stmt_info,
10840 				     NULL, NULL, node, cost_vec)
10841 	      || vectorizable_condition (vinfo, stmt_info,
10842 					 NULL, NULL, node, cost_vec)
10843 	      || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10844 					  cost_vec)
10845 	      || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
10846     }
10847 
10848   if (!ok)
10849     return opt_result::failure_at (stmt_info->stmt,
10850 				   "not vectorized:"
10851 				   " relevant stmt not supported: %G",
10852 				   stmt_info->stmt);
10853 
10854   /* Stmts that are (also) "live" (i.e. used outside the loop)
10855       need extra handling, except for vectorizable reductions.  */
10856   if (!bb_vinfo
10857       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
10858       && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
10859       && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
10860 				    stmt_info, NULL, node, node_instance,
10861 				    false, cost_vec))
10862     return opt_result::failure_at (stmt_info->stmt,
10863 				   "not vectorized:"
10864 				   " live stmt not supported: %G",
10865 				   stmt_info->stmt);
10866 
10867   return opt_result::success ();
10868 }
10869 
10870 
10871 /* Function vect_transform_stmt.
10872 
10873    Create a vectorized stmt to replace STMT_INFO, and insert it at GSI.  */
10874 
10875 bool
10876 vect_transform_stmt (vec_info *vinfo,
10877 		     stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10878 		     slp_tree slp_node, slp_instance slp_node_instance)
10879 {
10880   bool is_store = false;
10881   gimple *vec_stmt = NULL;
10882   bool done;
10883 
10884   gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
10885 
10886   switch (STMT_VINFO_TYPE (stmt_info))
10887     {
10888     case type_demotion_vec_info_type:
10889     case type_promotion_vec_info_type:
10890     case type_conversion_vec_info_type:
10891       done = vectorizable_conversion (vinfo, stmt_info,
10892 				      gsi, &vec_stmt, slp_node, NULL);
10893       gcc_assert (done);
10894       break;
10895 
10896     case induc_vec_info_type:
10897       done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
10898 				     stmt_info, &vec_stmt, slp_node,
10899 				     NULL);
10900       gcc_assert (done);
10901       break;
10902 
10903     case shift_vec_info_type:
10904       done = vectorizable_shift (vinfo, stmt_info,
10905 				 gsi, &vec_stmt, slp_node, NULL);
10906       gcc_assert (done);
10907       break;
10908 
10909     case op_vec_info_type:
10910       done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10911 				     NULL);
10912       gcc_assert (done);
10913       break;
10914 
10915     case assignment_vec_info_type:
10916       done = vectorizable_assignment (vinfo, stmt_info,
10917 				      gsi, &vec_stmt, slp_node, NULL);
10918       gcc_assert (done);
10919       break;
10920 
10921     case load_vec_info_type:
10922       done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10923 				NULL);
10924       gcc_assert (done);
10925       break;
10926 
10927     case store_vec_info_type:
10928       done = vectorizable_store (vinfo, stmt_info,
10929 				 gsi, &vec_stmt, slp_node, NULL);
10930       gcc_assert (done);
10931       if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
10932 	{
10933 	  /* In case of interleaving, the whole chain is vectorized when the
10934 	     last store in the chain is reached.  Store stmts before the last
10935 	     one are skipped, and their vec_stmt_info shouldn't be freed
10936 	     meanwhile.  */
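	  /* E.g. for a group of four interleaved stores the whole chain
	     is only emitted once the fourth store is reached, i.e. when
	     DR_GROUP_STORE_COUNT reaches DR_GROUP_SIZE below.  */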
10937 	  stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10938 	  if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
10939 	    is_store = true;
10940 	}
10941       else
10942 	is_store = true;
10943       break;
10944 
10945     case condition_vec_info_type:
10946       done = vectorizable_condition (vinfo, stmt_info,
10947 				     gsi, &vec_stmt, slp_node, NULL);
10948       gcc_assert (done);
10949       break;
10950 
10951     case comparison_vec_info_type:
10952       done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
10953 				      slp_node, NULL);
10954       gcc_assert (done);
10955       break;
10956 
10957     case call_vec_info_type:
10958       done = vectorizable_call (vinfo, stmt_info,
10959 				gsi, &vec_stmt, slp_node, NULL);
10960       break;
10961 
10962     case call_simd_clone_vec_info_type:
10963       done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
10964 					   slp_node, NULL);
10965       break;
10966 
10967     case reduc_vec_info_type:
10968       done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10969 				       gsi, &vec_stmt, slp_node);
10970       gcc_assert (done);
10971       break;
10972 
10973     case cycle_phi_info_type:
10974       done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
10975 				       &vec_stmt, slp_node, slp_node_instance);
10976       gcc_assert (done);
10977       break;
10978 
10979     case lc_phi_info_type:
10980       done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10981 				  stmt_info, &vec_stmt, slp_node);
10982       gcc_assert (done);
10983       break;
10984 
10985     case phi_info_type:
10986       done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
10987       gcc_assert (done);
10988       break;
10989 
10990     default:
10991       if (!STMT_VINFO_LIVE_P (stmt_info))
10992 	{
10993 	  if (dump_enabled_p ())
10994 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10995                              "stmt not supported.\n");
10996 	  gcc_unreachable ();
10997 	}
10998       done = true;
10999     }
11000 
11001   if (!slp_node && vec_stmt)
11002     gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
11003 
11004   if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
11005     return is_store;
11006 
11007   /* Handle stmts whose DEF is used outside the loop-nest that is
11008      being vectorized.  */
11009   done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
11010 				   slp_node_instance, true, NULL);
11011   gcc_assert (done);
11012 
11013   return false;
11014 }
11015 
11016 
11017 /* Remove a group of stores (for SLP or interleaving), free their
11018    stmt_vec_info.  */
11019 
11020 void
11021 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11022 {
11023   stmt_vec_info next_stmt_info = first_stmt_info;
11024 
11025   while (next_stmt_info)
11026     {
11027       stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11028       next_stmt_info = vect_orig_stmt (next_stmt_info);
11029       /* Free the attached stmt_vec_info and remove the stmt.  */
11030       vinfo->remove_stmt (next_stmt_info);
11031       next_stmt_info = tmp;
11032     }
11033 }
11034 
11035 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11036    elements of type SCALAR_TYPE, or null if the target doesn't support
11037    such a type.
11038 
11039    If NUNITS is zero, return a vector type that contains elements of
11040    type SCALAR_TYPE, choosing whichever vector size the target prefers.
11041 
11042    If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11043    for this vectorization region and want to "autodetect" the best choice.
11044    Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11045    and we want the new type to be interoperable with it.   PREVAILING_MODE
11046    in this case can be a scalar integer mode or a vector mode; when it
11047    is a vector mode, the function acts like a tree-level version of
11048    related_vector_mode.  */
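/* A usage sketch (illustrative only; the resulting mode depends on the
   target):

     tree v = get_related_vectype_for_scalar_type (VOIDmode,
						   intSI_type_node, 0);

   asks for the target's preferred vector of "int", whereas passing a
   previously chosen vector mode such as V8HImode together with
   NUNITS == 4 asks for a 4-element "int" vector interoperable with it
   (V4SImode on targets that provide one).  */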
11049 
11050 tree
11051 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11052 				     tree scalar_type, poly_uint64 nunits)
11053 {
11054   tree orig_scalar_type = scalar_type;
11055   scalar_mode inner_mode;
11056   machine_mode simd_mode;
11057   tree vectype;
11058 
11059   if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11060       && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11061     return NULL_TREE;
11062 
11063   unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11064 
11065   /* For vector types of elements whose mode precision doesn't
11066      match their type's precision we use an element type of mode
11067      precision.  The vectorization routines will have to make sure
11068      they support the proper result truncation/extension.
11069      We also make sure to build vector types with INTEGER_TYPE
11070      component type only.  */
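  /* For instance, a boolean type with TYPE_PRECISION 1 but QImode as its
     mode is replaced by an 8-bit INTEGER_TYPE of the same signedness
     here.  */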
11071   if (INTEGRAL_TYPE_P (scalar_type)
11072       && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11073 	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
11074     scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11075 						  TYPE_UNSIGNED (scalar_type));
11076 
11077   /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11078      When the component mode passes the above test simply use a type
11079      corresponding to that mode.  The theory is that any use that
11080      would cause problems with this will disable vectorization anyway.  */
11081   else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11082 	   && !INTEGRAL_TYPE_P (scalar_type))
11083     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11084 
11085   /* We can't build a vector type of elements with alignment bigger than
11086      their size.  */
11087   else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11088     scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11089 						  TYPE_UNSIGNED (scalar_type));
11090 
11091   /* If we fell back to using the mode, fail if there was
11092      no scalar type for it.  */
11093   if (scalar_type == NULL_TREE)
11094     return NULL_TREE;
11095 
11096   /* If no prevailing mode was supplied, use the mode the target prefers.
11097      Otherwise lookup a vector mode based on the prevailing mode.  */
11098   if (prevailing_mode == VOIDmode)
11099     {
11100       gcc_assert (known_eq (nunits, 0U));
11101       simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11102       if (SCALAR_INT_MODE_P (simd_mode))
11103 	{
11104 	  /* Traditional behavior is not to take the integer mode
11105 	     literally, but simply to use it as a way of determining
11106 	     the vector size.  It is up to mode_for_vector to decide
11107 	     what the TYPE_MODE should be.
11108 
11109 	     Note that nunits == 1 is allowed in order to support single
11110 	     element vector types.  */
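	  /* For instance, a preferred SIMD mode of DImode for QImode
	     elements simply means "use 8-byte vectors": NUNITS becomes 8
	     and mode_for_vector then picks V8QImode when the target
	     provides it.  */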
11111 	  if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11112 	      || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11113 	    return NULL_TREE;
11114 	}
11115     }
11116   else if (SCALAR_INT_MODE_P (prevailing_mode)
11117 	   || !related_vector_mode (prevailing_mode,
11118 				    inner_mode, nunits).exists (&simd_mode))
11119     {
11120       /* Fall back to using mode_for_vector, mostly in the hope of being
11121 	 able to use an integer mode.  */
11122       if (known_eq (nunits, 0U)
11123 	  && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11124 	return NULL_TREE;
11125 
11126       if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11127 	return NULL_TREE;
11128     }
11129 
11130   vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11131 
11132   /* In cases where the mode was chosen by mode_for_vector, check that
11133      the target actually supports the chosen mode, or that it at least
11134      allows the vector mode to be replaced by a like-sized integer.  */
11135   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11136       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11137     return NULL_TREE;
11138 
11139   /* Re-attach the address-space qualifier if we canonicalized the scalar
11140      type.  */
11141   if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11142     return build_qualified_type
11143 	     (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11144 
11145   return vectype;
11146 }
11147 
11148 /* Function get_vectype_for_scalar_type.
11149 
11150    Returns the vector type corresponding to SCALAR_TYPE as supported
11151    by the target.  If GROUP_SIZE is nonzero and we're performing BB
11152    vectorization, make sure that the number of elements in the vector
11153    is no bigger than GROUP_SIZE.  */
11154 
11155 tree
11156 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11157 			     unsigned int group_size)
11158 {
11159   /* For BB vectorization, we should always have a group size once we've
11160      constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11161      are tentative requests during things like early data reference
11162      analysis and pattern recognition.  */
11163   if (is_a <bb_vec_info> (vinfo))
11164     gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11165   else
11166     group_size = 0;
11167 
11168   tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11169 						      scalar_type);
11170   if (vectype && vinfo->vector_mode == VOIDmode)
11171     vinfo->vector_mode = TYPE_MODE (vectype);
11172 
11173   /* Register the natural choice of vector type, before the group size
11174      has been applied.  */
11175   if (vectype)
11176     vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11177 
11178   /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11179      try again with an explicit number of elements.  */
11180   if (vectype
11181       && group_size
11182       && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11183     {
11184       /* Start with the biggest number of units that fits within
11185 	 GROUP_SIZE and halve it until we find a valid vector type.
11186 	 Usually either the first attempt will succeed or all will
11187 	 fail (in the latter case because GROUP_SIZE is too small
11188 	 for the target), but it's possible that a target could have
11189 	 a hole between supported vector types.
11190 
11191 	 If GROUP_SIZE is not a power of 2, this has the effect of
11192 	 trying the largest power of 2 that fits within the group,
11193 	 even though the group is not a multiple of that vector size.
11194 	 The BB vectorizer will then try to carve up the group into
11195 	 smaller pieces.  */
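      /* E.g. a GROUP_SIZE of 6 tries 4-element and then 2-element
	 vectors before giving up.  */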
11196       unsigned int nunits = 1 << floor_log2 (group_size);
11197       do
11198 	{
11199 	  vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11200 							 scalar_type, nunits);
11201 	  nunits /= 2;
11202 	}
11203       while (nunits > 1 && !vectype);
11204     }
11205 
11206   return vectype;
11207 }
11208 
11209 /* Return the vector type corresponding to SCALAR_TYPE as supported
11210    by the target.  NODE, if nonnull, is the SLP tree node that will
11211    use the returned vector type.  */
11212 
11213 tree
11214 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11215 {
11216   unsigned int group_size = 0;
11217   if (node)
11218     group_size = SLP_TREE_LANES (node);
11219   return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11220 }
11221 
11222 /* Function get_mask_type_for_scalar_type.
11223 
11224    Returns the mask type corresponding to a result of comparison
11225    of vectors of specified SCALAR_TYPE as supported by target.
11226    If GROUP_SIZE is nonzero and we're performing BB vectorization,
11227    make sure that the number of elements in the vector is no bigger
11228    than GROUP_SIZE.  */
11229 
11230 tree
11231 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11232 			       unsigned int group_size)
11233 {
11234   tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11235 
11236   if (!vectype)
11237     return NULL;
11238 
11239   return truth_type_for (vectype);
11240 }
11241 
11242 /* Function get_same_sized_vectype
11243 
11244    Returns a vector type corresponding to SCALAR_TYPE of size
11245    VECTOR_TYPE if supported by the target.  */
11246 
11247 tree
11248 get_same_sized_vectype (tree scalar_type, tree vector_type)
11249 {
11250   if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11251     return truth_type_for (vector_type);
11252 
11253   poly_uint64 nunits;
11254   if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11255 		   GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11256     return NULL_TREE;
11257 
11258   return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11259 					      scalar_type, nunits);
11260 }
11261 
11262 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11263    would not change the chosen vector modes.  */
11264 
11265 bool
11266 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11267 {
11268   for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11269        i != vinfo->used_vector_modes.end (); ++i)
11270     if (!VECTOR_MODE_P (*i)
11271 	|| related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11272       return false;
11273   return true;
11274 }
11275 
11276 /* Function vect_is_simple_use.
11277 
11278    Input:
11279    VINFO - the vect info of the loop or basic block that is being vectorized.
11280    OPERAND - operand in the loop or bb.
11281    Output:
11282    DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11283      case OPERAND is an SSA_NAME that is defined in the vectorizable region
11284    DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11285      the definition could be anywhere in the function
11286    DT - the type of definition
11287 
11288    Returns whether a stmt with OPERAND can be vectorized.
11289    For loops, supportable operands are constants, loop invariants, and operands
11290    that are defined by the current iteration of the loop.  Unsupportable
11291    operands are those that are defined by a previous iteration of the loop (as
11292    is the case in reduction/induction computations).
11293    For basic blocks, supportable operands are constants and bb invariants.
11294    For now, operands defined outside the basic block are not supported.  */
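/* As a rough illustration of the classification below: constants yield
   vect_constant_def; SSA default definitions and names defined outside
   the vectorized region yield vect_external_def; names defined by an
   analyzed PHI, assignment or call inside the region inherit that
   statement's STMT_VINFO_DEF_TYPE (vect_internal_def,
   vect_induction_def, vect_reduction_def, ...).  */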
11295 
11296 bool
11297 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11298 		    stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11299 {
11300   if (def_stmt_info_out)
11301     *def_stmt_info_out = NULL;
11302   if (def_stmt_out)
11303     *def_stmt_out = NULL;
11304   *dt = vect_unknown_def_type;
11305 
11306   if (dump_enabled_p ())
11307     {
11308       dump_printf_loc (MSG_NOTE, vect_location,
11309                        "vect_is_simple_use: operand ");
11310       if (TREE_CODE (operand) == SSA_NAME
11311 	  && !SSA_NAME_IS_DEFAULT_DEF (operand))
11312 	dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11313       else
11314 	dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11315     }
11316 
11317   if (CONSTANT_CLASS_P (operand))
11318     *dt = vect_constant_def;
11319   else if (is_gimple_min_invariant (operand))
11320     *dt = vect_external_def;
11321   else if (TREE_CODE (operand) != SSA_NAME)
11322     *dt = vect_unknown_def_type;
11323   else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11324     *dt = vect_external_def;
11325   else
11326     {
11327       gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11328       stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11329       if (!stmt_vinfo)
11330 	*dt = vect_external_def;
11331       else
11332 	{
11333 	  stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11334 	  def_stmt = stmt_vinfo->stmt;
11335 	  switch (gimple_code (def_stmt))
11336 	    {
11337 	    case GIMPLE_PHI:
11338 	    case GIMPLE_ASSIGN:
11339 	    case GIMPLE_CALL:
11340 	      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11341 	      break;
11342 	    default:
11343 	      *dt = vect_unknown_def_type;
11344 	      break;
11345 	    }
11346 	  if (def_stmt_info_out)
11347 	    *def_stmt_info_out = stmt_vinfo;
11348 	}
11349       if (def_stmt_out)
11350 	*def_stmt_out = def_stmt;
11351     }
11352 
11353   if (dump_enabled_p ())
11354     {
11355       dump_printf (MSG_NOTE, ", type of def: ");
11356       switch (*dt)
11357 	{
11358 	case vect_uninitialized_def:
11359 	  dump_printf (MSG_NOTE, "uninitialized\n");
11360 	  break;
11361 	case vect_constant_def:
11362 	  dump_printf (MSG_NOTE, "constant\n");
11363 	  break;
11364 	case vect_external_def:
11365 	  dump_printf (MSG_NOTE, "external\n");
11366 	  break;
11367 	case vect_internal_def:
11368 	  dump_printf (MSG_NOTE, "internal\n");
11369 	  break;
11370 	case vect_induction_def:
11371 	  dump_printf (MSG_NOTE, "induction\n");
11372 	  break;
11373 	case vect_reduction_def:
11374 	  dump_printf (MSG_NOTE, "reduction\n");
11375 	  break;
11376 	case vect_double_reduction_def:
11377 	  dump_printf (MSG_NOTE, "double reduction\n");
11378 	  break;
11379 	case vect_nested_cycle:
11380 	  dump_printf (MSG_NOTE, "nested cycle\n");
11381 	  break;
11382 	case vect_unknown_def_type:
11383 	  dump_printf (MSG_NOTE, "unknown\n");
11384 	  break;
11385 	}
11386     }
11387 
11388   if (*dt == vect_unknown_def_type)
11389     {
11390       if (dump_enabled_p ())
11391         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11392                          "Unsupported pattern.\n");
11393       return false;
11394     }
11395 
11396   return true;
11397 }
11398 
11399 /* Function vect_is_simple_use.
11400 
11401    Same as vect_is_simple_use but also determines the vector operand
11402    type of OPERAND and stores it to *VECTYPE.  If the definition of
11403    OPERAND is vect_uninitialized_def, vect_constant_def or
11404    vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11405    is responsible for computing the best suited vector type for the
11406    scalar operand.  */
11407 
11408 bool
11409 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11410 		    tree *vectype, stmt_vec_info *def_stmt_info_out,
11411 		    gimple **def_stmt_out)
11412 {
11413   stmt_vec_info def_stmt_info;
11414   gimple *def_stmt;
11415   if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11416     return false;
11417 
11418   if (def_stmt_out)
11419     *def_stmt_out = def_stmt;
11420   if (def_stmt_info_out)
11421     *def_stmt_info_out = def_stmt_info;
11422 
11423   /* Now get a vector type if the def is internal, otherwise supply
11424      NULL_TREE and leave it up to the caller to figure out a proper
11425      type for the use stmt.  */
11426   if (*dt == vect_internal_def
11427       || *dt == vect_induction_def
11428       || *dt == vect_reduction_def
11429       || *dt == vect_double_reduction_def
11430       || *dt == vect_nested_cycle)
11431     {
11432       *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11433       gcc_assert (*vectype != NULL_TREE);
11434       if (dump_enabled_p ())
11435 	dump_printf_loc (MSG_NOTE, vect_location,
11436 			 "vect_is_simple_use: vectype %T\n", *vectype);
11437     }
11438   else if (*dt == vect_uninitialized_def
11439 	   || *dt == vect_constant_def
11440 	   || *dt == vect_external_def)
11441     *vectype = NULL_TREE;
11442   else
11443     gcc_unreachable ();
11444 
11445   return true;
11446 }
11447 
11448 /* Function vect_is_simple_use.
11449 
11450    Same as vect_is_simple_use but determines the operand by operand
11451    position OPERAND from either STMT or SLP_NODE, filling in *OP
11452    and *SLP_DEF (when SLP_NODE is not NULL).  */
11453 
11454 bool
11455 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11456 		    unsigned operand, tree *op, slp_tree *slp_def,
11457 		    enum vect_def_type *dt,
11458 		    tree *vectype, stmt_vec_info *def_stmt_info_out)
11459 {
11460   if (slp_node)
11461     {
11462       slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11463       *slp_def = child;
11464       *vectype = SLP_TREE_VECTYPE (child);
11465       if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11466 	{
11467 	  *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11468 	  return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11469 	}
11470       else
11471 	{
11472 	  if (def_stmt_info_out)
11473 	    *def_stmt_info_out = NULL;
11474 	  *op = SLP_TREE_SCALAR_OPS (child)[0];
11475 	  *dt = SLP_TREE_DEF_TYPE (child);
11476 	  return true;
11477 	}
11478     }
11479   else
11480     {
11481       *slp_def = NULL;
11482       if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11483 	{
11484 	  if (gimple_assign_rhs_code (ass) == COND_EXPR
11485 	      && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11486 	    {
11487 	      if (operand < 2)
11488 		*op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11489 	      else
11490 		*op = gimple_op (ass, operand);
11491 	    }
11492 	  else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11493 	    *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11494 	  else
11495 	    *op = gimple_op (ass, operand + 1);
11496 	}
11497       else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11498 	{
11499 	  if (gimple_call_internal_p (call)
11500 	      && internal_store_fn_p (gimple_call_internal_fn (call)))
11501 	    operand = internal_fn_stored_value_index (gimple_call_internal_fn
11502 									(call));
11503 	  *op = gimple_call_arg (call, operand);
11504 	}
11505       else
11506 	gcc_unreachable ();
11507       return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11508     }
11509 }
11510 
11511 /* If OP is not NULL and is external or constant update its vector
11512    type with VECTYPE.  Returns true if successful or false if not,
11513    for example when conflicting vector types are present.  */
11514 
11515 bool
11516 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11517 {
11518   if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11519     return true;
11520   if (SLP_TREE_VECTYPE (op))
11521     return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11522   SLP_TREE_VECTYPE (op) = vectype;
11523   return true;
11524 }
11525 
11526 /* Function supportable_widening_operation
11527 
11528    Check whether an operation represented by the code CODE is a
11529    widening operation that is supported by the target platform in
11530    vector form (i.e., when operating on arguments of type VECTYPE_IN
11531    producing a result of type VECTYPE_OUT).
11532 
11533    Widening operations we currently support are NOP (CONVERT), FLOAT,
11534    FIX_TRUNC and WIDEN_MULT.  This function checks if these operations
11535    are supported by the target platform either directly (via vector
11536    tree-codes), or via target builtins.
11537 
11538    Output:
11539    - CODE1 and CODE2 are codes of vector operations to be used when
11540    vectorizing the operation, if available.
11541    - MULTI_STEP_CVT determines the number of required intermediate steps in
11542    case of multi-step conversion (like char->short->int - in that case
11543    MULTI_STEP_CVT will be 1).
11544    - INTERM_TYPES contains the intermediate type required to perform the
11545    widening operation (short in the above example).  */
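/* For instance (a sketch assuming 128-bit vectors): widening chars in
   V16QImode to ints in V4SImode needs one intermediate step through
   V8HImode shorts, so *MULTI_STEP_CVT is set to 1 and INTERM_TYPES
   receives the short vector type, the work being done by the
   VEC_UNPACK_LO/HI pairs chosen below.  */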
11546 
11547 bool
11548 supportable_widening_operation (vec_info *vinfo,
11549 				enum tree_code code, stmt_vec_info stmt_info,
11550 				tree vectype_out, tree vectype_in,
11551                                 enum tree_code *code1, enum tree_code *code2,
11552                                 int *multi_step_cvt,
11553                                 vec<tree> *interm_types)
11554 {
11555   loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11556   class loop *vect_loop = NULL;
11557   machine_mode vec_mode;
11558   enum insn_code icode1, icode2;
11559   optab optab1, optab2;
11560   tree vectype = vectype_in;
11561   tree wide_vectype = vectype_out;
11562   enum tree_code c1, c2;
11563   int i;
11564   tree prev_type, intermediate_type;
11565   machine_mode intermediate_mode, prev_mode;
11566   optab optab3, optab4;
11567 
11568   *multi_step_cvt = 0;
11569   if (loop_info)
11570     vect_loop = LOOP_VINFO_LOOP (loop_info);
11571 
11572   switch (code)
11573     {
11574     case WIDEN_MULT_EXPR:
11575       /* The result of a vectorized widening operation usually requires
11576 	 two vectors (because the widened results do not fit into one vector).
11577 	 The generated vector results would normally be expected to be
11578 	 generated in the same order as in the original scalar computation,
11579 	 i.e. if 8 results are generated in each vector iteration, they are
11580 	 to be organized as follows:
11581 		vect1: [res1,res2,res3,res4],
11582 		vect2: [res5,res6,res7,res8].
11583 
11584 	 However, in the special case that the result of the widening
11585 	 operation is used in a reduction computation only, the order doesn't
11586 	 matter (because when vectorizing a reduction we change the order of
11587 	 the computation).  Some targets can take advantage of this and
11588 	 generate more efficient code.  For example, targets like Altivec,
11589 	 that support widen_mult using a sequence of {mult_even,mult_odd}
11590 	 generate the following vectors:
11591 		vect1: [res1,res3,res5,res7],
11592 		vect2: [res2,res4,res6,res8].
11593 
11594 	 When vectorizing outer-loops, we execute the inner-loop sequentially
11595 	 (each vectorized inner-loop iteration contributes to VF outer-loop
11596 	 iterations in parallel).  We therefore don't allow changing the
11597 	 order of the computation in the inner-loop during outer-loop
11598 	 vectorization.  */
11599       /* TODO: Another case in which order doesn't *really* matter is when we
11600 	 widen and then contract again, e.g. (short)((int)x * y >> 8).
11601 	 Normally, pack_trunc performs an even/odd permute, whereas the
11602 	 repack from an even/odd expansion would be an interleave, which
11603 	 would be significantly simpler for e.g. AVX2.  */
11604       /* In any case, in order to avoid duplicating the code below, recurse
11605 	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
11606 	 are properly set up for the caller.  If we fail, we'll continue with
11607 	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
11608       if (vect_loop
11609 	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11610 	  && !nested_in_vect_loop_p (vect_loop, stmt_info)
11611 	  && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11612 					     stmt_info, vectype_out,
11613 					     vectype_in, code1, code2,
11614 					     multi_step_cvt, interm_types))
11615         {
11616           /* Elements in a vector with vect_used_by_reduction property cannot
11617              be reordered if the use chain with this property does not have the
11618              same operation.  One such example is s += a * b, where elements
11619              in a and b cannot be reordered.  Here we check if the vector defined
11620              by STMT is only directly used in the reduction statement.  */
11621 	  tree lhs = gimple_assign_lhs (stmt_info->stmt);
11622 	  stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11623 	  if (use_stmt_info
11624 	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11625 	    return true;
11626         }
11627       c1 = VEC_WIDEN_MULT_LO_EXPR;
11628       c2 = VEC_WIDEN_MULT_HI_EXPR;
11629       break;
11630 
11631     case DOT_PROD_EXPR:
11632       c1 = DOT_PROD_EXPR;
11633       c2 = DOT_PROD_EXPR;
11634       break;
11635 
11636     case SAD_EXPR:
11637       c1 = SAD_EXPR;
11638       c2 = SAD_EXPR;
11639       break;
11640 
11641     case VEC_WIDEN_MULT_EVEN_EXPR:
11642       /* Support the recursion induced just above.  */
11643       c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11644       c2 = VEC_WIDEN_MULT_ODD_EXPR;
11645       break;
11646 
11647     case WIDEN_LSHIFT_EXPR:
11648       c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11649       c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11650       break;
11651 
11652     case WIDEN_PLUS_EXPR:
11653       c1 = VEC_WIDEN_PLUS_LO_EXPR;
11654       c2 = VEC_WIDEN_PLUS_HI_EXPR;
11655       break;
11656 
11657     case WIDEN_MINUS_EXPR:
11658       c1 = VEC_WIDEN_MINUS_LO_EXPR;
11659       c2 = VEC_WIDEN_MINUS_HI_EXPR;
11660       break;
11661 
11662     CASE_CONVERT:
11663       c1 = VEC_UNPACK_LO_EXPR;
11664       c2 = VEC_UNPACK_HI_EXPR;
11665       break;
11666 
11667     case FLOAT_EXPR:
11668       c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11669       c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11670       break;
11671 
11672     case FIX_TRUNC_EXPR:
11673       c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11674       c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11675       break;
11676 
11677     default:
11678       gcc_unreachable ();
11679     }
11680 
11681   if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11682     std::swap (c1, c2);
11683 
11684   if (code == FIX_TRUNC_EXPR)
11685     {
11686       /* The signedness is determined from output operand.  */
11687       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11688       optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11689     }
11690   else if (CONVERT_EXPR_CODE_P (code)
11691 	   && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11692 	   && VECTOR_BOOLEAN_TYPE_P (vectype)
11693 	   && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11694 	   && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11695     {
11696       /* If the input and result modes are the same, a different optab
11697 	 is needed where we pass in the number of units in vectype.  */
11698       optab1 = vec_unpacks_sbool_lo_optab;
11699       optab2 = vec_unpacks_sbool_hi_optab;
11700     }
11701   else
11702     {
11703       optab1 = optab_for_tree_code (c1, vectype, optab_default);
11704       optab2 = optab_for_tree_code (c2, vectype, optab_default);
11705     }
11706 
11707   if (!optab1 || !optab2)
11708     return false;
11709 
11710   vec_mode = TYPE_MODE (vectype);
11711   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11712        || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11713     return false;
11714 
11715   *code1 = c1;
11716   *code2 = c2;
11717 
11718   if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11719       && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11720     {
11721       if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11722 	return true;
11723       /* For scalar masks we may have different boolean
11724 	 vector types having the same QImode.  Thus we
11725 	 add an additional check on the number of elements.  */
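      /* E.g. on targets with integer mask modes (AVX-512 style), the
	 masks for 2-, 4- and 8-element vectors may all have QImode, so
	 the mode check alone is not enough.  */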
11726       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11727 		    TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11728 	return true;
11729     }
11730 
11731   /* Check if it's a multi-step conversion that can be done using intermediate
11732      types.  */
11733 
11734   prev_type = vectype;
11735   prev_mode = vec_mode;
11736 
11737   if (!CONVERT_EXPR_CODE_P (code))
11738     return false;
11739 
11740   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11741      intermediate steps in the promotion sequence.  We try
11742      MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
11743      not.  */
11744   interm_types->create (MAX_INTERM_CVT_STEPS);
11745   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11746     {
11747       intermediate_mode = insn_data[icode1].operand[0].mode;
11748       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11749 	intermediate_type
11750 	  = vect_halve_mask_nunits (prev_type, intermediate_mode);
11751       else
11752 	intermediate_type
11753 	  = lang_hooks.types.type_for_mode (intermediate_mode,
11754 					    TYPE_UNSIGNED (prev_type));
11755 
11756       if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11757 	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
11758 	  && intermediate_mode == prev_mode
11759 	  && SCALAR_INT_MODE_P (prev_mode))
11760 	{
11761 	  /* If the input and result modes are the same, a different optab
11762 	     is needed where we pass in the number of units in vectype.  */
11763 	  optab3 = vec_unpacks_sbool_lo_optab;
11764 	  optab4 = vec_unpacks_sbool_hi_optab;
11765 	}
11766       else
11767 	{
11768 	  optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11769 	  optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11770 	}
11771 
11772       if (!optab3 || !optab4
11773           || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11774 	  || insn_data[icode1].operand[0].mode != intermediate_mode
11775 	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11776 	  || insn_data[icode2].operand[0].mode != intermediate_mode
11777 	  || ((icode1 = optab_handler (optab3, intermediate_mode))
11778 	      == CODE_FOR_nothing)
11779 	  || ((icode2 = optab_handler (optab4, intermediate_mode))
11780 	      == CODE_FOR_nothing))
11781 	break;
11782 
11783       interm_types->quick_push (intermediate_type);
11784       (*multi_step_cvt)++;
11785 
11786       if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11787 	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11788 	{
11789 	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11790 	    return true;
11791 	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11792 			TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11793 	    return true;
11794 	}
11795 
11796       prev_type = intermediate_type;
11797       prev_mode = intermediate_mode;
11798     }
11799 
11800   interm_types->release ();
11801   return false;
11802 }
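
/* Illustrative-only sketch (the toy_* helper below is hypothetical and not
   part of the vectorizer proper): a scalar model of one VEC_UNPACK_LO/HI
   widening step as selected above, using plain arrays as stand-in
   "vectors".  The LO variant widens the first half of the input lanes and
   HI the second half; on big-endian targets the real code swaps the two
   (see the BYTES_BIG_ENDIAN handling above).  A widening such as
   char -> int takes two of these steps (char -> short -> int), which is
   what the multi-step loop records in *MULTI_STEP_CVT and INTERM_TYPES.  */

static inline void
toy_vec_unpack_lo_hi (const signed char *in, unsigned nunits,
		      short *lo, short *hi)
{
  for (unsigned i = 0; i < nunits / 2; i++)
    {
      lo[i] = in[i];		     /* VEC_UNPACK_LO_EXPR lane.  */
      hi[i] = in[nunits / 2 + i];    /* VEC_UNPACK_HI_EXPR lane.  */
    }
}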
11803 
11804 
11805 /* Function supportable_narrowing_operation
11806 
11807    Check whether an operation represented by the code CODE is a
11808    narrowing operation that is supported by the target platform in
11809    vector form (i.e., when operating on arguments of type VECTYPE_IN
11810    and producing a result of type VECTYPE_OUT).
11811 
11812    Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11813    and FLOAT.  This function checks if these operations are supported by
11814    the target platform directly via vector tree-codes.
11815 
11816    Output:
11817    - CODE1 is the code of a vector operation to be used when
11818    vectorizing the operation, if available.
11819    - MULTI_STEP_CVT determines the number of required intermediate steps in
11820    the case of a multi-step conversion (like int->short->char, in which
11821    case MULTI_STEP_CVT will be 1).
11822    - INTERM_TYPES contains the intermediate type required to perform the
11823    narrowing operation (short in the above example).   */
11824 
11825 bool
11826 supportable_narrowing_operation (enum tree_code code,
11827 				 tree vectype_out, tree vectype_in,
11828 				 enum tree_code *code1, int *multi_step_cvt,
11829                                  vec<tree> *interm_types)
11830 {
11831   machine_mode vec_mode;
11832   enum insn_code icode1;
11833   optab optab1, interm_optab;
11834   tree vectype = vectype_in;
11835   tree narrow_vectype = vectype_out;
11836   enum tree_code c1;
11837   tree intermediate_type, prev_type;
11838   machine_mode intermediate_mode, prev_mode;
11839   int i;
11840   bool uns;
11841 
11842   *multi_step_cvt = 0;
11843   switch (code)
11844     {
11845     CASE_CONVERT:
11846       c1 = VEC_PACK_TRUNC_EXPR;
11847       if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
11848 	  && VECTOR_BOOLEAN_TYPE_P (vectype)
11849 	  && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
11850 	  && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11851 	optab1 = vec_pack_sbool_trunc_optab;
11852       else
11853 	optab1 = optab_for_tree_code (c1, vectype, optab_default);
11854       break;
11855 
11856     case FIX_TRUNC_EXPR:
11857       c1 = VEC_PACK_FIX_TRUNC_EXPR;
11858       /* The signedness is determined from the output operand.  */
11859       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11860       break;
11861 
11862     case FLOAT_EXPR:
11863       c1 = VEC_PACK_FLOAT_EXPR;
11864       optab1 = optab_for_tree_code (c1, vectype, optab_default);
11865       break;
11866 
11867     default:
11868       gcc_unreachable ();
11869     }
11870 
11871   if (!optab1)
11872     return false;
11873 
11874   vec_mode = TYPE_MODE (vectype);
11875   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
11876     return false;
11877 
11878   *code1 = c1;
11879 
11880   if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11881     {
11882       if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11883 	return true;
11884       /* For scalar masks we may have different boolean
11885 	 vector types having the same QImode.  Thus we
11886 	 add an additional check on the number of elements.  */
11887       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
11888 		    TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11889 	return true;
11890     }
11891 
11892   if (code == FLOAT_EXPR)
11893     return false;
11894 
11895   /* Check if it's a multi-step conversion that can be done using intermediate
11896      types.  */
11897   prev_mode = vec_mode;
11898   prev_type = vectype;
11899   if (code == FIX_TRUNC_EXPR)
11900     uns = TYPE_UNSIGNED (vectype_out);
11901   else
11902     uns = TYPE_UNSIGNED (vectype);
11903 
11904   /* For a multi-step FIX_TRUNC_EXPR, prefer a signed float-to-integer
11905      conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often
11906      more costly than signed.  */
11907   if (code == FIX_TRUNC_EXPR && uns)
11908     {
11909       enum insn_code icode2;
11910 
11911       intermediate_type
11912 	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
11913       interm_optab
11914 	= optab_for_tree_code (c1, intermediate_type, optab_default);
11915       if (interm_optab != unknown_optab
11916 	  && (icode2 = optab_handler (interm_optab, vec_mode)) != CODE_FOR_nothing
11917 	  && insn_data[icode1].operand[0].mode
11918 	     == insn_data[icode2].operand[0].mode)
11919 	{
11920 	  uns = false;
11921 	  optab1 = interm_optab;
11922 	  icode1 = icode2;
11923 	}
11924     }
11925 
11926   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11927      intermediate steps in the narrowing sequence.  We try
11928      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
11929   interm_types->create (MAX_INTERM_CVT_STEPS);
11930   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11931     {
11932       intermediate_mode = insn_data[icode1].operand[0].mode;
11933       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11934 	intermediate_type
11935 	  = vect_double_mask_nunits (prev_type, intermediate_mode);
11936       else
11937 	intermediate_type
11938 	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
11939       if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11940 	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
11941 	  && intermediate_mode == prev_mode
11942 	  && SCALAR_INT_MODE_P (prev_mode))
11943 	interm_optab = vec_pack_sbool_trunc_optab;
11944       else
11945 	interm_optab
11946 	  = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
11947 				 optab_default);
11948       if (!interm_optab
11949 	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
11950 	  || insn_data[icode1].operand[0].mode != intermediate_mode
11951 	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
11952 	      == CODE_FOR_nothing))
11953 	break;
11954 
11955       interm_types->quick_push (intermediate_type);
11956       (*multi_step_cvt)++;
11957 
11958       if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11959 	{
11960 	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11961 	    return true;
11962 	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
11963 			TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11964 	    return true;
11965 	}
11966 
11967       prev_mode = intermediate_mode;
11968       prev_type = intermediate_type;
11969       optab1 = interm_optab;
11970     }
11971 
11972   interm_types->release ();
11973   return false;
11974 }
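
/* Illustrative-only sketch (the toy_* helper below is hypothetical and not
   part of the vectorizer proper): a scalar model of VEC_PACK_TRUNC_EXPR,
   the workhorse of the multi-step loop above, using plain arrays as
   stand-in "vectors".  One pack step halves the element width by
   concatenating two input vectors and truncating every lane; a narrowing
   such as int -> char chains two such steps (int -> short, then
   short -> char), giving *MULTI_STEP_CVT == 1 and a single short
   intermediate type, as described in the function comment.  */

static inline void
toy_vec_pack_trunc (const int *in0, const int *in1, unsigned nunits_in,
		    short *out)
{
  for (unsigned i = 0; i < nunits_in; i++)
    {
      out[i] = (short) in0[i];		     /* Truncated lanes of operand 0.  */
      out[nunits_in + i] = (short) in1[i];   /* Truncated lanes of operand 1.  */
    }
}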
11975 
11976 /* Generate and return a statement that sets vector mask MASK such that
11977    MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I.  */
11978 
11979 gcall *
11980 vect_gen_while (tree mask, tree start_index, tree end_index)
11981 {
11982   tree cmp_type = TREE_TYPE (start_index);
11983   tree mask_type = TREE_TYPE (mask);
11984   gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
11985 						       cmp_type, mask_type,
11986 						       OPTIMIZE_FOR_SPEED));
11987   gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
11988 					    start_index, end_index,
11989 					    build_zero_cst (mask_type));
11990   gimple_call_set_lhs (call, mask);
11991   return call;
11992 }
11993 
11994 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
11995    J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */
11996 
11997 tree
11998 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
11999 		    tree end_index)
12000 {
12001   tree tmp = make_ssa_name (mask_type);
12002   gcall *call = vect_gen_while (tmp, start_index, end_index);
12003   gimple_seq_add_stmt (seq, call);
12004   return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12005 }
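
/* Illustrative-only sketch (the toy_* helper below is hypothetical and not
   part of the vectorizer proper): a scalar model of the IFN_WHILE_ULT
   semantics relied on by the two helpers above.  Lane I of the mask is set
   iff START_INDEX + I < END_INDEX, so the result is an all-true prefix
   followed by all-false lanes; vect_gen_while_not simply inverts it.  */

static inline void
toy_while_ult (unsigned start_index, unsigned end_index, unsigned nunits,
	       bool *mask, bool *not_mask)
{
  for (unsigned i = 0; i < nunits; i++)
    {
      mask[i] = start_index + i < end_index;
      not_mask[i] = !mask[i];
    }
}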
12006 
12007 /* Try to compute the vector types required to vectorize STMT_INFO,
12008    returning true on success and false if vectorization isn't possible.
12009    If GROUP_SIZE is nonzero and we're performing BB vectorization,
12010    make sure that the number of elements in the vectors is no bigger
12011    than GROUP_SIZE.
12012 
12013    On success:
12014 
12015    - Set *STMT_VECTYPE_OUT to:
12016      - NULL_TREE if the statement doesn't need to be vectorized;
12017      - the equivalent of STMT_VINFO_VECTYPE otherwise.
12018 
12019    - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12020      number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12021      statement does not help to determine the overall number of units.  */
12022 
12023 opt_result
12024 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12025 				tree *stmt_vectype_out,
12026 				tree *nunits_vectype_out,
12027 				unsigned int group_size)
12028 {
12029   gimple *stmt = stmt_info->stmt;
12030 
12031   /* For BB vectorization, we should always have a group size once we've
12032      constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12033      are tentative requests during things like early data reference
12034      analysis and pattern recognition.  */
12035   if (is_a <bb_vec_info> (vinfo))
12036     gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12037   else
12038     group_size = 0;
12039 
12040   *stmt_vectype_out = NULL_TREE;
12041   *nunits_vectype_out = NULL_TREE;
12042 
12043   if (gimple_get_lhs (stmt) == NULL_TREE
12044       /* MASK_STORE has no lhs, but is ok.  */
12045       && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12046     {
12047       if (is_a <gcall *> (stmt))
12048 	{
12049 	  /* Ignore calls with no lhs.  These must be calls to
12050 	     #pragma omp simd functions, and the vectorization factor
12051 	     they really need can't be determined until
12052 	     vectorizable_simd_clone_call.  */
12053 	  if (dump_enabled_p ())
12054 	    dump_printf_loc (MSG_NOTE, vect_location,
12055 			     "defer to SIMD clone analysis.\n");
12056 	  return opt_result::success ();
12057 	}
12058 
12059       return opt_result::failure_at (stmt,
12060 				     "not vectorized: irregular stmt.%G", stmt);
12061     }
12062 
12063   if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
12064     return opt_result::failure_at (stmt,
12065 				   "not vectorized: vector stmt in loop:%G",
12066 				   stmt);
12067 
12068   tree vectype;
12069   tree scalar_type = NULL_TREE;
12070   if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12071     {
12072       vectype = STMT_VINFO_VECTYPE (stmt_info);
12073       if (dump_enabled_p ())
12074 	dump_printf_loc (MSG_NOTE, vect_location,
12075 			 "precomputed vectype: %T\n", vectype);
12076     }
12077   else if (vect_use_mask_type_p (stmt_info))
12078     {
12079       unsigned int precision = stmt_info->mask_precision;
12080       scalar_type = build_nonstandard_integer_type (precision, 1);
12081       vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12082       if (!vectype)
12083 	return opt_result::failure_at (stmt, "not vectorized: unsupported"
12084 				       " data-type %T\n", scalar_type);
12085       if (dump_enabled_p ())
12086 	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12087     }
12088   else
12089     {
12090       if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12091 	scalar_type = TREE_TYPE (DR_REF (dr));
12092       else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12093 	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12094       else
12095 	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12096 
12097       if (dump_enabled_p ())
12098 	{
12099 	  if (group_size)
12100 	    dump_printf_loc (MSG_NOTE, vect_location,
12101 			     "get vectype for scalar type (group size %d):"
12102 			     " %T\n", group_size, scalar_type);
12103 	  else
12104 	    dump_printf_loc (MSG_NOTE, vect_location,
12105 			     "get vectype for scalar type: %T\n", scalar_type);
12106 	}
12107       vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12108       if (!vectype)
12109 	return opt_result::failure_at (stmt,
12110 				       "not vectorized:"
12111 				       " unsupported data-type %T\n",
12112 				       scalar_type);
12113 
12114       if (dump_enabled_p ())
12115 	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12116     }
12117   *stmt_vectype_out = vectype;
12118 
12119   /* Don't try to compute scalar types if the stmt produces a boolean
12120      vector; use the existing vector type instead.  */
12121   tree nunits_vectype = vectype;
12122   if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12123     {
12124       /* The number of units is set according to the smallest scalar
12125 	 type (or the largest vector size, but we only support one
12126 	 vector size per vectorization).  */
12127       HOST_WIDE_INT dummy;
12128       scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
12129       if (scalar_type != TREE_TYPE (vectype))
12130 	{
12131 	  if (dump_enabled_p ())
12132 	    dump_printf_loc (MSG_NOTE, vect_location,
12133 			     "get vectype for smallest scalar type: %T\n",
12134 			     scalar_type);
12135 	  nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12136 							group_size);
12137 	  if (!nunits_vectype)
12138 	    return opt_result::failure_at
12139 	      (stmt, "not vectorized: unsupported data-type %T\n",
12140 	       scalar_type);
12141 	  if (dump_enabled_p ())
12142 	    dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12143 			     nunits_vectype);
12144 	}
12145     }
12146 
12147   if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12148 		   TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12149     return opt_result::failure_at (stmt,
12150 				   "Not vectorized: Incompatible number "
12151 				   "of vector subparts between %T and %T\n",
12152 				   nunits_vectype, *stmt_vectype_out);
12153 
12154   if (dump_enabled_p ())
12155     {
12156       dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12157       dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12158       dump_printf (MSG_NOTE, "\n");
12159     }
12160 
12161   *nunits_vectype_out = nunits_vectype;
12162   return opt_result::success ();
12163 }
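
/* Illustrative-only sketch (the toy_* helper below and its example numbers
   are hypothetical): a plain-integer model of the final subparts check
   above.  On a 128-bit target, a widening statement such as
   "int_res = (int) short_src" would get a 4-lane stmt vectype from its int
   result but an 8-lane nunits vectype from its smallest scalar type
   (short); the check succeeds because 8 is a multiple of 4.  */

static inline bool
toy_subparts_compatible_p (unsigned nunits_vectype_lanes,
			   unsigned stmt_vectype_lanes)
{
  /* Mirrors the multiple_p test on TYPE_VECTOR_SUBPARTS.  */
  return nunits_vectype_lanes % stmt_vectype_lanes == 0;
}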
12164 
12165 /* Generate and return a statement sequence that sets the vector length LEN
12166 
12167    min_of_start_and_end = min (START_INDEX, END_INDEX);
12168    left_len = END_INDEX - min_of_start_and_end;
12169    rhs = min (left_len, LEN_LIMIT);
12170    LEN = rhs;
12171 
12172    Note: the cost of the code generated by this function is modeled
12173    by vect_estimate_min_profitable_iters, so changes here may need
12174    corresponding changes there.  */
12175 
12176 gimple_seq
12177 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12178 {
12179   gimple_seq stmts = NULL;
12180   tree len_type = TREE_TYPE (len);
12181   gcc_assert (TREE_TYPE (start_index) == len_type);
12182 
12183   tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12184   tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12185   tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12186   gimple *stmt = gimple_build_assign (len, rhs);
12187   gimple_seq_add_stmt (&stmts, stmt);
12188 
12189   return stmts;
12190 }
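
/* Illustrative-only sketch (the toy_* helper below is hypothetical and not
   part of the vectorizer proper): a scalar model of the length computation
   built above.  Clamping START_INDEX to END_INDEX first keeps the unsigned
   subtraction from wrapping once START_INDEX has run past END_INDEX, and
   the final MIN caps the result at LEN_LIMIT.  */

static inline unsigned
toy_gen_len (unsigned start_index, unsigned end_index, unsigned len_limit)
{
  unsigned min_of_start_and_end = MIN (start_index, end_index);
  unsigned left_len = end_index - min_of_start_and_end;
  return MIN (left_len, len_limit);
}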
12191 
12192