1 /* Statement Analysis and Transformation for Vectorization
2    Copyright (C) 2003-2019 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4    and Ira Rosen <irar@il.ibm.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h"		/* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 
58 /* For lang_hooks.types.type_for_mode.  */
59 #include "langhooks.h"
60 
61 /* Return the vectorized type for the given statement.  */
62 
63 tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
65 {
66   return STMT_VINFO_VECTYPE (stmt_info);
67 }
68 
69 /* Return TRUE iff the given statement is in an inner loop relative to
70    the loop being vectorized.  */
71 bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
73 {
74   gimple *stmt = STMT_VINFO_STMT (stmt_info);
75   basic_block bb = gimple_bb (stmt);
76   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
77   struct loop* loop;
78 
79   if (!loop_vinfo)
80     return false;
81 
82   loop = LOOP_VINFO_LOOP (loop_vinfo);
83 
84   return (bb->loop_father == loop->inner);
85 }
86 
87 /* Record the cost of a statement, either by directly informing the
88    target model or by saving it in a vector for later processing.
89    Return a preliminary estimate of the statement's cost.  */
90 
91 unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
93 		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
94 		  int misalign, enum vect_cost_model_location where)
95 {
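  /* Loads and stores done through gather/scatter data references are
     costed as gathers/scatters rather than as ordinary vector accesses.  */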
96   if ((kind == vector_load || kind == unaligned_load)
97       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
98     kind = vector_gather_load;
99   if ((kind == vector_store || kind == unaligned_store)
100       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
101     kind = vector_scatter_store;
102 
103   stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
104   body_cost_vec->safe_push (si);
105 
106   tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
107   return (unsigned)
108       (builtin_vectorization_cost (kind, vectype, misalign) * count);
109 }
110 
111 /* Return a variable of type ELEM_TYPE[NELEMS].  */
112 
113 static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
115 {
116   return create_tmp_var (build_array_type_nelts (elem_type, nelems),
117 			 "vect_array");
118 }
119 
120 /* ARRAY is an array of vectors created by create_vector_array.
121    Return an SSA_NAME for the vector in index N.  The reference
122    is part of the vectorization of STMT_INFO and the vector is associated
123    with scalar destination SCALAR_DEST.  */
124 
125 static tree
read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
127 		   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
128 {
129   tree vect_type, vect, vect_name, array_ref;
130   gimple *new_stmt;
131 
132   gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
133   vect_type = TREE_TYPE (TREE_TYPE (array));
134   vect = vect_create_destination_var (scalar_dest, vect_type);
135   array_ref = build4 (ARRAY_REF, vect_type, array,
136 		      build_int_cst (size_type_node, n),
137 		      NULL_TREE, NULL_TREE);
138 
139   new_stmt = gimple_build_assign (vect, array_ref);
140   vect_name = make_ssa_name (vect, new_stmt);
141   gimple_assign_set_lhs (new_stmt, vect_name);
142   vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
143 
144   return vect_name;
145 }
146 
147 /* ARRAY is an array of vectors created by create_vector_array.
148    Emit code to store SSA_NAME VECT in index N of the array.
149    The store is part of the vectorization of STMT_INFO.  */
150 
151 static void
write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
153 		    tree vect, tree array, unsigned HOST_WIDE_INT n)
154 {
155   tree array_ref;
156   gimple *new_stmt;
157 
158   array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
159 		      build_int_cst (size_type_node, n),
160 		      NULL_TREE, NULL_TREE);
161 
162   new_stmt = gimple_build_assign (array_ref, vect);
163   vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
164 }
165 
166 /* PTR is a pointer to an array of type TYPE.  Return a representation
167    of *PTR.  The memory reference replaces those in FIRST_DR
168    (and its group).  */
169 
170 static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
172 {
173   tree mem_ref;
174 
175   mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
176   /* Arrays have the same alignment as their type.  */
177   set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
178   return mem_ref;
179 }
180 
181 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
182    Emit the clobber before *GSI.  */
183 
184 static void
vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
186 		       tree var)
187 {
188   tree clobber = build_clobber (TREE_TYPE (var));
189   gimple *new_stmt = gimple_build_assign (var, clobber);
190   vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
191 }
192 
193 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
194 
195 /* Function vect_mark_relevant.
196 
197    Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */
198 
199 static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
201 		    enum vect_relevant relevant, bool live_p)
202 {
203   enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
204   bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
205 
206   if (dump_enabled_p ())
207     dump_printf_loc (MSG_NOTE, vect_location,
208 		     "mark relevant %d, live %d: %G", relevant, live_p,
209 		     stmt_info->stmt);
210 
211   /* If this stmt is an original stmt in a pattern, we might need to mark its
212      related pattern stmt instead of the original stmt.  However, such stmts
213      may have their own uses that are not in any pattern, in such cases the
214      stmt itself should be marked.  */
215   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
216     {
217       /* This is the last stmt in a sequence that was detected as a
218 	 pattern that can potentially be vectorized.  Don't mark the stmt
219 	 as relevant/live because it's not going to be vectorized.
220 	 Instead mark the pattern-stmt that replaces it.  */
221 
222       if (dump_enabled_p ())
223 	dump_printf_loc (MSG_NOTE, vect_location,
224 			 "last stmt in pattern. don't mark"
225 			 " relevant/live.\n");
226       stmt_vec_info old_stmt_info = stmt_info;
227       stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
228       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
229       save_relevant = STMT_VINFO_RELEVANT (stmt_info);
230       save_live_p = STMT_VINFO_LIVE_P (stmt_info);
231     }
232 
233   STMT_VINFO_LIVE_P (stmt_info) |= live_p;
234   if (relevant > STMT_VINFO_RELEVANT (stmt_info))
235     STMT_VINFO_RELEVANT (stmt_info) = relevant;
236 
237   if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
238       && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
239     {
240       if (dump_enabled_p ())
241         dump_printf_loc (MSG_NOTE, vect_location,
242                          "already marked relevant/live.\n");
243       return;
244     }
245 
246   worklist->safe_push (stmt_info);
247 }
248 
249 
250 /* Function is_simple_and_all_uses_invariant
251 
252    Return true if STMT_INFO is simple and all uses of it are invariant.  */
253 
254 bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
256 				  loop_vec_info loop_vinfo)
257 {
258   tree op;
259   ssa_op_iter iter;
260 
261   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
262   if (!stmt)
263     return false;
264 
265   FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
266     {
267       enum vect_def_type dt = vect_uninitialized_def;
268 
269       if (!vect_is_simple_use (op, loop_vinfo, &dt))
270 	{
271 	  if (dump_enabled_p ())
272 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
273 			     "use not simple.\n");
274 	  return false;
275 	}
276 
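      /* Only constants and defs external to the loop are invariant.  */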
277       if (dt != vect_external_def && dt != vect_constant_def)
278 	return false;
279     }
280   return true;
281 }
282 
283 /* Function vect_stmt_relevant_p.
284 
285    Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
286    is "relevant for vectorization".
287 
288    A stmt is considered "relevant for vectorization" if:
289    - it has uses outside the loop.
290    - it has vdefs (it alters memory).
   - it is a control stmt in the loop (except for the exit condition).
292 
293    CHECKME: what other side effects would the vectorizer allow?  */
294 
295 static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
297 		      enum vect_relevant *relevant, bool *live_p)
298 {
299   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
300   ssa_op_iter op_iter;
301   imm_use_iterator imm_iter;
302   use_operand_p use_p;
303   def_operand_p def_p;
304 
305   *relevant = vect_unused_in_scope;
306   *live_p = false;
307 
308   /* cond stmt other than loop exit cond.  */
309   if (is_ctrl_stmt (stmt_info->stmt)
310       && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
311     *relevant = vect_used_in_scope;
312 
313   /* changing memory.  */
314   if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
315     if (gimple_vdef (stmt_info->stmt)
316 	&& !gimple_clobber_p (stmt_info->stmt))
317       {
318 	if (dump_enabled_p ())
319 	  dump_printf_loc (MSG_NOTE, vect_location,
320                            "vec_stmt_relevant_p: stmt has vdefs.\n");
321 	*relevant = vect_used_in_scope;
322       }
323 
324   /* uses outside the loop.  */
325   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
326     {
327       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
328 	{
329 	  basic_block bb = gimple_bb (USE_STMT (use_p));
330 	  if (!flow_bb_inside_loop_p (loop, bb))
331 	    {
332 	      if (dump_enabled_p ())
333 		dump_printf_loc (MSG_NOTE, vect_location,
334                                  "vec_stmt_relevant_p: used out of loop.\n");
335 
336 	      if (is_gimple_debug (USE_STMT (use_p)))
337 		continue;
338 
339 	      /* We expect all such uses to be in the loop exit phis
340 		 (because of loop closed form)   */
341 	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
342 	      gcc_assert (bb == single_exit (loop)->dest);
343 
344               *live_p = true;
345 	    }
346 	}
347     }
348 
349   if (*live_p && *relevant == vect_unused_in_scope
350       && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
351     {
352       if (dump_enabled_p ())
353 	dump_printf_loc (MSG_NOTE, vect_location,
354 			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
355       *relevant = vect_used_only_live;
356     }
357 
358   return (*live_p || *relevant);
359 }
360 
361 
362 /* Function exist_non_indexing_operands_for_use_p
363 
364    USE is one of the uses attached to STMT_INFO.  Check if USE is
365    used in STMT_INFO for anything other than indexing an array.  */
366 
367 static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
369 {
370   tree operand;
371 
372   /* USE corresponds to some operand in STMT.  If there is no data
373      reference in STMT, then any operand that corresponds to USE
374      is not indexing an array.  */
375   if (!STMT_VINFO_DATA_REF (stmt_info))
376     return true;
377 
  /* STMT has a data_ref.  FORNOW this means that it is one of
379      the following forms:
380      -1- ARRAY_REF = var
381      -2- var = ARRAY_REF
382      (This should have been verified in analyze_data_refs).
383 
384      'var' in the second case corresponds to a def, not a use,
385      so USE cannot correspond to any operands that are not used
386      for array indexing.
387 
388      Therefore, all we need to check is if STMT falls into the
389      first case, and whether var corresponds to USE.  */
390 
391   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
392   if (!assign || !gimple_assign_copy_p (assign))
393     {
394       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
395       if (call && gimple_call_internal_p (call))
396 	{
397 	  internal_fn ifn = gimple_call_internal_fn (call);
398 	  int mask_index = internal_fn_mask_index (ifn);
399 	  if (mask_index >= 0
400 	      && use == gimple_call_arg (call, mask_index))
401 	    return true;
402 	  int stored_value_index = internal_fn_stored_value_index (ifn);
403 	  if (stored_value_index >= 0
404 	      && use == gimple_call_arg (call, stored_value_index))
405 	    return true;
406 	  if (internal_gather_scatter_fn_p (ifn)
407 	      && use == gimple_call_arg (call, 1))
408 	    return true;
409 	}
410       return false;
411     }
412 
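  /* STMT is a plain copy.  If the LHS is an SSA_NAME the stmt has the form
     "var = ARRAY_REF" and USE can only appear in the array index, so it is
     not a non-indexing operand.  Otherwise the stmt is "ARRAY_REF = var"
     and USE is a non-indexing operand iff it is the stored value.  */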
413   if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
414     return false;
415   operand = gimple_assign_rhs1 (assign);
416   if (TREE_CODE (operand) != SSA_NAME)
417     return false;
418 
419   if (operand == use)
420     return true;
421 
422   return false;
423 }
424 
425 
426 /*
427    Function process_use.
428 
429    Inputs:
430    - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
431    - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
432      that defined USE.  This is done by calling mark_relevant and passing it
433      the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
434    - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
435      be performed.
436 
437    Outputs:
438    Generally, LIVE_P and RELEVANT are used to define the liveness and
439    relevance info of the DEF_STMT of this USE:
440        STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
441        STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
442    Exceptions:
443    - case 1: If USE is used only for address computations (e.g. array indexing),
444    which does not need to be directly vectorized, then the liveness/relevance
445    of the respective DEF_STMT is left unchanged.
446    - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
   we skip DEF_STMT because it has already been processed.
448    - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
449    "relevant" will be modified accordingly.
450 
451    Return true if everything is as expected. Return false otherwise.  */
452 
453 static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
455 	     enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
456 	     bool force)
457 {
458   stmt_vec_info dstmt_vinfo;
459   basic_block bb, def_bb;
460   enum vect_def_type dt;
461 
462   /* case 1: we are only interested in uses that need to be vectorized.  Uses
463      that are used for address computation are not considered relevant.  */
464   if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
465     return opt_result::success ();
466 
467   if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
468     return opt_result::failure_at (stmt_vinfo->stmt,
469 				   "not vectorized:"
470 				   " unsupported use in stmt.\n");
471 
472   if (!dstmt_vinfo)
473     return opt_result::success ();
474 
475   def_bb = gimple_bb (dstmt_vinfo->stmt);
476 
477   /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
478      DSTMT_VINFO must have already been processed, because this should be the
479      only way that STMT, which is a reduction-phi, was put in the worklist,
480      as there should be no other uses for DSTMT_VINFO in the loop.  So we just
481      check that everything is as expected, and we are done.  */
482   bb = gimple_bb (stmt_vinfo->stmt);
483   if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484       && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485       && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486       && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487       && bb->loop_father == def_bb->loop_father)
488     {
489       if (dump_enabled_p ())
490 	dump_printf_loc (MSG_NOTE, vect_location,
491                          "reduc-stmt defining reduc-phi in the same nest.\n");
492       gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
493       gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
494 		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
495       return opt_result::success ();
496     }
497 
498   /* case 3a: outer-loop stmt defining an inner-loop stmt:
499 	outer-loop-header-bb:
500 		d = dstmt_vinfo
501 	inner-loop:
502 		stmt # use (d)
503 	outer-loop-tail-bb:
504 		...		  */
505   if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
506     {
507       if (dump_enabled_p ())
508 	dump_printf_loc (MSG_NOTE, vect_location,
509                          "outer-loop def-stmt defining inner-loop stmt.\n");
510 
511       switch (relevant)
512 	{
513 	case vect_unused_in_scope:
514 	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
515 		      vect_used_in_scope : vect_unused_in_scope;
516 	  break;
517 
518 	case vect_used_in_outer_by_reduction:
519           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
520 	  relevant = vect_used_by_reduction;
521 	  break;
522 
523 	case vect_used_in_outer:
524           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
525 	  relevant = vect_used_in_scope;
526 	  break;
527 
528 	case vect_used_in_scope:
529 	  break;
530 
531 	default:
532 	  gcc_unreachable ();
533 	}
534     }
535 
536   /* case 3b: inner-loop stmt defining an outer-loop stmt:
537 	outer-loop-header-bb:
538 		...
539 	inner-loop:
540 		d = dstmt_vinfo
541 	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
542 		stmt # use (d)		*/
543   else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
544     {
545       if (dump_enabled_p ())
546 	dump_printf_loc (MSG_NOTE, vect_location,
547                          "inner-loop def-stmt defining outer-loop stmt.\n");
548 
549       switch (relevant)
550         {
551         case vect_unused_in_scope:
552           relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
553             || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
554                       vect_used_in_outer_by_reduction : vect_unused_in_scope;
555           break;
556 
557         case vect_used_by_reduction:
558 	case vect_used_only_live:
559           relevant = vect_used_in_outer_by_reduction;
560           break;
561 
562         case vect_used_in_scope:
563           relevant = vect_used_in_outer;
564           break;
565 
566         default:
567           gcc_unreachable ();
568         }
569     }
570   /* We are also not interested in uses on loop PHI backedges that are
571      inductions.  Otherwise we'll needlessly vectorize the IV increment
572      and cause hybrid SLP for SLP inductions.  Unless the PHI is live
573      of course.  */
574   else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
575 	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
576 	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
577 	   && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
578 				      loop_latch_edge (bb->loop_father))
579 	       == use))
580     {
581       if (dump_enabled_p ())
582 	dump_printf_loc (MSG_NOTE, vect_location,
583                          "induction value on backedge.\n");
584       return opt_result::success ();
585     }
586 
587 
588   vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
589   return opt_result::success ();
590 }
591 
592 
593 /* Function vect_mark_stmts_to_be_vectorized.
594 
595    Not all stmts in the loop need to be vectorized. For example:
596 
597      for i...
598        for j...
599    1.    T0 = i + j
600    2.	 T1 = a[T0]
601 
602    3.    j = j + 1
603 
604    Stmt 1 and 3 do not need to be vectorized, because loop control and
605    addressing of vectorized data-refs are handled differently.
606 
607    This pass detects such stmts.  */
608 
609 opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
611 {
612   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
613   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
614   unsigned int nbbs = loop->num_nodes;
615   gimple_stmt_iterator si;
616   unsigned int i;
617   basic_block bb;
618   bool live_p;
619   enum vect_relevant relevant;
620 
621   DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
622 
623   auto_vec<stmt_vec_info, 64> worklist;
624 
625   /* 1. Init worklist.  */
626   for (i = 0; i < nbbs; i++)
627     {
628       bb = bbs[i];
629       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
630 	{
631 	  stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
632 	  if (dump_enabled_p ())
633 	    dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
634 			     phi_info->stmt);
635 
636 	  if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
637 	    vect_mark_relevant (&worklist, phi_info, relevant, live_p);
638 	}
639       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
640 	{
641 	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
642 	  if (dump_enabled_p ())
643 	      dump_printf_loc (MSG_NOTE, vect_location,
644 			       "init: stmt relevant? %G", stmt_info->stmt);
645 
646 	  if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
647 	    vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
648 	}
649     }
650 
651   /* 2. Process_worklist */
652   while (worklist.length () > 0)
653     {
654       use_operand_p use_p;
655       ssa_op_iter iter;
656 
657       stmt_vec_info stmt_vinfo = worklist.pop ();
658       if (dump_enabled_p ())
659 	dump_printf_loc (MSG_NOTE, vect_location,
660 			 "worklist: examine stmt: %G", stmt_vinfo->stmt);
661 
662       /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 	 (DEF_STMT) as relevant/irrelevant according to the relevance property
664 	 of STMT.  */
665       relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
666 
667       /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 	 propagated as is to the DEF_STMTs of its USEs.
669 
670 	 One exception is when STMT has been identified as defining a reduction
671 	 variable; in this case we set the relevance to vect_used_by_reduction.
672 	 This is because we distinguish between two kinds of relevant stmts -
673 	 those that are used by a reduction computation, and those that are
674 	 (also) used by a regular computation.  This allows us later on to
675 	 identify stmts that are used solely by a reduction, and therefore the
676 	 order of the results that they produce does not have to be kept.  */
677 
678       switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
679         {
680           case vect_reduction_def:
681 	    gcc_assert (relevant != vect_unused_in_scope);
682 	    if (relevant != vect_unused_in_scope
683 		&& relevant != vect_used_in_scope
684 		&& relevant != vect_used_by_reduction
685 		&& relevant != vect_used_only_live)
686 	      return opt_result::failure_at
687 		(stmt_vinfo->stmt, "unsupported use of reduction.\n");
688 	    break;
689 
690           case vect_nested_cycle:
691 	    if (relevant != vect_unused_in_scope
692 		&& relevant != vect_used_in_outer_by_reduction
693 		&& relevant != vect_used_in_outer)
694 	      return opt_result::failure_at
695 		(stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
696             break;
697 
698           case vect_double_reduction_def:
699 	    if (relevant != vect_unused_in_scope
700 		&& relevant != vect_used_by_reduction
701 		&& relevant != vect_used_only_live)
702 	      return opt_result::failure_at
703 		(stmt_vinfo->stmt, "unsupported use of double reduction.\n");
704             break;
705 
706           default:
707             break;
708         }
709 
710       if (is_pattern_stmt_p (stmt_vinfo))
711         {
712           /* Pattern statements are not inserted into the code, so
713              FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714              have to scan the RHS or function arguments instead.  */
715 	  if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
716 	    {
717 	      enum tree_code rhs_code = gimple_assign_rhs_code (assign);
718 	      tree op = gimple_assign_rhs1 (assign);
719 
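	      /* Operand 0 is the LHS, so start scanning from operand 1.
		 For a COND_EXPR whose condition is an embedded comparison,
		 handle its two operands here and continue from operand 2.  */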
720 	      i = 1;
721 	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
722 		{
723 		  opt_result res
724 		    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
725 				   loop_vinfo, relevant, &worklist, false);
726 		  if (!res)
727 		    return res;
728 		  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
729 				     loop_vinfo, relevant, &worklist, false);
730 		  if (!res)
731 		    return res;
732 		  i = 2;
733 		}
734 	      for (; i < gimple_num_ops (assign); i++)
735 		{
736 		  op = gimple_op (assign, i);
737                   if (TREE_CODE (op) == SSA_NAME)
738 		    {
739 		      opt_result res
740 			= process_use (stmt_vinfo, op, loop_vinfo, relevant,
741 				       &worklist, false);
742 		      if (!res)
743 			return res;
744 		    }
745                  }
746             }
747 	  else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
748 	    {
749 	      for (i = 0; i < gimple_call_num_args (call); i++)
750 		{
751 		  tree arg = gimple_call_arg (call, i);
752 		  opt_result res
753 		    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
754 				   &worklist, false);
755 		  if (!res)
756 		    return res;
757 		}
758 	    }
759         }
760       else
761 	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
762           {
763             tree op = USE_FROM_PTR (use_p);
764 	    opt_result res
765 	      = process_use (stmt_vinfo, op, loop_vinfo, relevant,
766 			     &worklist, false);
767 	    if (!res)
768 	      return res;
769           }
770 
771       if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
772 	{
773 	  gather_scatter_info gs_info;
774 	  if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
775 	    gcc_unreachable ();
776 	  opt_result res
777 	    = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
778 			   &worklist, true);
779 	  if (!res)
780 	    return res;
781 	}
782     } /* while worklist */
783 
784   return opt_result::success ();
785 }
786 
787 /* Compute the prologue cost for invariant or constant operands.  */
788 
789 static unsigned
vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
791 			       unsigned opno, enum vect_def_type dt,
792 			       stmt_vector_for_cost *cost_vec)
793 {
794   gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
795   tree op = gimple_op (stmt, opno);
796   unsigned prologue_cost = 0;
797 
798   /* Without looking at the actual initializer a vector of
799      constants can be implemented as load from the constant pool.
800      When all elements are the same we can use a splat.  */
801   tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
802   unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
803   unsigned num_vects_to_check;
804   unsigned HOST_WIDE_INT const_nunits;
805   unsigned nelt_limit;
806   if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
807       && ! multiple_p (const_nunits, group_size))
808     {
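      /* The number of vector elements is known but is not a multiple of
	 the group size, so the vectors are not all alike; cost each of
	 the vector statements separately.  */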
809       num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
810       nelt_limit = const_nunits;
811     }
812   else
813     {
814       /* If either the vector has variable length or the vectors
815 	 are composed of repeated whole groups we only need to
816 	 cost construction once.  All vectors will be the same.  */
817       num_vects_to_check = 1;
818       nelt_limit = group_size;
819     }
820   tree elt = NULL_TREE;
821   unsigned nelt = 0;
822   for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
823     {
824       unsigned si = j % group_size;
825       if (nelt == 0)
826 	elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
827       /* ???  We're just tracking whether all operands of a single
828 	 vector initializer are the same, ideally we'd check if
829 	 we emitted the same one already.  */
830       else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
831 				 opno))
832 	elt = NULL_TREE;
833       nelt++;
834       if (nelt == nelt_limit)
835 	{
836 	  /* ???  We need to pass down stmt_info for a vector type
837 	     even if it points to the wrong stmt.  */
838 	  prologue_cost += record_stmt_cost
839 	      (cost_vec, 1,
840 	       dt == vect_external_def
841 	       ? (elt ? scalar_to_vec : vec_construct)
842 	       : vector_load,
843 	       stmt_info, 0, vect_prologue);
844 	  nelt = 0;
845 	}
846     }
847 
848   return prologue_cost;
849 }
850 
851 /* Function vect_model_simple_cost.
852 
853    Models cost for simple operations, i.e. those that only emit ncopies of a
854    single op.  Right now, this does not account for multiple insns that could
855    be generated for the single vector op.  We will handle that shortly.  */
856 
857 static void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
859 			enum vect_def_type *dt,
860 			int ndts,
861 			slp_tree node,
862 			stmt_vector_for_cost *cost_vec)
863 {
864   int inside_cost = 0, prologue_cost = 0;
865 
866   gcc_assert (cost_vec != NULL);
867 
868   /* ???  Somehow we need to fix this at the callers.  */
869   if (node)
870     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
871 
872   if (node)
873     {
874       /* Scan operands and account for prologue cost of constants/externals.
875 	 ???  This over-estimates cost for multiple uses and should be
876 	 re-engineered.  */
877       gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
878       tree lhs = gimple_get_lhs (stmt);
879       for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
880 	{
881 	  tree op = gimple_op (stmt, i);
882 	  enum vect_def_type dt;
883 	  if (!op || op == lhs)
884 	    continue;
885 	  if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
886 	      && (dt == vect_constant_def || dt == vect_external_def))
887 	    prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
888 							    i, dt, cost_vec);
889 	}
890     }
891   else
892     /* Cost the "broadcast" of a scalar operand in to a vector operand.
893        Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
894        cost model.  */
895     for (int i = 0; i < ndts; i++)
896       if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
897 	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
898 					   stmt_info, 0, vect_prologue);
899 
900   /* Adjust for two-operator SLP nodes.  */
901   if (node && SLP_TREE_TWO_OPERATORS (node))
902     {
903       ncopies *= 2;
904       inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
905 				       stmt_info, 0, vect_body);
906     }
907 
908   /* Pass the inside-of-loop statements to the target-specific cost model.  */
909   inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
910 				   stmt_info, 0, vect_body);
911 
912   if (dump_enabled_p ())
913     dump_printf_loc (MSG_NOTE, vect_location,
914                      "vect_model_simple_cost: inside_cost = %d, "
915                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
916 }
917 
918 
919 /* Model cost for type demotion and promotion operations.  PWR is normally
920    zero for single-step promotions and demotions.  It will be one if
921    two-step promotion/demotion is required, and so on.  Each additional
922    step doubles the number of instructions required.  */
923 
924 static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
926 				    enum vect_def_type *dt, int pwr,
927 				    stmt_vector_for_cost *cost_vec)
928 {
929   int i, tmp;
930   int inside_cost = 0, prologue_cost = 0;
931 
932   for (i = 0; i < pwr + 1; i++)
933     {
934       tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
935 	(i + 1) : i;
936       inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
937 				       vec_promote_demote, stmt_info, 0,
938 				       vect_body);
939     }
940 
941   /* FORNOW: Assuming maximum 2 args per stmts.  */
942   for (i = 0; i < 2; i++)
943     if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
944       prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
945 					 stmt_info, 0, vect_prologue);
946 
947   if (dump_enabled_p ())
948     dump_printf_loc (MSG_NOTE, vect_location,
949                      "vect_model_promotion_demotion_cost: inside_cost = %d, "
950                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
951 }
952 
953 /* Returns true if the current function returns DECL.  */
954 
955 static bool
cfun_returns (tree decl)
957 {
958   edge_iterator ei;
959   edge e;
960   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
961     {
962       greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
963       if (!ret)
964 	continue;
965       if (gimple_return_retval (ret) == decl)
966 	return true;
967       /* We often end up with an aggregate copy to the result decl,
968          handle that case as well.  First skip intermediate clobbers
969 	 though.  */
970       gimple *def = ret;
971       do
972 	{
973 	  def = SSA_NAME_DEF_STMT (gimple_vuse (def));
974 	}
975       while (gimple_clobber_p (def));
976       if (is_a <gassign *> (def)
977 	  && gimple_assign_lhs (def) == gimple_return_retval (ret)
978 	  && gimple_assign_rhs1 (def) == decl)
979 	return true;
980     }
981   return false;
982 }
983 
984 /* Function vect_model_store_cost
985 
986    Models cost for stores.  In the case of grouped accesses, one access
987    has the overhead of the grouped access attributed to it.  */
988 
989 static void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
991 		       enum vect_def_type dt,
992 		       vect_memory_access_type memory_access_type,
993 		       vec_load_store_type vls_type, slp_tree slp_node,
994 		       stmt_vector_for_cost *cost_vec)
995 {
996   unsigned int inside_cost = 0, prologue_cost = 0;
997   stmt_vec_info first_stmt_info = stmt_info;
998   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
999 
1000   /* ???  Somehow we need to fix this at the callers.  */
1001   if (slp_node)
1002     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1003 
1004   if (vls_type == VLS_STORE_INVARIANT)
1005     {
1006       if (slp_node)
1007 	prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
1008 							1, dt, cost_vec);
1009       else
1010 	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
1011 					   stmt_info, 0, vect_prologue);
1012     }
1013 
1014   /* Grouped stores update all elements in the group at once,
1015      so we want the DR for the first statement.  */
1016   if (!slp_node && grouped_access_p)
1017     first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1018 
1019   /* True if we should include any once-per-group costs as well as
1020      the cost of the statement itself.  For SLP we only get called
1021      once per group anyhow.  */
1022   bool first_stmt_p = (first_stmt_info == stmt_info);
1023 
1024   /* We assume that the cost of a single store-lanes instruction is
1025      equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
1026      access is instead being provided by a permute-and-store operation,
1027      include the cost of the permutes.  */
1028   if (first_stmt_p
1029       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1030     {
1031       /* Uses a high and low interleave or shuffle operations for each
1032 	 needed permute.  */
1033       int group_size = DR_GROUP_SIZE (first_stmt_info);
1034       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1035       inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1036 				      stmt_info, 0, vect_body);
1037 
1038       if (dump_enabled_p ())
1039         dump_printf_loc (MSG_NOTE, vect_location,
1040                          "vect_model_store_cost: strided group_size = %d .\n",
1041                          group_size);
1042     }
1043 
1044   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1045   /* Costs of the stores.  */
1046   if (memory_access_type == VMAT_ELEMENTWISE
1047       || memory_access_type == VMAT_GATHER_SCATTER)
1048     {
1049       /* N scalar stores plus extracting the elements.  */
1050       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1051       inside_cost += record_stmt_cost (cost_vec,
1052 				       ncopies * assumed_nunits,
1053 				       scalar_store, stmt_info, 0, vect_body);
1054     }
1055   else
1056     vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
1057 
1058   if (memory_access_type == VMAT_ELEMENTWISE
1059       || memory_access_type == VMAT_STRIDED_SLP)
1060     {
1061       /* N scalar stores plus extracting the elements.  */
1062       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1063       inside_cost += record_stmt_cost (cost_vec,
1064 				       ncopies * assumed_nunits,
1065 				       vec_to_scalar, stmt_info, 0, vect_body);
1066     }
1067 
1068   /* When vectorizing a store into the function result assign
1069      a penalty if the function returns in a multi-register location.
1070      In this case we assume we'll end up with having to spill the
1071      vector result and do piecewise loads as a conservative estimate.  */
1072   tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1073   if (base
1074       && (TREE_CODE (base) == RESULT_DECL
1075 	  || (DECL_P (base) && cfun_returns (base)))
1076       && !aggregate_value_p (base, cfun->decl))
1077     {
1078       rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1079       /* ???  Handle PARALLEL in some way.  */
1080       if (REG_P (reg))
1081 	{
1082 	  int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1083 	  /* Assume that a single reg-reg move is possible and cheap,
1084 	     do not account for vector to gp register move cost.  */
1085 	  if (nregs > 1)
1086 	    {
1087 	      /* Spill.  */
1088 	      prologue_cost += record_stmt_cost (cost_vec, ncopies,
1089 						 vector_store,
1090 						 stmt_info, 0, vect_epilogue);
1091 	      /* Loads.  */
1092 	      prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1093 						 scalar_load,
1094 						 stmt_info, 0, vect_epilogue);
1095 	    }
1096 	}
1097     }
1098 
1099   if (dump_enabled_p ())
1100     dump_printf_loc (MSG_NOTE, vect_location,
1101                      "vect_model_store_cost: inside_cost = %d, "
1102                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
1103 }
1104 
1105 
1106 /* Calculate cost of DR's memory access.  */
1107 void
vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
1109 		     unsigned int *inside_cost,
1110 		     stmt_vector_for_cost *body_cost_vec)
1111 {
1112   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1113   int alignment_support_scheme
1114     = vect_supportable_dr_alignment (dr_info, false);
1115 
1116   switch (alignment_support_scheme)
1117     {
1118     case dr_aligned:
1119       {
1120 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1121 					  vector_store, stmt_info, 0,
1122 					  vect_body);
1123 
1124         if (dump_enabled_p ())
1125           dump_printf_loc (MSG_NOTE, vect_location,
1126                            "vect_model_store_cost: aligned.\n");
1127         break;
1128       }
1129 
1130     case dr_unaligned_supported:
1131       {
1132         /* Here, we assign an additional cost for the unaligned store.  */
1133 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1134 					  unaligned_store, stmt_info,
1135 					  DR_MISALIGNMENT (dr_info),
1136 					  vect_body);
1137         if (dump_enabled_p ())
1138           dump_printf_loc (MSG_NOTE, vect_location,
1139                            "vect_model_store_cost: unaligned supported by "
1140                            "hardware.\n");
1141         break;
1142       }
1143 
1144     case dr_unaligned_unsupported:
1145       {
1146         *inside_cost = VECT_MAX_COST;
1147 
1148         if (dump_enabled_p ())
1149           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1150                            "vect_model_store_cost: unsupported access.\n");
1151         break;
1152       }
1153 
1154     default:
1155       gcc_unreachable ();
1156     }
1157 }
1158 
1159 
1160 /* Function vect_model_load_cost
1161 
1162    Models cost for loads.  In the case of grouped accesses, one access has
1163    the overhead of the grouped access attributed to it.  Since unaligned
1164    accesses are supported for loads, we also account for the costs of the
1165    access scheme chosen.  */
1166 
1167 static void
vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
1169 		      vect_memory_access_type memory_access_type,
1170 		      slp_instance instance,
1171 		      slp_tree slp_node,
1172 		      stmt_vector_for_cost *cost_vec)
1173 {
1174   unsigned int inside_cost = 0, prologue_cost = 0;
1175   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1176 
1177   gcc_assert (cost_vec);
1178 
1179   /* ???  Somehow we need to fix this at the callers.  */
1180   if (slp_node)
1181     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1182 
1183   if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1184     {
1185       /* If the load is permuted then the alignment is determined by
1186 	 the first group element not by the first scalar stmt DR.  */
1187       stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1188       /* Record the cost for the permutation.  */
1189       unsigned n_perms;
1190       unsigned assumed_nunits
1191 	= vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1192       unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1193       vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1194 				    slp_vf, instance, true,
1195 				    &n_perms);
1196       inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1197 				       first_stmt_info, 0, vect_body);
1198       /* And adjust the number of loads performed.  This handles
1199 	 redundancies as well as loads that are later dead.  */
1200       auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1201       bitmap_clear (perm);
1202       for (unsigned i = 0;
1203 	   i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1204 	bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1205       ncopies = 0;
1206       bool load_seen = false;
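      /* Count one vector load for each block of ASSUMED_NUNITS group
	 elements that contains at least one load the permutation uses.  */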
1207       for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1208 	{
1209 	  if (i % assumed_nunits == 0)
1210 	    {
1211 	      if (load_seen)
1212 		ncopies++;
1213 	      load_seen = false;
1214 	    }
1215 	  if (bitmap_bit_p (perm, i))
1216 	    load_seen = true;
1217 	}
1218       if (load_seen)
1219 	ncopies++;
1220       gcc_assert (ncopies
1221 		  <= (DR_GROUP_SIZE (first_stmt_info)
1222 		      - DR_GROUP_GAP (first_stmt_info)
1223 		      + assumed_nunits - 1) / assumed_nunits);
1224     }
1225 
1226   /* Grouped loads read all elements in the group at once,
1227      so we want the DR for the first statement.  */
1228   stmt_vec_info first_stmt_info = stmt_info;
1229   if (!slp_node && grouped_access_p)
1230     first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1231 
1232   /* True if we should include any once-per-group costs as well as
1233      the cost of the statement itself.  For SLP we only get called
1234      once per group anyhow.  */
1235   bool first_stmt_p = (first_stmt_info == stmt_info);
1236 
1237   /* We assume that the cost of a single load-lanes instruction is
1238      equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
1239      access is instead being provided by a load-and-permute operation,
1240      include the cost of the permutes.  */
1241   if (first_stmt_p
1242       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1243     {
1244       /* Uses an even and odd extract operations or shuffle operations
1245 	 for each needed permute.  */
1246       int group_size = DR_GROUP_SIZE (first_stmt_info);
1247       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1248       inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1249 				       stmt_info, 0, vect_body);
1250 
1251       if (dump_enabled_p ())
1252         dump_printf_loc (MSG_NOTE, vect_location,
1253                          "vect_model_load_cost: strided group_size = %d .\n",
1254                          group_size);
1255     }
1256 
1257   /* The loads themselves.  */
1258   if (memory_access_type == VMAT_ELEMENTWISE
1259       || memory_access_type == VMAT_GATHER_SCATTER)
1260     {
1261       /* N scalar loads plus gathering them into a vector.  */
1262       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1263       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1264       inside_cost += record_stmt_cost (cost_vec,
1265 				       ncopies * assumed_nunits,
1266 				       scalar_load, stmt_info, 0, vect_body);
1267     }
1268   else
1269     vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
1270 			&inside_cost, &prologue_cost,
1271 			cost_vec, cost_vec, true);
1272   if (memory_access_type == VMAT_ELEMENTWISE
1273       || memory_access_type == VMAT_STRIDED_SLP)
1274     inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1275 				     stmt_info, 0, vect_body);
1276 
1277   if (dump_enabled_p ())
1278     dump_printf_loc (MSG_NOTE, vect_location,
1279                      "vect_model_load_cost: inside_cost = %d, "
1280                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
1281 }
1282 
1283 
1284 /* Calculate cost of DR's memory access.  */
1285 void
vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
1287 		    bool add_realign_cost, unsigned int *inside_cost,
1288 		    unsigned int *prologue_cost,
1289 		    stmt_vector_for_cost *prologue_cost_vec,
1290 		    stmt_vector_for_cost *body_cost_vec,
1291 		    bool record_prologue_costs)
1292 {
1293   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1294   int alignment_support_scheme
1295     = vect_supportable_dr_alignment (dr_info, false);
1296 
1297   switch (alignment_support_scheme)
1298     {
1299     case dr_aligned:
1300       {
1301 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1302 					  stmt_info, 0, vect_body);
1303 
1304         if (dump_enabled_p ())
1305           dump_printf_loc (MSG_NOTE, vect_location,
1306                            "vect_model_load_cost: aligned.\n");
1307 
1308         break;
1309       }
1310     case dr_unaligned_supported:
1311       {
1312         /* Here, we assign an additional cost for the unaligned load.  */
1313 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1314 					  unaligned_load, stmt_info,
1315 					  DR_MISALIGNMENT (dr_info),
1316 					  vect_body);
1317 
1318         if (dump_enabled_p ())
1319           dump_printf_loc (MSG_NOTE, vect_location,
1320                            "vect_model_load_cost: unaligned supported by "
1321                            "hardware.\n");
1322 
1323         break;
1324       }
1325     case dr_explicit_realign:
1326       {
1327 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1328 					  vector_load, stmt_info, 0, vect_body);
1329 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1330 					  vec_perm, stmt_info, 0, vect_body);
1331 
1332         /* FIXME: If the misalignment remains fixed across the iterations of
1333            the containing loop, the following cost should be added to the
1334            prologue costs.  */
1335         if (targetm.vectorize.builtin_mask_for_load)
1336 	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1337 					    stmt_info, 0, vect_body);
1338 
1339         if (dump_enabled_p ())
1340           dump_printf_loc (MSG_NOTE, vect_location,
1341                            "vect_model_load_cost: explicit realign\n");
1342 
1343         break;
1344       }
1345     case dr_explicit_realign_optimized:
1346       {
1347         if (dump_enabled_p ())
1348           dump_printf_loc (MSG_NOTE, vect_location,
1349                            "vect_model_load_cost: unaligned software "
1350                            "pipelined.\n");
1351 
1352         /* Unaligned software pipeline has a load of an address, an initial
1353            load, and possibly a mask operation to "prime" the loop.  However,
1354            if this is an access in a group of loads, which provide grouped
1355            access, then the above cost should only be considered for one
1356            access in the group.  Inside the loop, there is a load op
1357            and a realignment op.  */
1358 
1359         if (add_realign_cost && record_prologue_costs)
1360           {
1361 	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1362 						vector_stmt, stmt_info,
1363 						0, vect_prologue);
1364             if (targetm.vectorize.builtin_mask_for_load)
1365 	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1366 						  vector_stmt, stmt_info,
1367 						  0, vect_prologue);
1368           }
1369 
1370 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1371 					  stmt_info, 0, vect_body);
1372 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1373 					  stmt_info, 0, vect_body);
1374 
1375         if (dump_enabled_p ())
1376           dump_printf_loc (MSG_NOTE, vect_location,
1377                            "vect_model_load_cost: explicit realign optimized"
1378                            "\n");
1379 
1380         break;
1381       }
1382 
1383     case dr_unaligned_unsupported:
1384       {
1385         *inside_cost = VECT_MAX_COST;
1386 
1387         if (dump_enabled_p ())
1388           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1389                            "vect_model_load_cost: unsupported access.\n");
1390         break;
1391       }
1392 
1393     default:
1394       gcc_unreachable ();
1395     }
1396 }
1397 
1398 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1399    the loop preheader for the vectorized stmt STMT_VINFO.  */
1400 
1401 static void
vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
1403 		    gimple_stmt_iterator *gsi)
1404 {
1405   if (gsi)
1406     vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
1407   else
1408     {
1409       loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1410 
1411       if (loop_vinfo)
1412         {
1413           struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1414 	  basic_block new_bb;
1415 	  edge pe;
1416 
1417 	  if (nested_in_vect_loop_p (loop, stmt_vinfo))
1418 	    loop = loop->inner;
1419 
1420 	  pe = loop_preheader_edge (loop);
1421           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1422           gcc_assert (!new_bb);
1423 	}
1424       else
1425        {
1426           bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1427           basic_block bb;
1428           gimple_stmt_iterator gsi_bb_start;
1429 
1430           gcc_assert (bb_vinfo);
1431           bb = BB_VINFO_BB (bb_vinfo);
1432           gsi_bb_start = gsi_after_labels (bb);
1433           gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1434        }
1435     }
1436 
1437   if (dump_enabled_p ())
1438     dump_printf_loc (MSG_NOTE, vect_location,
1439 		     "created new init_stmt: %G", new_stmt);
1440 }
1441 
1442 /* Function vect_init_vector.
1443 
1444    Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   a vector type, a vector with all elements equal to VAL is created first.
1447    Place the initialization at BSI if it is not NULL.  Otherwise, place the
1448    initialization at the loop preheader.
1449    Return the DEF of INIT_STMT.
1450    It will be used in the vectorization of STMT_INFO.  */
1451 
1452 tree
vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
1454 		  gimple_stmt_iterator *gsi)
1455 {
1456   gimple *init_stmt;
1457   tree new_temp;
1458 
  /* We abuse this function to push something to an SSA name with initial 'val'.  */
1460   if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1461     {
1462       gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1463       if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1464 	{
1465 	  /* Scalar boolean value should be transformed into
1466 	     all zeros or all ones value before building a vector.  */
1467 	  if (VECTOR_BOOLEAN_TYPE_P (type))
1468 	    {
1469 	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
1470 	      tree false_val = build_zero_cst (TREE_TYPE (type));
1471 
1472 	      if (CONSTANT_CLASS_P (val))
1473 		val = integer_zerop (val) ? false_val : true_val;
1474 	      else
1475 		{
1476 		  new_temp = make_ssa_name (TREE_TYPE (type));
1477 		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1478 						   val, true_val, false_val);
1479 		  vect_init_vector_1 (stmt_info, init_stmt, gsi);
1480 		  val = new_temp;
1481 		}
1482 	    }
1483 	  else if (CONSTANT_CLASS_P (val))
1484 	    val = fold_convert (TREE_TYPE (type), val);
1485 	  else
1486 	    {
1487 	      new_temp = make_ssa_name (TREE_TYPE (type));
1488 	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1489 		init_stmt = gimple_build_assign (new_temp,
1490 						 fold_build1 (VIEW_CONVERT_EXPR,
1491 							      TREE_TYPE (type),
1492 							      val));
1493 	      else
1494 		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1495 	      vect_init_vector_1 (stmt_info, init_stmt, gsi);
1496 	      val = new_temp;
1497 	    }
1498 	}
1499       val = build_vector_from_val (type, val);
1500     }
1501 
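  /* A minimal illustration: for a vector TYPE this ends up emitting
     something like "cst_1 = { val, val, ... };" at GSI (or in the loop
     preheader) and returning cst_1.  */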
1502   new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1503   init_stmt = gimple_build_assign  (new_temp, val);
1504   vect_init_vector_1 (stmt_info, init_stmt, gsi);
1505   return new_temp;
1506 }
1507 
1508 /* Function vect_get_vec_def_for_operand_1.
1509 
1510    For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1511    with type DT that will be used in the vectorized stmt.  */
1512 
1513 tree
1514 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1515 				enum vect_def_type dt)
1516 {
1517   tree vec_oprnd;
1518   stmt_vec_info vec_stmt_info;
1519 
1520   switch (dt)
1521     {
1522     /* operand is a constant or a loop invariant.  */
1523     case vect_constant_def:
1524     case vect_external_def:
1525       /* Code should use vect_get_vec_def_for_operand.  */
1526       gcc_unreachable ();
1527 
1528     /* Operand is defined by a loop header phi.  In case of nested
1529        cycles we also may have uses of the backedge def.  */
1530     case vect_reduction_def:
1531     case vect_double_reduction_def:
1532     case vect_nested_cycle:
1533     case vect_induction_def:
1534       gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
1535 		  || dt == vect_nested_cycle);
1536       /* Fallthru.  */
1537 
1538     /* operand is defined inside the loop.  */
1539     case vect_internal_def:
1540       {
1541         /* Get the def from the vectorized stmt.  */
1542 	vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1543 	/* Get vectorized pattern statement.  */
1544 	if (!vec_stmt_info
1545 	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1546 	    && !STMT_VINFO_RELEVANT (def_stmt_info))
1547 	  vec_stmt_info = (STMT_VINFO_VEC_STMT
1548 			   (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1549 	gcc_assert (vec_stmt_info);
1550 	if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1551 	  vec_oprnd = PHI_RESULT (phi);
1552 	else
1553 	  vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1554 	return vec_oprnd;
1555       }
1556 
1557     default:
1558       gcc_unreachable ();
1559     }
1560 }
1561 
1562 
1563 /* Function vect_get_vec_def_for_operand.
1564 
1565    OP is an operand in STMT_VINFO.  This function returns a (vector) def
1566    that will be used in the vectorized stmt for STMT_VINFO.
1567 
1568    In the case that OP is an SSA_NAME which is defined in the loop, then
1569    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1570 
1571    In case OP is an invariant or constant, a new stmt that creates a vector def
1572    needs to be introduced.  VECTYPE may be used to specify a required type for
1573    the vector invariant.  */
1574 
1575 tree
1576 vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
1577 {
1578   gimple *def_stmt;
1579   enum vect_def_type dt;
1580   bool is_simple_use;
1581   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1582 
1583   if (dump_enabled_p ())
1584     dump_printf_loc (MSG_NOTE, vect_location,
1585 		     "vect_get_vec_def_for_operand: %T\n", op);
1586 
1587   stmt_vec_info def_stmt_info;
1588   is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1589 				      &def_stmt_info, &def_stmt);
1590   gcc_assert (is_simple_use);
1591   if (def_stmt && dump_enabled_p ())
1592     dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  %G", def_stmt);
1593 
1594   if (dt == vect_constant_def || dt == vect_external_def)
1595     {
1596       tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1597       tree vector_type;
1598 
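      /* Prefer the caller-supplied VECTYPE.  Otherwise, for a scalar
	 boolean operand feeding a boolean vector statement, build a
	 matching truth vector type; in all other cases derive the
	 vector type from the scalar type of OP.  */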
1599       if (vectype)
1600 	vector_type = vectype;
1601       else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1602 	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1603 	vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1604       else
1605 	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1606 
1607       gcc_assert (vector_type);
1608       return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
1609     }
1610   else
1611     return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
1612 }
1613 
1614 
1615 /* Function vect_get_vec_def_for_stmt_copy
1616 
1617    Return a vector-def for an operand.  This function is used when the
1618    vectorized stmt to be created (by the caller to this function) is a "copy"
1619    created in case the vectorized result cannot fit in one vector, and several
1620    copies of the vector-stmt are required.  In this case the vector-def is
1621    retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1622    of the stmt that defines VEC_OPRND.  VINFO describes the vectorization.
1623 
1624    Context:
1625         In case the vectorization factor (VF) is bigger than the number
1626    of elements that can fit in a vectype (nunits), we have to generate
1627    more than one vector stmt to vectorize the scalar stmt.  This situation
1628    arises when there are multiple data-types operated upon in the loop; the
1629    smallest data-type determines the VF, and as a result, when vectorizing
1630    stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1631    vector stmt (each computing a vector of 'nunits' results, and together
1632    computing 'VF' results in each iteration).  This function is called when
1633    vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1634    which VF=16 and nunits=4, so the number of copies required is 4):
1635 
1636    scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT
1637 
1638    S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
1639                         VS1.1:  vx.1 = memref1      VS1.2
1640                         VS1.2:  vx.2 = memref2      VS1.3
1641                         VS1.3:  vx.3 = memref3
1642 
1643    S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
1644                         VSnew.1:  vz1 = vx.1 + ...  VSnew.2
1645                         VSnew.2:  vz2 = vx.2 + ...  VSnew.3
1646                         VSnew.3:  vz3 = vx.3 + ...
1647 
1648    The vectorization of S1 is explained in vectorizable_load.
1649    The vectorization of S2:
1650         To create the first vector-stmt out of the 4 copies - VSnew.0 -
1651    the function 'vect_get_vec_def_for_operand' is called to
1652    get the relevant vector-def for each operand of S2.  For operand x it
1653    returns  the vector-def 'vx.0'.
1654 
1655         To create the remaining copies of the vector-stmt (VSnew.j), this
1656    function is called to get the relevant vector-def for each operand.  It is
1657    obtained from the respective VS1.j stmt, which is recorded in the
1658    STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1659 
1660         For example, to obtain the vector-def 'vx.1' in order to create the
1661    vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1662    Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1663    STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1664    and return its def ('vx.1').
1665    Overall, to create the above sequence this function will be called 3 times:
1666 	vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1667 	vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1668 	vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2);  */
1669 
1670 tree
1671 vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
1672 {
1673   stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
1674   if (!def_stmt_info)
1675     /* Do nothing; can reuse same def.  */
1676     return vec_oprnd;
1677 
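  /* Step to the next copy in the chain (VS1.j -> VS1.j+1 in the
     illustration above) and return its PHI result or lhs.  */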
1678   def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
1679   gcc_assert (def_stmt_info);
1680   if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
1681     vec_oprnd = PHI_RESULT (phi);
1682   else
1683     vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
1684   return vec_oprnd;
1685 }
1686 
1687 
1688 /* Get vectorized definitions for the operands to create a copy of an original
1689    stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */
1690 
1691 void
1692 vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
1693 				 vec<tree> *vec_oprnds0,
1694 				 vec<tree> *vec_oprnds1)
1695 {
1696   tree vec_oprnd = vec_oprnds0->pop ();
1697 
1698   vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1699   vec_oprnds0->quick_push (vec_oprnd);
1700 
1701   if (vec_oprnds1 && vec_oprnds1->length ())
1702     {
1703       vec_oprnd = vec_oprnds1->pop ();
1704       vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1705       vec_oprnds1->quick_push (vec_oprnd);
1706     }
1707 }
1708 
1709 
1710 /* Get vectorized definitions for OP0 and OP1.  */
1711 
1712 void
1713 vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
1714 		   vec<tree> *vec_oprnds0,
1715 		   vec<tree> *vec_oprnds1,
1716 		   slp_tree slp_node)
1717 {
1718   if (slp_node)
1719     {
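      /* For SLP the vector defs for the whole node are computed in one
	 go by vect_get_slp_defs; split the result per operand below.  */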
1720       int nops = (op1 == NULL_TREE) ? 1 : 2;
1721       auto_vec<tree> ops (nops);
1722       auto_vec<vec<tree> > vec_defs (nops);
1723 
1724       ops.quick_push (op0);
1725       if (op1)
1726         ops.quick_push (op1);
1727 
1728       vect_get_slp_defs (ops, slp_node, &vec_defs);
1729 
1730       *vec_oprnds0 = vec_defs[0];
1731       if (op1)
1732 	*vec_oprnds1 = vec_defs[1];
1733     }
1734   else
1735     {
1736       tree vec_oprnd;
1737 
1738       vec_oprnds0->create (1);
1739       vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
1740       vec_oprnds0->quick_push (vec_oprnd);
1741 
1742       if (op1)
1743 	{
1744 	  vec_oprnds1->create (1);
1745 	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
1746 	  vec_oprnds1->quick_push (vec_oprnd);
1747 	}
1748     }
1749 }
1750 
1751 /* Helper function called by vect_finish_replace_stmt and
1752    vect_finish_stmt_generation.  Set the location of the new
1753    statement and create and return a stmt_vec_info for it.  */
1754 
1755 static stmt_vec_info
1756 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
1757 {
1758   vec_info *vinfo = stmt_info->vinfo;
1759 
1760   stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1761 
1762   if (dump_enabled_p ())
1763     dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1764 
1765   gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1766 
1767   /* While EH edges will generally prevent vectorization, stmt might
1768      e.g. be in a must-not-throw region.  Ensure newly created stmts
1769      that could throw are part of the same region.  */
1770   int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1771   if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1772     add_stmt_to_eh_lp (vec_stmt, lp_nr);
1773 
1774   return vec_stmt_info;
1775 }
1776 
1777 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1778    which sets the same scalar result as STMT_INFO did.  Create and return a
1779    stmt_vec_info for VEC_STMT.  */
1780 
1781 stmt_vec_info
1782 vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
1783 {
1784   gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt));
1785 
1786   gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt);
1787   gsi_replace (&gsi, vec_stmt, true);
1788 
1789   return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1790 }
1791 
1792 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1793    before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */
1794 
1795 stmt_vec_info
1796 vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
1797 			     gimple_stmt_iterator *gsi)
1798 {
1799   gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1800 
1801   if (!gsi_end_p (*gsi)
1802       && gimple_has_mem_ops (vec_stmt))
1803     {
1804       gimple *at_stmt = gsi_stmt (*gsi);
1805       tree vuse = gimple_vuse (at_stmt);
1806       if (vuse && TREE_CODE (vuse) == SSA_NAME)
1807 	{
1808 	  tree vdef = gimple_vdef (at_stmt);
1809 	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1810 	  /* If we have an SSA vuse and insert a store, update virtual
1811 	     SSA form to avoid triggering the renamer.  Do so only
1812 	     if we can easily see all uses - which is what almost always
1813 	     happens with the way vectorized stmts are inserted.  */
1814 	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1815 	      && ((is_gimple_assign (vec_stmt)
1816 		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1817 		  || (is_gimple_call (vec_stmt)
1818 		      && !(gimple_call_flags (vec_stmt)
1819 			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1820 	    {
1821 	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1822 	      gimple_set_vdef (vec_stmt, new_vdef);
1823 	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1824 	    }
1825 	}
1826     }
1827   gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1828   return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1829 }
1830 
1831 /* We want to vectorize a call to combined function CFN with function
1832    decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1833    as the types of all inputs.  Check whether this is possible using
1834    an internal function, returning its code if so or IFN_LAST if not.  */
1835 
1836 static internal_fn
1837 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1838 				tree vectype_out, tree vectype_in)
1839 {
1840   internal_fn ifn;
1841   if (internal_fn_p (cfn))
1842     ifn = as_internal_fn (cfn);
1843   else
1844     ifn = associated_internal_fn (fndecl);
1845   if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1846     {
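      /* A negative type0/type1 selects the mode from the return value
	 (VECTYPE_OUT) rather than from an argument (VECTYPE_IN).  */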
1847       const direct_internal_fn_info &info = direct_internal_fn (ifn);
1848       if (info.vectorizable)
1849 	{
1850 	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1851 	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1852 	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1853 					      OPTIMIZE_FOR_SPEED))
1854 	    return ifn;
1855 	}
1856     }
1857   return IFN_LAST;
1858 }
1859 
1860 
1861 static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
1862 				  gimple_stmt_iterator *);
1863 
1864 /* Check whether a load or store statement in the loop described by
1865    LOOP_VINFO is possible in a fully-masked loop.  This is testing
1866    whether the vectorizer pass has the appropriate support, as well as
1867    whether the target does.
1868 
1869    VLS_TYPE says whether the statement is a load or store and VECTYPE
1870    is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
1871    says how the load or store is going to be implemented and GROUP_SIZE
1872    is the number of load or store statements in the containing group.
1873    If the access is a gather load or scatter store, GS_INFO describes
1874    its arguments.
1875 
1876    Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1877    supported, otherwise record the required mask types.  */
1878 
1879 static void
1880 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1881 			  vec_load_store_type vls_type, int group_size,
1882 			  vect_memory_access_type memory_access_type,
1883 			  gather_scatter_info *gs_info)
1884 {
1885   /* Invariant loads need no special support.  */
1886   if (memory_access_type == VMAT_INVARIANT)
1887     return;
1888 
1889   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1890   machine_mode vecmode = TYPE_MODE (vectype);
1891   bool is_load = (vls_type == VLS_LOAD);
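  /* Masked load/store-lanes needs direct target support; when it is
     available, record one loop mask per copy of the vector stmt.  */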
1892   if (memory_access_type == VMAT_LOAD_STORE_LANES)
1893     {
1894       if (is_load
1895 	  ? !vect_load_lanes_supported (vectype, group_size, true)
1896 	  : !vect_store_lanes_supported (vectype, group_size, true))
1897 	{
1898 	  if (dump_enabled_p ())
1899 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1900 			     "can't use a fully-masked loop because the"
1901 			     " target doesn't have an appropriate masked"
1902 			     " load/store-lanes instruction.\n");
1903 	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1904 	  return;
1905 	}
1906       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1907       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1908       return;
1909     }
1910 
1911   if (memory_access_type == VMAT_GATHER_SCATTER)
1912     {
1913       internal_fn ifn = (is_load
1914 			 ? IFN_MASK_GATHER_LOAD
1915 			 : IFN_MASK_SCATTER_STORE);
1916       tree offset_type = TREE_TYPE (gs_info->offset);
1917       if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1918 						   gs_info->memory_type,
1919 						   TYPE_SIGN (offset_type),
1920 						   gs_info->scale))
1921 	{
1922 	  if (dump_enabled_p ())
1923 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1924 			     "can't use a fully-masked loop because the"
1925 			     " target doesn't have an appropriate masked"
1926 			     " gather load or scatter store instruction.\n");
1927 	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1928 	  return;
1929 	}
1930       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1931       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1932       return;
1933     }
1934 
1935   if (memory_access_type != VMAT_CONTIGUOUS
1936       && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1937     {
1938       /* Element X of the data must come from iteration i * VF + X of the
1939 	 scalar loop.  We need more work to support other mappings.  */
1940       if (dump_enabled_p ())
1941 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1942 			 "can't use a fully-masked loop because an access"
1943 			 " isn't contiguous.\n");
1944       LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1945       return;
1946     }
1947 
1948   machine_mode mask_mode;
1949   if (!(targetm.vectorize.get_mask_mode
1950 	(GET_MODE_NUNITS (vecmode),
1951 	 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1952       || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1953     {
1954       if (dump_enabled_p ())
1955 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1956 			 "can't use a fully-masked loop because the target"
1957 			 " doesn't have the appropriate masked load or"
1958 			 " store.\n");
1959       LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1960       return;
1961     }
1962   /* We might load more scalars than we need for permuting SLP loads.
1963      We checked in get_group_load_store_type that the extra elements
1964      don't leak into a new vector.  */
1965   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1966   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1967   unsigned int nvectors;
1968   if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1969     vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1970   else
1971     gcc_unreachable ();
1972 }
1973 
1974 /* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
1975    form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1976    that needs to be applied to all loads and stores in a vectorized loop.
1977    Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1978 
1979    MASK_TYPE is the type of both masks.  If new statements are needed,
1980    insert them before GSI.  */
1981 
1982 static tree
1983 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1984 			 gimple_stmt_iterator *gsi)
1985 {
1986   gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1987   if (!loop_mask)
1988     return vec_mask;
1989 
1990   gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1991   tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1992   gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1993 					  vec_mask, loop_mask);
1994   gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1995   return and_res;
1996 }
1997 
1998 /* Determine whether we can use a gather load or scatter store to vectorize
1999    strided load or store STMT_INFO by truncating the current offset to a
2000    smaller width.  We need to be able to construct an offset vector:
2001 
2002      { 0, X, X*2, X*3, ... }
2003 
2004    without loss of precision, where X is STMT_INFO's DR_STEP.
2005 
2006    Return true if this is possible, describing the gather load or scatter
2007    store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */
2008 
2009 static bool
2010 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
2011 				     loop_vec_info loop_vinfo, bool masked_p,
2012 				     gather_scatter_info *gs_info)
2013 {
2014   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2015   data_reference *dr = dr_info->dr;
2016   tree step = DR_STEP (dr);
2017   if (TREE_CODE (step) != INTEGER_CST)
2018     {
2019       /* ??? Perhaps we could use range information here?  */
2020       if (dump_enabled_p ())
2021 	dump_printf_loc (MSG_NOTE, vect_location,
2022 			 "cannot truncate variable step.\n");
2023       return false;
2024     }
2025 
2026   /* Get the number of bits in an element.  */
2027   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2028   scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2029   unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2030 
2031   /* Set COUNT to the upper limit on the number of elements - 1.
2032      Start with the maximum vectorization factor.  */
2033   unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2034 
2035   /* Try lowering COUNT to the number of scalar latch iterations.  */
2036   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2037   widest_int max_iters;
2038   if (max_loop_iterations (loop, &max_iters)
2039       && max_iters < count)
2040     count = max_iters.to_shwi ();
2041 
2042   /* Try scales of 1 and the element size.  */
2043   int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
2044   wi::overflow_type overflow = wi::OVF_NONE;
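  /* A scale of 1 keeps the offsets in bytes, while a scale equal to the
     element size lets the offsets count elements instead, which may need
     fewer bits.  */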
2045   for (int i = 0; i < 2; ++i)
2046     {
2047       int scale = scales[i];
2048       widest_int factor;
2049       if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2050 	continue;
2051 
2052       /* See whether the largest offset, COUNT * STEP / SCALE, can be
2053 	 represented in ELEMENT_BITS bits.  */
2054       widest_int range = wi::mul (count, factor, SIGNED, &overflow);
2055       if (overflow)
2056 	continue;
2057       signop sign = range >= 0 ? UNSIGNED : SIGNED;
2058       if (wi::min_precision (range, sign) > element_bits)
2059 	{
2060 	  overflow = wi::OVF_UNKNOWN;
2061 	  continue;
2062 	}
2063 
2064       /* See whether the target supports the operation.  */
2065       tree memory_type = TREE_TYPE (DR_REF (dr));
2066       if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2067 				     memory_type, element_bits, sign, scale,
2068 				     &gs_info->ifn, &gs_info->element_type))
2069 	continue;
2070 
2071       tree offset_type = build_nonstandard_integer_type (element_bits,
2072 							 sign == UNSIGNED);
2073 
2074       gs_info->decl = NULL_TREE;
2075       /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2076 	 but we don't need to store that here.  */
2077       gs_info->base = NULL_TREE;
2078       gs_info->offset = fold_convert (offset_type, step);
2079       gs_info->offset_dt = vect_constant_def;
2080       gs_info->offset_vectype = NULL_TREE;
2081       gs_info->scale = scale;
2082       gs_info->memory_type = memory_type;
2083       return true;
2084     }
2085 
2086   if (overflow && dump_enabled_p ())
2087     dump_printf_loc (MSG_NOTE, vect_location,
2088 		     "truncating gather/scatter offset to %d bits"
2089 		     " might change its value.\n", element_bits);
2090 
2091   return false;
2092 }
2093 
2094 /* Return true if we can use gather/scatter internal functions to
2095    vectorize STMT_INFO, which is a grouped or strided load or store.
2096    MASKED_P is true if load or store is conditional.  When returning
2097    true, fill in GS_INFO with the information required to perform the
2098    operation.  */
2099 
2100 static bool
2101 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2102 				    loop_vec_info loop_vinfo, bool masked_p,
2103 				    gather_scatter_info *gs_info)
2104 {
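  /* If the target has no gather/scatter internal function for this
     access (or only provides the legacy builtin form, indicated by a
     non-null decl), try synthesizing one with a truncated offset.  */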
2105   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2106       || gs_info->decl)
2107     return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2108 						masked_p, gs_info);
2109 
2110   scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2111   unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2112   tree offset_type = TREE_TYPE (gs_info->offset);
2113   unsigned int offset_bits = TYPE_PRECISION (offset_type);
2114 
2115   /* Enforced by vect_check_gather_scatter.  */
2116   gcc_assert (element_bits >= offset_bits);
2117 
2118   /* If the elements are wider than the offset, convert the offset to the
2119      same width, without changing its sign.  */
2120   if (element_bits > offset_bits)
2121     {
2122       bool unsigned_p = TYPE_UNSIGNED (offset_type);
2123       offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2124       gs_info->offset = fold_convert (offset_type, gs_info->offset);
2125     }
2126 
2127   if (dump_enabled_p ())
2128     dump_printf_loc (MSG_NOTE, vect_location,
2129 		     "using gather/scatter for strided/grouped access,"
2130 		     " scale = %d\n", gs_info->scale);
2131 
2132   return true;
2133 }
2134 
2135 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2136    elements with a known constant step.  Return -1 if that step
2137    is negative, 0 if it is zero, and 1 if it is greater than zero.  */
2138 
2139 static int
2140 compare_step_with_zero (stmt_vec_info stmt_info)
2141 {
2142   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2143   return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
2144 			       size_zero_node);
2145 }
2146 
2147 /* If the target supports a permute mask that reverses the elements in
2148    a vector of type VECTYPE, return that mask, otherwise return null.  */
2149 
2150 static tree
2151 perm_mask_for_reverse (tree vectype)
2152 {
2153   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2154 
2155   /* The encoding has a single stepped pattern.  */
2156   vec_perm_builder sel (nunits, 1, 3);
2157   for (int i = 0; i < 3; ++i)
2158     sel.quick_push (nunits - 1 - i);
2159 
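  /* E.g. for 4 elements the series expands to { 3, 2, 1, 0 }.  */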
2160   vec_perm_indices indices (sel, 1, nunits);
2161   if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2162     return NULL_TREE;
2163   return vect_gen_perm_mask_checked (vectype, indices);
2164 }
2165 
2166 /* STMT_INFO is either a masked or unconditional store.  Return the value
2167    being stored.  */
2168 
2169 tree
2170 vect_get_store_rhs (stmt_vec_info stmt_info)
2171 {
2172   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2173     {
2174       gcc_assert (gimple_assign_single_p (assign));
2175       return gimple_assign_rhs1 (assign);
2176     }
2177   if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2178     {
2179       internal_fn ifn = gimple_call_internal_fn (call);
2180       int index = internal_fn_stored_value_index (ifn);
2181       gcc_assert (index >= 0);
2182       return gimple_call_arg (call, index);
2183     }
2184   gcc_unreachable ();
2185 }
2186 
2187 /* A subroutine of get_load_store_type, with a subset of the same
2188    arguments.  Handle the case where STMT_INFO is part of a grouped load
2189    or store.
2190 
2191    For stores, the statements in the group are all consecutive
2192    and there is no gap at the end.  For loads, the statements in the
2193    group might not be consecutive; there can be gaps between statements
2194    as well as at the end.  */
2195 
2196 static bool
2197 get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2198 			   bool masked_p, vec_load_store_type vls_type,
2199 			   vect_memory_access_type *memory_access_type,
2200 			   gather_scatter_info *gs_info)
2201 {
2202   vec_info *vinfo = stmt_info->vinfo;
2203   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2204   struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2205   stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2206   dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2207   unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2208   bool single_element_p = (stmt_info == first_stmt_info
2209 			   && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2210   unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2211   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2212 
2213   /* True if the vectorized statements would access beyond the last
2214      statement in the group.  */
2215   bool overrun_p = false;
2216 
2217   /* True if we can cope with such overrun by peeling for gaps, so that
2218      there is at least one final scalar iteration after the vector loop.  */
2219   bool can_overrun_p = (!masked_p
2220 			&& vls_type == VLS_LOAD
2221 			&& loop_vinfo
2222 			&& !loop->inner);
2223 
2224   /* There can only be a gap at the end of the group if the stride is
2225      known at compile time.  */
2226   gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2227 
2228   /* Stores can't yet have gaps.  */
2229   gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2230 
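  /* The SLP path below chooses between strided-SLP, elementwise and
     contiguous accesses; the non-SLP path starts from elementwise
     accesses and upgrades to load/store-lanes, permuted contiguous
     accesses or a gather/scatter where the target supports them.  */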
2231   if (slp)
2232     {
2233       if (STMT_VINFO_STRIDED_P (first_stmt_info))
2234 	{
2235 	  /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2236 	     separated by the stride, until we have a complete vector.
2237 	     Fall back to scalar accesses if that isn't possible.  */
2238 	  if (multiple_p (nunits, group_size))
2239 	    *memory_access_type = VMAT_STRIDED_SLP;
2240 	  else
2241 	    *memory_access_type = VMAT_ELEMENTWISE;
2242 	}
2243       else
2244 	{
2245 	  overrun_p = loop_vinfo && gap != 0;
2246 	  if (overrun_p && vls_type != VLS_LOAD)
2247 	    {
2248 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2249 			       "Grouped store with gaps requires"
2250 			       " non-consecutive accesses\n");
2251 	      return false;
2252 	    }
2253 	  /* An overrun is fine if the trailing elements are smaller
2254 	     than the alignment boundary B.  Every vector access will
2255 	     be a multiple of B and so we are guaranteed to access a
2256 	     non-gap element in the same B-sized block.  */
2257 	  if (overrun_p
2258 	      && gap < (vect_known_alignment_in_bytes (first_dr_info)
2259 			/ vect_get_scalar_dr_size (first_dr_info)))
2260 	    overrun_p = false;
2261 	  if (overrun_p && !can_overrun_p)
2262 	    {
2263 	      if (dump_enabled_p ())
2264 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2265 				 "Peeling for outer loop is not supported\n");
2266 	      return false;
2267 	    }
2268 	  *memory_access_type = VMAT_CONTIGUOUS;
2269 	}
2270     }
2271   else
2272     {
2273       /* We can always handle this case using elementwise accesses,
2274 	 but see if something more efficient is available.  */
2275       *memory_access_type = VMAT_ELEMENTWISE;
2276 
2277       /* If there is a gap at the end of the group then these optimizations
2278 	 would access excess elements in the last iteration.  */
2279       bool would_overrun_p = (gap != 0);
2280       /* An overrun is fine if the trailing elements are smaller than the
2281 	 alignment boundary B.  Every vector access will be a multiple of B
2282 	 and so we are guaranteed to access a non-gap element in the
2283 	 same B-sized block.  */
2284       if (would_overrun_p
2285 	  && !masked_p
2286 	  && gap < (vect_known_alignment_in_bytes (first_dr_info)
2287 		    / vect_get_scalar_dr_size (first_dr_info)))
2288 	would_overrun_p = false;
2289 
2290       if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2291 	  && (can_overrun_p || !would_overrun_p)
2292 	  && compare_step_with_zero (stmt_info) > 0)
2293 	{
2294 	  /* First cope with the degenerate case of a single-element
2295 	     vector.  */
2296 	  if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2297 	    *memory_access_type = VMAT_CONTIGUOUS;
2298 
2299 	  /* Otherwise try using LOAD/STORE_LANES.  */
2300 	  if (*memory_access_type == VMAT_ELEMENTWISE
2301 	      && (vls_type == VLS_LOAD
2302 		  ? vect_load_lanes_supported (vectype, group_size, masked_p)
2303 		  : vect_store_lanes_supported (vectype, group_size,
2304 						masked_p)))
2305 	    {
2306 	      *memory_access_type = VMAT_LOAD_STORE_LANES;
2307 	      overrun_p = would_overrun_p;
2308 	    }
2309 
2310 	  /* If that fails, try using permuting loads.  */
2311 	  if (*memory_access_type == VMAT_ELEMENTWISE
2312 	      && (vls_type == VLS_LOAD
2313 		  ? vect_grouped_load_supported (vectype, single_element_p,
2314 						 group_size)
2315 		  : vect_grouped_store_supported (vectype, group_size)))
2316 	    {
2317 	      *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2318 	      overrun_p = would_overrun_p;
2319 	    }
2320 	}
2321 
2322       /* As a last resort, try using a gather load or scatter store.
2323 
2324 	 ??? Although the code can handle all group sizes correctly,
2325 	 it probably isn't a win to use separate strided accesses based
2326 	 on nearby locations.  Or, even if it's a win over scalar code,
2327 	 it might not be a win over vectorizing at a lower VF, if that
2328 	 allows us to use contiguous accesses.  */
2329       if (*memory_access_type == VMAT_ELEMENTWISE
2330 	  && single_element_p
2331 	  && loop_vinfo
2332 	  && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2333 						 masked_p, gs_info))
2334 	*memory_access_type = VMAT_GATHER_SCATTER;
2335     }
2336 
2337   if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2338     {
2339       /* STMT_INFO is the leader of the group.  Check the operands of all the
2340 	 stmts of the group.  */
2341       stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2342       while (next_stmt_info)
2343 	{
2344 	  tree op = vect_get_store_rhs (next_stmt_info);
2345 	  enum vect_def_type dt;
2346 	  if (!vect_is_simple_use (op, vinfo, &dt))
2347 	    {
2348 	      if (dump_enabled_p ())
2349 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2350 				 "use not simple.\n");
2351 	      return false;
2352 	    }
2353 	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2354 	}
2355     }
2356 
2357   if (overrun_p)
2358     {
2359       gcc_assert (can_overrun_p);
2360       if (dump_enabled_p ())
2361 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2362 			 "Data access with gaps requires scalar "
2363 			 "epilogue loop\n");
2364       LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2365     }
2366 
2367   return true;
2368 }
2369 
2370 /* A subroutine of get_load_store_type, with a subset of the same
2371    arguments.  Handle the case where STMT_INFO is a load or store that
2372    accesses consecutive elements with a negative step.  */
2373 
2374 static vect_memory_access_type
2375 get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
2376 			      vec_load_store_type vls_type,
2377 			      unsigned int ncopies)
2378 {
2379   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2380   dr_alignment_support alignment_support_scheme;
2381 
2382   if (ncopies > 1)
2383     {
2384       if (dump_enabled_p ())
2385 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2386 			 "multiple types with negative step.\n");
2387       return VMAT_ELEMENTWISE;
2388     }
2389 
2390   alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
2391   if (alignment_support_scheme != dr_aligned
2392       && alignment_support_scheme != dr_unaligned_supported)
2393     {
2394       if (dump_enabled_p ())
2395 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2396 			 "negative step but alignment required.\n");
2397       return VMAT_ELEMENTWISE;
2398     }
2399 
2400   if (vls_type == VLS_STORE_INVARIANT)
2401     {
2402       if (dump_enabled_p ())
2403 	dump_printf_loc (MSG_NOTE, vect_location,
2404 			 "negative step with invariant source;"
2405 			 " no permute needed.\n");
2406       return VMAT_CONTIGUOUS_DOWN;
2407     }
2408 
2409   if (!perm_mask_for_reverse (vectype))
2410     {
2411       if (dump_enabled_p ())
2412 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2413 			 "negative step and reversing not supported.\n");
2414       return VMAT_ELEMENTWISE;
2415     }
2416 
2417   return VMAT_CONTIGUOUS_REVERSE;
2418 }
2419 
2420 /* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
2421    if there is a memory access type that the vectorized form can use,
2422    storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
2423    or scatters, fill in GS_INFO accordingly.
2424 
2425    SLP says whether we're performing SLP rather than loop vectorization.
2426    MASKED_P is true if the statement is conditional on a vectorized mask.
2427    VECTYPE is the vector type that the vectorized statements will use.
2428    NCOPIES is the number of vector statements that will be needed.  */
2429 
2430 static bool
2431 get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2432 		     bool masked_p, vec_load_store_type vls_type,
2433 		     unsigned int ncopies,
2434 		     vect_memory_access_type *memory_access_type,
2435 		     gather_scatter_info *gs_info)
2436 {
2437   vec_info *vinfo = stmt_info->vinfo;
2438   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2439   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
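  /* Explicit gather/scatter accesses are handled first; grouped and
     strided accesses have their own helpers, and the remaining cases
     are classified by the sign of the constant step.  */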
2440   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2441     {
2442       *memory_access_type = VMAT_GATHER_SCATTER;
2443       if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2444 	gcc_unreachable ();
2445       else if (!vect_is_simple_use (gs_info->offset, vinfo,
2446 				    &gs_info->offset_dt,
2447 				    &gs_info->offset_vectype))
2448 	{
2449 	  if (dump_enabled_p ())
2450 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2451 			     "%s index use not simple.\n",
2452 			     vls_type == VLS_LOAD ? "gather" : "scatter");
2453 	  return false;
2454 	}
2455     }
2456   else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2457     {
2458       if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
2459 				      vls_type, memory_access_type, gs_info))
2460 	return false;
2461     }
2462   else if (STMT_VINFO_STRIDED_P (stmt_info))
2463     {
2464       gcc_assert (!slp);
2465       if (loop_vinfo
2466 	  && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2467 						 masked_p, gs_info))
2468 	*memory_access_type = VMAT_GATHER_SCATTER;
2469       else
2470 	*memory_access_type = VMAT_ELEMENTWISE;
2471     }
2472   else
2473     {
2474       int cmp = compare_step_with_zero (stmt_info);
2475       if (cmp < 0)
2476 	*memory_access_type = get_negative_load_store_type
2477 	  (stmt_info, vectype, vls_type, ncopies);
2478       else if (cmp == 0)
2479 	{
2480 	  gcc_assert (vls_type == VLS_LOAD);
2481 	  *memory_access_type = VMAT_INVARIANT;
2482 	}
2483       else
2484 	*memory_access_type = VMAT_CONTIGUOUS;
2485     }
2486 
2487   if ((*memory_access_type == VMAT_ELEMENTWISE
2488        || *memory_access_type == VMAT_STRIDED_SLP)
2489       && !nunits.is_constant ())
2490     {
2491       if (dump_enabled_p ())
2492 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2493 			 "Not using elementwise accesses due to variable "
2494 			 "vectorization factor.\n");
2495       return false;
2496     }
2497 
2498   /* FIXME: At the moment the cost model seems to underestimate the
2499      cost of using elementwise accesses.  This check preserves the
2500      traditional behavior until that can be fixed.  */
2501   stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2502   if (!first_stmt_info)
2503     first_stmt_info = stmt_info;
2504   if (*memory_access_type == VMAT_ELEMENTWISE
2505       && !STMT_VINFO_STRIDED_P (first_stmt_info)
2506       && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2507 	   && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2508 	   && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2509     {
2510       if (dump_enabled_p ())
2511 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2512 			 "not falling back to elementwise accesses\n");
2513       return false;
2514     }
2515   return true;
2516 }
2517 
2518 /* Return true if boolean argument MASK is suitable for vectorizing
2519    conditional load or store STMT_INFO.  When returning true, store the type
2520    of the definition in *MASK_DT_OUT and the type of the vectorized mask
2521    in *MASK_VECTYPE_OUT.  */
2522 
2523 static bool
2524 vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
2525 			    vect_def_type *mask_dt_out,
2526 			    tree *mask_vectype_out)
2527 {
2528   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2529     {
2530       if (dump_enabled_p ())
2531 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2532 			 "mask argument is not a boolean.\n");
2533       return false;
2534     }
2535 
2536   if (TREE_CODE (mask) != SSA_NAME)
2537     {
2538       if (dump_enabled_p ())
2539 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2540 			 "mask argument is not an SSA name.\n");
2541       return false;
2542     }
2543 
2544   enum vect_def_type mask_dt;
2545   tree mask_vectype;
2546   if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
2547     {
2548       if (dump_enabled_p ())
2549 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2550 			 "mask use not simple.\n");
2551       return false;
2552     }
2553 
2554   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2555   if (!mask_vectype)
2556     mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2557 
2558   if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2559     {
2560       if (dump_enabled_p ())
2561 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2562 			 "could not find an appropriate vector mask type.\n");
2563       return false;
2564     }
2565 
2566   if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2567 		TYPE_VECTOR_SUBPARTS (vectype)))
2568     {
2569       if (dump_enabled_p ())
2570 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2571 			 "vector mask type %T",
2572 			 " does not match vector data type %T.\n",
2573 			 mask_vectype, vectype);
2574 
2575       return false;
2576     }
2577 
2578   *mask_dt_out = mask_dt;
2579   *mask_vectype_out = mask_vectype;
2580   return true;
2581 }
2582 
2583 /* Return true if stored value RHS is suitable for vectorizing store
2584    statement STMT_INFO.  When returning true, store the type of the
2585    definition in *RHS_DT_OUT, the type of the vectorized store value in
2586    *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */
2587 
2588 static bool
2589 vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
2590 		      vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2591 		      vec_load_store_type *vls_type_out)
2592 {
2593   /* In the case this is a store from a constant, make sure
2594      native_encode_expr can handle it.  */
2595   if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2596     {
2597       if (dump_enabled_p ())
2598 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2599 			 "cannot encode constant as a byte sequence.\n");
2600       return false;
2601     }
2602 
2603   enum vect_def_type rhs_dt;
2604   tree rhs_vectype;
2605   if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
2606     {
2607       if (dump_enabled_p ())
2608 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2609 			 "use not simple.\n");
2610       return false;
2611     }
2612 
2613   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2614   if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2615     {
2616       if (dump_enabled_p ())
2617 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2618 			 "incompatible vector types.\n");
2619       return false;
2620     }
2621 
2622   *rhs_dt_out = rhs_dt;
2623   *rhs_vectype_out = rhs_vectype;
2624   if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2625     *vls_type_out = VLS_STORE_INVARIANT;
2626   else
2627     *vls_type_out = VLS_STORE;
2628   return true;
2629 }
2630 
2631 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2632    Note that we support masks with floating-point type, in which case the
2633    floats are interpreted as a bitmask.  */
2634 
2635 static tree
2636 vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
2637 {
2638   if (TREE_CODE (masktype) == INTEGER_TYPE)
2639     return build_int_cst (masktype, -1);
2640   else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2641     {
2642       tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2643       mask = build_vector_from_val (masktype, mask);
2644       return vect_init_vector (stmt_info, mask, masktype, NULL);
2645     }
2646   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2647     {
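      /* Build a float whose bit pattern is all ones and splat it
	 across the mask vector.  */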
2648       REAL_VALUE_TYPE r;
2649       long tmp[6];
2650       for (int j = 0; j < 6; ++j)
2651 	tmp[j] = -1;
2652       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2653       tree mask = build_real (TREE_TYPE (masktype), r);
2654       mask = build_vector_from_val (masktype, mask);
2655       return vect_init_vector (stmt_info, mask, masktype, NULL);
2656     }
2657   gcc_unreachable ();
2658 }
2659 
2660 /* Build an all-zero merge value of type VECTYPE while vectorizing
2661    STMT_INFO as a gather load.  */
2662 
2663 static tree
2664 vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
2665 {
2666   tree merge;
2667   if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2668     merge = build_int_cst (TREE_TYPE (vectype), 0);
2669   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2670     {
2671       REAL_VALUE_TYPE r;
2672       long tmp[6];
2673       for (int j = 0; j < 6; ++j)
2674 	tmp[j] = 0;
2675       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2676       merge = build_real (TREE_TYPE (vectype), r);
2677     }
2678   else
2679     gcc_unreachable ();
2680   merge = build_vector_from_val (vectype, merge);
2681   return vect_init_vector (stmt_info, merge, vectype, NULL);
2682 }
2683 
2684 /* Build a gather load call while vectorizing STMT_INFO.  Insert new
2685    instructions before GSI and add them to VEC_STMT.  GS_INFO describes
2686    the gather load operation.  If the load is conditional, MASK is the
2687    unvectorized condition and MASK_DT is its definition type, otherwise
2688    MASK is null.  */
2689 
2690 static void
2691 vect_build_gather_load_calls (stmt_vec_info stmt_info,
2692 			      gimple_stmt_iterator *gsi,
2693 			      stmt_vec_info *vec_stmt,
2694 			      gather_scatter_info *gs_info,
2695 			      tree mask)
2696 {
2697   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2698   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2699   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2700   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2701   int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2702   edge pe = loop_preheader_edge (loop);
2703   enum { NARROW, NONE, WIDEN } modifier;
2704   poly_uint64 gather_off_nunits
2705     = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2706 
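  /* The gather builtin has the prototype (src, ptr, index, mask, scale);
     pick the parameter types off its argument list in that order.  */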
2707   tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2708   tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2709   tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2710   tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2711   tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2712   tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2713   tree scaletype = TREE_VALUE (arglist);
2714   tree real_masktype = masktype;
2715   gcc_checking_assert (types_compatible_p (srctype, rettype)
2716 		       && (!mask
2717 			   || TREE_CODE (masktype) == INTEGER_TYPE
2718 			   || types_compatible_p (srctype, masktype)));
2719   if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2720     masktype = build_same_sized_truth_vector_type (srctype);
2721 
2722   tree mask_halftype = masktype;
2723   tree perm_mask = NULL_TREE;
2724   tree mask_perm_mask = NULL_TREE;
2725   if (known_eq (nunits, gather_off_nunits))
2726     modifier = NONE;
2727   else if (known_eq (nunits * 2, gather_off_nunits))
2728     {
2729       modifier = WIDEN;
2730 
2731       /* Currently widening gathers and scatters are only supported for
2732 	 fixed-length vectors.  */
2733       int count = gather_off_nunits.to_constant ();
2734       vec_perm_builder sel (count, count, 1);
2735       for (int i = 0; i < count; ++i)
2736 	sel.quick_push (i | (count / 2));
2737 
2738       vec_perm_indices indices (sel, 1, count);
2739       perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2740 					      indices);
2741     }
2742   else if (known_eq (nunits, gather_off_nunits * 2))
2743     {
2744       modifier = NARROW;
2745 
2746       /* Currently narrowing gathers and scatters are only supported for
2747 	 fixed-length vectors.  */
2748       int count = nunits.to_constant ();
2749       vec_perm_builder sel (count, count, 1);
2750       sel.quick_grow (count);
2751       for (int i = 0; i < count; ++i)
2752 	sel[i] = i < count / 2 ? i : i + count / 2;
2753       vec_perm_indices indices (sel, 2, count);
2754       perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2755 
2756       ncopies *= 2;
2757 
2758       if (mask && masktype == real_masktype)
2759 	{
2760 	  for (int i = 0; i < count; ++i)
2761 	    sel[i] = i | (count / 2);
2762 	  indices.new_vector (sel, 2, count);
2763 	  mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2764 	}
2765       else if (mask)
2766 	mask_halftype
2767 	  = build_same_sized_truth_vector_type (gs_info->offset_vectype);
2768     }
2769   else
2770     gcc_unreachable ();
2771 
2772   tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2773   tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2774 
2775   tree ptr = fold_convert (ptrtype, gs_info->base);
2776   if (!is_gimple_min_invariant (ptr))
2777     {
2778       gimple_seq seq;
2779       ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2780       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2781       gcc_assert (!new_bb);
2782     }
2783 
2784   tree scale = build_int_cst (scaletype, gs_info->scale);
2785 
2786   tree vec_oprnd0 = NULL_TREE;
2787   tree vec_mask = NULL_TREE;
2788   tree src_op = NULL_TREE;
2789   tree mask_op = NULL_TREE;
2790   tree prev_res = NULL_TREE;
2791   stmt_vec_info prev_stmt_info = NULL;
2792 
2793   if (!mask)
2794     {
2795       src_op = vect_build_zero_merge_argument (stmt_info, rettype);
2796       mask_op = vect_build_all_ones_mask (stmt_info, masktype);
2797     }
2798 
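  /* Emit one gather call per copy, rebuilding (and for the WIDEN and
     NARROW cases permuting or unpacking) the offset and mask operands
     as we go.  */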
2799   for (int j = 0; j < ncopies; ++j)
2800     {
2801       tree op, var;
2802       if (modifier == WIDEN && (j & 1))
2803 	op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2804 				   perm_mask, stmt_info, gsi);
2805       else if (j == 0)
2806 	op = vec_oprnd0
2807 	  = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
2808       else
2809 	op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2810 							  vec_oprnd0);
2811 
2812       if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2813 	{
2814 	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2815 				TYPE_VECTOR_SUBPARTS (idxtype)));
2816 	  var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2817 	  op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2818 	  gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2819 	  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2820 	  op = var;
2821 	}
2822 
2823       if (mask)
2824 	{
2825 	  if (mask_perm_mask && (j & 1))
2826 	    mask_op = permute_vec_elements (mask_op, mask_op,
2827 					    mask_perm_mask, stmt_info, gsi);
2828 	  else
2829 	    {
2830 	      if (j == 0)
2831 		vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
2832 	      else if (modifier != NARROW || (j & 1) == 0)
2833 		vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2834 							   vec_mask);
2835 
2836 	      mask_op = vec_mask;
2837 	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2838 		{
2839 		  poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2840 		  poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2841 		  gcc_assert (known_eq (sub1, sub2));
2842 		  var = vect_get_new_ssa_name (masktype, vect_simple_var);
2843 		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2844 		  gassign *new_stmt
2845 		    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2846 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2847 		  mask_op = var;
2848 		}
2849 	    }
2850 	  if (modifier == NARROW && masktype != real_masktype)
2851 	    {
2852 	      var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2853 	      gassign *new_stmt
2854 		= gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2855 						    : VEC_UNPACK_LO_EXPR,
2856 				       mask_op);
2857 	      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2858 	      mask_op = var;
2859 	    }
2860 	  src_op = mask_op;
2861 	}
2862 
2863       tree mask_arg = mask_op;
2864       if (masktype != real_masktype)
2865 	{
2866 	  tree utype, optype = TREE_TYPE (mask_op);
2867 	  if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2868 	    utype = real_masktype;
2869 	  else
2870 	    utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2871 	  var = vect_get_new_ssa_name (utype, vect_scalar_var);
2872 	  mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2873 	  gassign *new_stmt
2874 	    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2875 	  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2876 	  mask_arg = var;
2877 	  if (!useless_type_conversion_p (real_masktype, utype))
2878 	    {
2879 	      gcc_assert (TYPE_PRECISION (utype)
2880 			  <= TYPE_PRECISION (real_masktype));
2881 	      var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2882 	      new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2883 	      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2884 	      mask_arg = var;
2885 	    }
2886 	  src_op = build_zero_cst (srctype);
2887 	}
2888       gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2889 					   mask_arg, scale);
2890 
2891       stmt_vec_info new_stmt_info;
2892       if (!useless_type_conversion_p (vectype, rettype))
2893 	{
2894 	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2895 				TYPE_VECTOR_SUBPARTS (rettype)));
2896 	  op = vect_get_new_ssa_name (rettype, vect_simple_var);
2897 	  gimple_call_set_lhs (new_call, op);
2898 	  vect_finish_stmt_generation (stmt_info, new_call, gsi);
2899 	  var = make_ssa_name (vec_dest);
2900 	  op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2901 	  gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2902 	  new_stmt_info
2903 	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2904 	}
2905       else
2906 	{
2907 	  var = make_ssa_name (vec_dest, new_call);
2908 	  gimple_call_set_lhs (new_call, var);
2909 	  new_stmt_info
2910 	    = vect_finish_stmt_generation (stmt_info, new_call, gsi);
2911 	}
2912 
2913       if (modifier == NARROW)
2914 	{
2915 	  if ((j & 1) == 0)
2916 	    {
2917 	      prev_res = var;
2918 	      continue;
2919 	    }
2920 	  var = permute_vec_elements (prev_res, var, perm_mask,
2921 				      stmt_info, gsi);
2922 	  new_stmt_info = loop_vinfo->lookup_def (var);
2923 	}
2924 
2925       if (prev_stmt_info == NULL)
2926 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2927       else
2928 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2929       prev_stmt_info = new_stmt_info;
2930     }
2931 }
2932 
2933 /* Prepare the base and offset in GS_INFO for vectorization.
2934    Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2935    to the vectorized offset argument for the first copy of STMT_INFO.
2936    STMT_INFO is the statement described by GS_INFO and LOOP is the
2937    containing loop.  */
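/* For example, for a gather load of the form data[idx[i]], GS_INFO->base
   is typically the invariant address of the array and GS_INFO->offset the
   SSA name holding idx[i]; the former becomes *DATAREF_PTR and the latter
   is vectorized into *VEC_OFFSET.  */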
2938 
2939 static void
2940 vect_get_gather_scatter_ops (struct loop *loop, stmt_vec_info stmt_info,
2941 			     gather_scatter_info *gs_info,
2942 			     tree *dataref_ptr, tree *vec_offset)
2943 {
2944   gimple_seq stmts = NULL;
2945   *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2946   if (stmts != NULL)
2947     {
2948       basic_block new_bb;
2949       edge pe = loop_preheader_edge (loop);
2950       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2951       gcc_assert (!new_bb);
2952     }
2953   tree offset_type = TREE_TYPE (gs_info->offset);
2954   tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2955   *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
2956 					      offset_vectype);
2957 }
2958 
2959 /* Prepare to implement a grouped or strided load or store using
2960    the gather load or scatter store operation described by GS_INFO.
2961    STMT_INFO is the load or store statement.
2962 
2963    Set *DATAREF_BUMP to the amount that should be added to the base
2964    address after each copy of the vectorized statement.  Set *VEC_OFFSET
2965    to an invariant offset vector in which element I has the value
2966    I * DR_STEP / SCALE.  */
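/* For example, with DR_STEP == 32, SCALE == 8 and a four-element vector
   type, *DATAREF_BUMP is 32 * 4 == 128 and *VEC_OFFSET is the invariant
   vector {0, 4, 8, 12}.  */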
2967 
2968 static void
2969 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2970 				 loop_vec_info loop_vinfo,
2971 				 gather_scatter_info *gs_info,
2972 				 tree *dataref_bump, tree *vec_offset)
2973 {
2974   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2975   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2976   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2977   gimple_seq stmts;
2978 
2979   tree bump = size_binop (MULT_EXPR,
2980 			  fold_convert (sizetype, DR_STEP (dr)),
2981 			  size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2982   *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2983   if (stmts)
2984     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2985 
2986   /* The offset given in GS_INFO can have pointer type, so use the element
2987      type of the vector instead.  */
2988   tree offset_type = TREE_TYPE (gs_info->offset);
2989   tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2990   offset_type = TREE_TYPE (offset_vectype);
2991 
2992   /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
2993   tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2994 			  ssize_int (gs_info->scale));
2995   step = fold_convert (offset_type, step);
2996   step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2997 
2998   /* Create {0, X, X*2, X*3, ...}.  */
2999   *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
3000 			      build_zero_cst (offset_type), step);
3001   if (stmts)
3002     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3003 }
3004 
3005 /* Return the amount that should be added to a vector pointer to move
3006    to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
3007    being vectorized and MEMORY_ACCESS_TYPE describes the type of
3008    vectorization.  */
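/* For example, for a contiguous access vectorized with a 16-byte vector
   type the increment is 16, negated to -16 when DR_STEP is negative.  */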
3009 
3010 static tree
3011 vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
3012 			     vect_memory_access_type memory_access_type)
3013 {
3014   if (memory_access_type == VMAT_INVARIANT)
3015     return size_zero_node;
3016 
3017   tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3018   tree step = vect_dr_behavior (dr_info)->step;
3019   if (tree_int_cst_sgn (step) == -1)
3020     iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3021   return iv_step;
3022 }
3023 
3024 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}.  */
3025 
3026 static bool
3027 vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3028 		    stmt_vec_info *vec_stmt, slp_tree slp_node,
3029 		    tree vectype_in, stmt_vector_for_cost *cost_vec)
3030 {
3031   tree op, vectype;
3032   gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3033   vec_info *vinfo = stmt_info->vinfo;
3034   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3035   unsigned ncopies;
3036 
3037   op = gimple_call_arg (stmt, 0);
3038   vectype = STMT_VINFO_VECTYPE (stmt_info);
3039   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3040 
3041   /* Multiple types in SLP are handled by creating the appropriate number of
3042      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
3043      case of SLP.  */
3044   if (slp_node)
3045     ncopies = 1;
3046   else
3047     ncopies = vect_get_num_copies (loop_vinfo, vectype);
3048 
3049   gcc_assert (ncopies >= 1);
3050 
3051   tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3052   if (! char_vectype)
3053     return false;
3054 
3055   poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3056   unsigned word_bytes;
3057   if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3058     return false;
3059 
3060   /* The encoding uses one stepped pattern for each byte in the word.  */
3061   vec_perm_builder elts (num_bytes, word_bytes, 3);
3062   for (unsigned i = 0; i < 3; ++i)
3063     for (unsigned j = 0; j < word_bytes; ++j)
3064       elts.quick_push ((i + 1) * word_bytes - j - 1);
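  /* For example, for __builtin_bswap32 with a 16-byte vector (four 4-byte
     words) this builds the byte selector {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
     9, 8, 15, 14, 13, 12}, i.e. a byte reversal within each word.  */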
3065 
3066   vec_perm_indices indices (elts, 1, num_bytes);
3067   if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3068     return false;
3069 
3070   if (! vec_stmt)
3071     {
3072       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3073       DUMP_VECT_SCOPE ("vectorizable_bswap");
3074       if (! slp_node)
3075 	{
3076 	  record_stmt_cost (cost_vec,
3077 			    1, vector_stmt, stmt_info, 0, vect_prologue);
3078 	  record_stmt_cost (cost_vec,
3079 			    ncopies, vec_perm, stmt_info, 0, vect_body);
3080 	}
3081       return true;
3082     }
3083 
3084   tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3085 
3086   /* Transform.  */
3087   vec<tree> vec_oprnds = vNULL;
3088   stmt_vec_info new_stmt_info = NULL;
3089   stmt_vec_info prev_stmt_info = NULL;
3090   for (unsigned j = 0; j < ncopies; j++)
3091     {
3092       /* Handle uses.  */
3093       if (j == 0)
3094 	vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
3095       else
3096 	vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
3097 
3098       /* Arguments are ready.  Create the new vector stmt.  */
3099       unsigned i;
3100       tree vop;
3101       FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3102        {
3103 	 gimple *new_stmt;
3104 	 tree tem = make_ssa_name (char_vectype);
3105 	 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3106 						      char_vectype, vop));
3107 	 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3108 	 tree tem2 = make_ssa_name (char_vectype);
3109 	 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3110 					 tem, tem, bswap_vconst);
3111 	 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3112 	 tem = make_ssa_name (vectype);
3113 	 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3114 						      vectype, tem2));
3115 	 new_stmt_info
3116 	   = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3117          if (slp_node)
3118 	   SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3119        }
3120 
3121       if (slp_node)
3122         continue;
3123 
3124       if (j == 0)
3125 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3126       else
3127 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3128 
3129       prev_stmt_info = new_stmt_info;
3130     }
3131 
3132   vec_oprnds.release ();
3133   return true;
3134 }
3135 
3136 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3137    integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3138    in a single step.  On success, store the binary pack code in
3139    *CONVERT_CODE.  */
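/* For example, narrowing a pair of V2DI vectors to one V4SI vector is a
   single VEC_PACK_TRUNC_EXPR step and is accepted, whereas a DImode to
   HImode narrowing would need an intermediate step (multi_step_cvt != 0)
   and is rejected.  */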
3140 
3141 static bool
3142 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3143 			  tree_code *convert_code)
3144 {
3145   if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3146       || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3147     return false;
3148 
3149   tree_code code;
3150   int multi_step_cvt = 0;
3151   auto_vec <tree, 8> interm_types;
3152   if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3153 					&code, &multi_step_cvt,
3154 					&interm_types)
3155       || multi_step_cvt)
3156     return false;
3157 
3158   *convert_code = code;
3159   return true;
3160 }
3161 
3162 /* Function vectorizable_call.
3163 
3164    Check if STMT_INFO performs a function call that can be vectorized.
3165    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3166    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3167    Return true if STMT_INFO is vectorizable in this way.  */
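/* For example, a call to sqrt in the loop body may be replaced by a vector
   IFN_SQRT internal-function call or by a target-provided vector built-in,
   depending on what the checks below find available.  */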
3168 
3169 static bool
3170 vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3171 		   stmt_vec_info *vec_stmt, slp_tree slp_node,
3172 		   stmt_vector_for_cost *cost_vec)
3173 {
3174   gcall *stmt;
3175   tree vec_dest;
3176   tree scalar_dest;
3177   tree op;
3178   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3179   stmt_vec_info prev_stmt_info;
3180   tree vectype_out, vectype_in;
3181   poly_uint64 nunits_in;
3182   poly_uint64 nunits_out;
3183   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3184   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3185   vec_info *vinfo = stmt_info->vinfo;
3186   tree fndecl, new_temp, rhs_type;
3187   enum vect_def_type dt[4]
3188     = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3189 	vect_unknown_def_type };
3190   tree vectypes[ARRAY_SIZE (dt)] = {};
3191   int ndts = ARRAY_SIZE (dt);
3192   int ncopies, j;
3193   auto_vec<tree, 8> vargs;
3194   auto_vec<tree, 8> orig_vargs;
3195   enum { NARROW, NONE, WIDEN } modifier;
3196   size_t i, nargs;
3197   tree lhs;
3198 
3199   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3200     return false;
3201 
3202   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3203       && ! vec_stmt)
3204     return false;
3205 
3206   /* Is STMT_INFO a vectorizable call?   */
3207   stmt = dyn_cast <gcall *> (stmt_info->stmt);
3208   if (!stmt)
3209     return false;
3210 
3211   if (gimple_call_internal_p (stmt)
3212       && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3213 	  || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3214     /* Handled by vectorizable_load and vectorizable_store.  */
3215     return false;
3216 
3217   if (gimple_call_lhs (stmt) == NULL_TREE
3218       || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3219     return false;
3220 
3221   gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3222 
3223   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3224 
3225   /* Process function arguments.  */
3226   rhs_type = NULL_TREE;
3227   vectype_in = NULL_TREE;
3228   nargs = gimple_call_num_args (stmt);
3229 
3230   /* Bail out if the function has more than four arguments; we do not have
3231      interesting builtin functions to vectorize with more than two arguments
3232      except for fma.  No arguments is also not good.  */
3233   if (nargs == 0 || nargs > 4)
3234     return false;
3235 
3236   /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
3237   combined_fn cfn = gimple_call_combined_fn (stmt);
3238   if (cfn == CFN_GOMP_SIMD_LANE)
3239     {
3240       nargs = 0;
3241       rhs_type = unsigned_type_node;
3242     }
3243 
3244   int mask_opno = -1;
3245   if (internal_fn_p (cfn))
3246     mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3247 
3248   for (i = 0; i < nargs; i++)
3249     {
3250       op = gimple_call_arg (stmt, i);
3251       if (!vect_is_simple_use (op, vinfo, &dt[i], &vectypes[i]))
3252 	{
3253 	  if (dump_enabled_p ())
3254 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3255 			     "use not simple.\n");
3256 	  return false;
3257 	}
3258 
3259       /* Skip the mask argument to an internal function.  This operand
3260 	 has been converted via a pattern if necessary.  */
3261       if ((int) i == mask_opno)
3262 	continue;
3263 
3264       /* We can only handle calls with arguments of the same type.  */
3265       if (rhs_type
3266 	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3267 	{
3268 	  if (dump_enabled_p ())
3269 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3270                              "argument types differ.\n");
3271 	  return false;
3272 	}
3273       if (!rhs_type)
3274 	rhs_type = TREE_TYPE (op);
3275 
3276       if (!vectype_in)
3277 	vectype_in = vectypes[i];
3278       else if (vectypes[i]
3279 	       && vectypes[i] != vectype_in)
3280 	{
3281 	  if (dump_enabled_p ())
3282 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3283                              "argument vector types differ.\n");
3284 	  return false;
3285 	}
3286     }
3287   /* If all arguments are external or constant defs use a vector type with
3288      the same size as the output vector type.  */
3289   if (!vectype_in)
3290     vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3291   if (vec_stmt)
3292     gcc_assert (vectype_in);
3293   if (!vectype_in)
3294     {
3295       if (dump_enabled_p ())
3296 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3297 			 "no vectype for scalar type %T\n", rhs_type);
3298 
3299       return false;
3300     }
3301 
3302   /* FORNOW */
3303   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3304   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
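  /* For example, a call whose result vector has twice as many elements as
     its argument vectors (V4SI arguments, V8HI result) is a NARROW call,
     the opposite (V8HI arguments, V4SI result) is WIDEN, and matching
     element counts mean no modifier at all.  */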
3305   if (known_eq (nunits_in * 2, nunits_out))
3306     modifier = NARROW;
3307   else if (known_eq (nunits_out, nunits_in))
3308     modifier = NONE;
3309   else if (known_eq (nunits_out * 2, nunits_in))
3310     modifier = WIDEN;
3311   else
3312     return false;
3313 
3314   /* We only handle functions that do not read or clobber memory.  */
3315   if (gimple_vuse (stmt))
3316     {
3317       if (dump_enabled_p ())
3318 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3319 			 "function reads from or writes to memory.\n");
3320       return false;
3321     }
3322 
3323   /* For now, we only vectorize functions if a target specific builtin
3324      is available.  TODO -- in some cases, it might be profitable to
3325      insert the calls for pieces of the vector, in order to be able
3326      to vectorize other operations in the loop.  */
3327   fndecl = NULL_TREE;
3328   internal_fn ifn = IFN_LAST;
3329   tree callee = gimple_call_fndecl (stmt);
3330 
3331   /* First try using an internal function.  */
3332   tree_code convert_code = ERROR_MARK;
3333   if (cfn != CFN_LAST
3334       && (modifier == NONE
3335 	  || (modifier == NARROW
3336 	      && simple_integer_narrowing (vectype_out, vectype_in,
3337 					   &convert_code))))
3338     ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3339 					  vectype_in);
3340 
3341   /* If that fails, try asking for a target-specific built-in function.  */
3342   if (ifn == IFN_LAST)
3343     {
3344       if (cfn != CFN_LAST)
3345 	fndecl = targetm.vectorize.builtin_vectorized_function
3346 	  (cfn, vectype_out, vectype_in);
3347       else if (callee)
3348 	fndecl = targetm.vectorize.builtin_md_vectorized_function
3349 	  (callee, vectype_out, vectype_in);
3350     }
3351 
3352   if (ifn == IFN_LAST && !fndecl)
3353     {
3354       if (cfn == CFN_GOMP_SIMD_LANE
3355 	  && !slp_node
3356 	  && loop_vinfo
3357 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3358 	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3359 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3360 	     == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3361 	{
3362 	  /* We can handle IFN_GOMP_SIMD_LANE by returning a
3363 	     { 0, 1, 2, ... vf - 1 } vector.  */
3364 	  gcc_assert (nargs == 0);
3365 	}
3366       else if (modifier == NONE
3367 	       && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3368 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3369 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3370 	return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
3371 				   vectype_in, cost_vec);
3372       else
3373 	{
3374 	  if (dump_enabled_p ())
3375 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3376 			     "function is not vectorizable.\n");
3377 	  return false;
3378 	}
3379     }
3380 
3381   if (slp_node)
3382     ncopies = 1;
3383   else if (modifier == NARROW && ifn == IFN_LAST)
3384     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3385   else
3386     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3387 
3388   /* Sanity check: make sure that at least one copy of the vectorized stmt
3389      needs to be generated.  */
3390   gcc_assert (ncopies >= 1);
3391 
3392   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3393   if (!vec_stmt) /* transformation not required.  */
3394     {
3395       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3396       DUMP_VECT_SCOPE ("vectorizable_call");
3397       vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3398       if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3399 	record_stmt_cost (cost_vec, ncopies / 2,
3400 			  vec_promote_demote, stmt_info, 0, vect_body);
3401 
3402       if (loop_vinfo && mask_opno >= 0)
3403 	{
3404 	  unsigned int nvectors = (slp_node
3405 				   ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3406 				   : ncopies);
3407 	  vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
3408 	}
3409       return true;
3410     }
3411 
3412   /* Transform.  */
3413 
3414   if (dump_enabled_p ())
3415     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3416 
3417   /* Handle def.  */
3418   scalar_dest = gimple_call_lhs (stmt);
3419   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3420 
3421   bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3422 
3423   stmt_vec_info new_stmt_info = NULL;
3424   prev_stmt_info = NULL;
3425   if (modifier == NONE || ifn != IFN_LAST)
3426     {
3427       tree prev_res = NULL_TREE;
3428       vargs.safe_grow (nargs);
3429       orig_vargs.safe_grow (nargs);
3430       for (j = 0; j < ncopies; ++j)
3431 	{
3432 	  /* Build argument list for the vectorized call.  */
3433 	  if (slp_node)
3434 	    {
3435 	      auto_vec<vec<tree> > vec_defs (nargs);
3436 	      vec<tree> vec_oprnds0;
3437 
3438 	      for (i = 0; i < nargs; i++)
3439 		vargs[i] = gimple_call_arg (stmt, i);
3440 	      vect_get_slp_defs (vargs, slp_node, &vec_defs);
3441 	      vec_oprnds0 = vec_defs[0];
3442 
3443 	      /* Arguments are ready.  Create the new vector stmt.  */
3444 	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3445 		{
3446 		  size_t k;
3447 		  for (k = 0; k < nargs; k++)
3448 		    {
3449 		      vec<tree> vec_oprndsk = vec_defs[k];
3450 		      vargs[k] = vec_oprndsk[i];
3451 		    }
3452 		  if (modifier == NARROW)
3453 		    {
3454 		      /* We don't define any narrowing conditional functions
3455 			 at present.  */
3456 		      gcc_assert (mask_opno < 0);
3457 		      tree half_res = make_ssa_name (vectype_in);
3458 		      gcall *call
3459 			= gimple_build_call_internal_vec (ifn, vargs);
3460 		      gimple_call_set_lhs (call, half_res);
3461 		      gimple_call_set_nothrow (call, true);
3462 		      new_stmt_info
3463 			= vect_finish_stmt_generation (stmt_info, call, gsi);
3464 		      if ((i & 1) == 0)
3465 			{
3466 			  prev_res = half_res;
3467 			  continue;
3468 			}
3469 		      new_temp = make_ssa_name (vec_dest);
3470 		      gimple *new_stmt
3471 			= gimple_build_assign (new_temp, convert_code,
3472 					       prev_res, half_res);
3473 		      new_stmt_info
3474 			= vect_finish_stmt_generation (stmt_info, new_stmt,
3475 						       gsi);
3476 		    }
3477 		  else
3478 		    {
3479 		      if (mask_opno >= 0 && masked_loop_p)
3480 			{
3481 			  unsigned int vec_num = vec_oprnds0.length ();
3482 			  /* Always true for SLP.  */
3483 			  gcc_assert (ncopies == 1);
3484 			  tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3485 							  vectype_out, i);
3486 			  vargs[mask_opno] = prepare_load_store_mask
3487 			    (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3488 			}
3489 
3490 		      gcall *call;
3491 		      if (ifn != IFN_LAST)
3492 			call = gimple_build_call_internal_vec (ifn, vargs);
3493 		      else
3494 			call = gimple_build_call_vec (fndecl, vargs);
3495 		      new_temp = make_ssa_name (vec_dest, call);
3496 		      gimple_call_set_lhs (call, new_temp);
3497 		      gimple_call_set_nothrow (call, true);
3498 		      new_stmt_info
3499 			= vect_finish_stmt_generation (stmt_info, call, gsi);
3500 		    }
3501 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3502 		}
3503 
3504 	      for (i = 0; i < nargs; i++)
3505 		{
3506 		  vec<tree> vec_oprndsi = vec_defs[i];
3507 		  vec_oprndsi.release ();
3508 		}
3509 	      continue;
3510 	    }
3511 
3512 	  if (mask_opno >= 0 && !vectypes[mask_opno])
3513 	    {
3514 	      gcc_assert (modifier != WIDEN);
3515 	      vectypes[mask_opno]
3516 		= build_same_sized_truth_vector_type (vectype_in);
3517 	    }
3518 
3519 	  for (i = 0; i < nargs; i++)
3520 	    {
3521 	      op = gimple_call_arg (stmt, i);
3522 	      if (j == 0)
3523 		vec_oprnd0
3524 		  = vect_get_vec_def_for_operand (op, stmt_info, vectypes[i]);
3525 	      else
3526 		vec_oprnd0
3527 		  = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3528 
3529 	      orig_vargs[i] = vargs[i] = vec_oprnd0;
3530 	    }
3531 
3532 	  if (mask_opno >= 0 && masked_loop_p)
3533 	    {
3534 	      tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3535 					      vectype_out, j);
3536 	      vargs[mask_opno]
3537 		= prepare_load_store_mask (TREE_TYPE (mask), mask,
3538 					   vargs[mask_opno], gsi);
3539 	    }
3540 
3541 	  if (cfn == CFN_GOMP_SIMD_LANE)
3542 	    {
3543 	      tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3544 	      tree new_var
3545 		= vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3546 	      gimple *init_stmt = gimple_build_assign (new_var, cst);
3547 	      vect_init_vector_1 (stmt_info, init_stmt, NULL);
3548 	      new_temp = make_ssa_name (vec_dest);
3549 	      gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3550 	      new_stmt_info
3551 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3552 	    }
3553 	  else if (modifier == NARROW)
3554 	    {
3555 	      /* We don't define any narrowing conditional functions at
3556 		 present.  */
3557 	      gcc_assert (mask_opno < 0);
3558 	      tree half_res = make_ssa_name (vectype_in);
3559 	      gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3560 	      gimple_call_set_lhs (call, half_res);
3561 	      gimple_call_set_nothrow (call, true);
3562 	      new_stmt_info
3563 		= vect_finish_stmt_generation (stmt_info, call, gsi);
3564 	      if ((j & 1) == 0)
3565 		{
3566 		  prev_res = half_res;
3567 		  continue;
3568 		}
3569 	      new_temp = make_ssa_name (vec_dest);
3570 	      gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3571 						       prev_res, half_res);
3572 	      new_stmt_info
3573 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3574 	    }
3575 	  else
3576 	    {
3577 	      gcall *call;
3578 	      if (ifn != IFN_LAST)
3579 		call = gimple_build_call_internal_vec (ifn, vargs);
3580 	      else
3581 		call = gimple_build_call_vec (fndecl, vargs);
3582 	      new_temp = make_ssa_name (vec_dest, call);
3583 	      gimple_call_set_lhs (call, new_temp);
3584 	      gimple_call_set_nothrow (call, true);
3585 	      new_stmt_info
3586 		= vect_finish_stmt_generation (stmt_info, call, gsi);
3587 	    }
3588 
3589 	  if (j == (modifier == NARROW ? 1 : 0))
3590 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3591 	  else
3592 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3593 
3594 	  prev_stmt_info = new_stmt_info;
3595 	}
3596     }
3597   else if (modifier == NARROW)
3598     {
3599       /* We don't define any narrowing conditional functions at present.  */
3600       gcc_assert (mask_opno < 0);
3601       for (j = 0; j < ncopies; ++j)
3602 	{
3603 	  /* Build argument list for the vectorized call.  */
3604 	  if (j == 0)
3605 	    vargs.create (nargs * 2);
3606 	  else
3607 	    vargs.truncate (0);
3608 
3609 	  if (slp_node)
3610 	    {
3611 	      auto_vec<vec<tree> > vec_defs (nargs);
3612 	      vec<tree> vec_oprnds0;
3613 
3614 	      for (i = 0; i < nargs; i++)
3615 		vargs.quick_push (gimple_call_arg (stmt, i));
3616 	      vect_get_slp_defs (vargs, slp_node, &vec_defs);
3617 	      vec_oprnds0 = vec_defs[0];
3618 
3619 	      /* Arguments are ready.  Create the new vector stmt.  */
3620 	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3621 		{
3622 		  size_t k;
3623 		  vargs.truncate (0);
3624 		  for (k = 0; k < nargs; k++)
3625 		    {
3626 		      vec<tree> vec_oprndsk = vec_defs[k];
3627 		      vargs.quick_push (vec_oprndsk[i]);
3628 		      vargs.quick_push (vec_oprndsk[i + 1]);
3629 		    }
3630 		  gcall *call;
3631 		  if (ifn != IFN_LAST)
3632 		    call = gimple_build_call_internal_vec (ifn, vargs);
3633 		  else
3634 		    call = gimple_build_call_vec (fndecl, vargs);
3635 		  new_temp = make_ssa_name (vec_dest, call);
3636 		  gimple_call_set_lhs (call, new_temp);
3637 		  gimple_call_set_nothrow (call, true);
3638 		  new_stmt_info
3639 		    = vect_finish_stmt_generation (stmt_info, call, gsi);
3640 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3641 		}
3642 
3643 	      for (i = 0; i < nargs; i++)
3644 		{
3645 		  vec<tree> vec_oprndsi = vec_defs[i];
3646 		  vec_oprndsi.release ();
3647 		}
3648 	      continue;
3649 	    }
3650 
3651 	  for (i = 0; i < nargs; i++)
3652 	    {
3653 	      op = gimple_call_arg (stmt, i);
3654 	      if (j == 0)
3655 		{
3656 		  vec_oprnd0
3657 		    = vect_get_vec_def_for_operand (op, stmt_info,
3658 						    vectypes[i]);
3659 		  vec_oprnd1
3660 		    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3661 		}
3662 	      else
3663 		{
3664 		  vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3665 						2 * i + 1);
3666 		  vec_oprnd0
3667 		    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3668 		  vec_oprnd1
3669 		    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3670 		}
3671 
3672 	      vargs.quick_push (vec_oprnd0);
3673 	      vargs.quick_push (vec_oprnd1);
3674 	    }
3675 
3676 	  gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3677 	  new_temp = make_ssa_name (vec_dest, new_stmt);
3678 	  gimple_call_set_lhs (new_stmt, new_temp);
3679 	  new_stmt_info
3680 	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3681 
3682 	  if (j == 0)
3683 	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3684 	  else
3685 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3686 
3687 	  prev_stmt_info = new_stmt_info;
3688 	}
3689 
3690       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3691     }
3692   else
3693     /* No current target implements this case.  */
3694     return false;
3695 
3696   vargs.release ();
3697 
3698   /* The call in STMT might prevent it from being removed in dce.
3699      We however cannot remove it here, due to the way the ssa name
3700      it defines is mapped to the new definition.  So just replace
3701      rhs of the statement with something harmless.  */
3702 
3703   if (slp_node)
3704     return true;
3705 
3706   stmt_info = vect_orig_stmt (stmt_info);
3707   lhs = gimple_get_lhs (stmt_info->stmt);
3708 
3709   gassign *new_stmt
3710     = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3711   vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3712 
3713   return true;
3714 }
3715 
3716 
3717 struct simd_call_arg_info
3718 {
3719   tree vectype;
3720   tree op;
3721   HOST_WIDE_INT linear_step;
3722   enum vect_def_type dt;
3723   unsigned int align;
3724   bool simd_lane_linear;
3725 };
3726 
3727 /* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
3728    is linear within simd lane (but not within whole loop), note it in
3729    *ARGINFO.  */
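/* For example, if OP is defined as base + (sizetype) lane * 4, where base
   is loop invariant and lane is the result of IFN_GOMP_SIMD_LANE for the
   loop's simduid, then OP is linear within the simd lane with linear_step 4
   and op equal to base ("base" and "lane" being illustrative names).  */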
3730 
3731 static void
3732 vect_simd_lane_linear (tree op, struct loop *loop,
3733 		       struct simd_call_arg_info *arginfo)
3734 {
3735   gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3736 
3737   if (!is_gimple_assign (def_stmt)
3738       || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3739       || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3740     return;
3741 
3742   tree base = gimple_assign_rhs1 (def_stmt);
3743   HOST_WIDE_INT linear_step = 0;
3744   tree v = gimple_assign_rhs2 (def_stmt);
3745   while (TREE_CODE (v) == SSA_NAME)
3746     {
3747       tree t;
3748       def_stmt = SSA_NAME_DEF_STMT (v);
3749       if (is_gimple_assign (def_stmt))
3750 	switch (gimple_assign_rhs_code (def_stmt))
3751 	  {
3752 	  case PLUS_EXPR:
3753 	    t = gimple_assign_rhs2 (def_stmt);
3754 	    if (linear_step || TREE_CODE (t) != INTEGER_CST)
3755 	      return;
3756 	    base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3757 	    v = gimple_assign_rhs1 (def_stmt);
3758 	    continue;
3759 	  case MULT_EXPR:
3760 	    t = gimple_assign_rhs2 (def_stmt);
3761 	    if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3762 	      return;
3763 	    linear_step = tree_to_shwi (t);
3764 	    v = gimple_assign_rhs1 (def_stmt);
3765 	    continue;
3766 	  CASE_CONVERT:
3767 	    t = gimple_assign_rhs1 (def_stmt);
3768 	    if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3769 		|| (TYPE_PRECISION (TREE_TYPE (v))
3770 		    < TYPE_PRECISION (TREE_TYPE (t))))
3771 	      return;
3772 	    if (!linear_step)
3773 	      linear_step = 1;
3774 	    v = t;
3775 	    continue;
3776 	  default:
3777 	    return;
3778 	  }
3779       else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3780 	       && loop->simduid
3781 	       && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3782 	       && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3783 		   == loop->simduid))
3784 	{
3785 	  if (!linear_step)
3786 	    linear_step = 1;
3787 	  arginfo->linear_step = linear_step;
3788 	  arginfo->op = base;
3789 	  arginfo->simd_lane_linear = true;
3790 	  return;
3791 	}
3792     }
3793 }
3794 
3795 /* Return the number of elements in vector type VECTYPE, which is associated
3796    with a SIMD clone.  At present these vectors always have a constant
3797    length.  */
3798 
3799 static unsigned HOST_WIDE_INT
3800 simd_clone_subparts (tree vectype)
3801 {
3802   return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3803 }
3804 
3805 /* Function vectorizable_simd_clone_call.
3806 
3807    Check if STMT_INFO performs a function call that can be vectorized
3808    by calling a simd clone of the function.
3809    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3810    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3811    Return true if STMT_INFO is vectorizable in this way.  */
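/* For example, a function declared with "#pragma omp declare simd" (or the
   "simd" attribute) has SIMD clones attached to its cgraph node; the scalar
   call is then replaced by a call to the clone whose simdlen best matches
   the loop's vectorization factor.  */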
3812 
3813 static bool
3814 vectorizable_simd_clone_call (stmt_vec_info stmt_info,
3815 			      gimple_stmt_iterator *gsi,
3816 			      stmt_vec_info *vec_stmt, slp_tree slp_node,
3817 			      stmt_vector_for_cost *)
3818 {
3819   tree vec_dest;
3820   tree scalar_dest;
3821   tree op, type;
3822   tree vec_oprnd0 = NULL_TREE;
3823   stmt_vec_info prev_stmt_info;
3824   tree vectype;
3825   unsigned int nunits;
3826   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3827   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3828   vec_info *vinfo = stmt_info->vinfo;
3829   struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3830   tree fndecl, new_temp;
3831   int ncopies, j;
3832   auto_vec<simd_call_arg_info> arginfo;
3833   vec<tree> vargs = vNULL;
3834   size_t i, nargs;
3835   tree lhs, rtype, ratype;
3836   vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3837 
3838   /* Is STMT a vectorizable call?   */
3839   gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3840   if (!stmt)
3841     return false;
3842 
3843   fndecl = gimple_call_fndecl (stmt);
3844   if (fndecl == NULL_TREE)
3845     return false;
3846 
3847   struct cgraph_node *node = cgraph_node::get (fndecl);
3848   if (node == NULL || node->simd_clones == NULL)
3849     return false;
3850 
3851   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3852     return false;
3853 
3854   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3855       && ! vec_stmt)
3856     return false;
3857 
3858   if (gimple_call_lhs (stmt)
3859       && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3860     return false;
3861 
3862   gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3863 
3864   vectype = STMT_VINFO_VECTYPE (stmt_info);
3865 
3866   if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3867     return false;
3868 
3869   /* FORNOW */
3870   if (slp_node)
3871     return false;
3872 
3873   /* Process function arguments.  */
3874   nargs = gimple_call_num_args (stmt);
3875 
3876   /* Bail out if the function has zero arguments.  */
3877   if (nargs == 0)
3878     return false;
3879 
3880   arginfo.reserve (nargs, true);
3881 
3882   for (i = 0; i < nargs; i++)
3883     {
3884       simd_call_arg_info thisarginfo;
3885       affine_iv iv;
3886 
3887       thisarginfo.linear_step = 0;
3888       thisarginfo.align = 0;
3889       thisarginfo.op = NULL_TREE;
3890       thisarginfo.simd_lane_linear = false;
3891 
3892       op = gimple_call_arg (stmt, i);
3893       if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3894 			       &thisarginfo.vectype)
3895 	  || thisarginfo.dt == vect_uninitialized_def)
3896 	{
3897 	  if (dump_enabled_p ())
3898 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3899 			     "use not simple.\n");
3900 	  return false;
3901 	}
3902 
3903       if (thisarginfo.dt == vect_constant_def
3904 	  || thisarginfo.dt == vect_external_def)
3905 	gcc_assert (thisarginfo.vectype == NULL_TREE);
3906       else
3907 	gcc_assert (thisarginfo.vectype != NULL_TREE);
3908 
3909       /* For linear arguments, the analyze phase should have saved
3910 	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
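      /* That vector holds the chosen clone's decl at index 0 and, for each
	 linear argument I, the base at I*3 + 1, the step at I*3 + 2 and a
	 flag at I*3 + 3 saying whether the argument is linear only within
	 the simd lane.  */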
3911       if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3912 	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3913 	{
3914 	  gcc_assert (vec_stmt);
3915 	  thisarginfo.linear_step
3916 	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3917 	  thisarginfo.op
3918 	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3919 	  thisarginfo.simd_lane_linear
3920 	    = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3921 	       == boolean_true_node);
3922 	  /* If loop has been peeled for alignment, we need to adjust it.  */
3923 	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3924 	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3925 	  if (n1 != n2 && !thisarginfo.simd_lane_linear)
3926 	    {
3927 	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3928 	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3929 	      tree opt = TREE_TYPE (thisarginfo.op);
3930 	      bias = fold_convert (TREE_TYPE (step), bias);
3931 	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3932 	      thisarginfo.op
3933 		= fold_build2 (POINTER_TYPE_P (opt)
3934 			       ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3935 			       thisarginfo.op, bias);
3936 	    }
3937 	}
3938       else if (!vec_stmt
3939 	       && thisarginfo.dt != vect_constant_def
3940 	       && thisarginfo.dt != vect_external_def
3941 	       && loop_vinfo
3942 	       && TREE_CODE (op) == SSA_NAME
3943 	       && simple_iv (loop, loop_containing_stmt (stmt), op,
3944 			     &iv, false)
3945 	       && tree_fits_shwi_p (iv.step))
3946 	{
3947 	  thisarginfo.linear_step = tree_to_shwi (iv.step);
3948 	  thisarginfo.op = iv.base;
3949 	}
3950       else if ((thisarginfo.dt == vect_constant_def
3951 		|| thisarginfo.dt == vect_external_def)
3952 	       && POINTER_TYPE_P (TREE_TYPE (op)))
3953 	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3954       /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3955 	 linear too.  */
3956       if (POINTER_TYPE_P (TREE_TYPE (op))
3957 	  && !thisarginfo.linear_step
3958 	  && !vec_stmt
3959 	  && thisarginfo.dt != vect_constant_def
3960 	  && thisarginfo.dt != vect_external_def
3961 	  && loop_vinfo
3962 	  && !slp_node
3963 	  && TREE_CODE (op) == SSA_NAME)
3964 	vect_simd_lane_linear (op, loop, &thisarginfo);
3965 
3966       arginfo.quick_push (thisarginfo);
3967     }
3968 
3969   unsigned HOST_WIDE_INT vf;
3970   if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3971     {
3972       if (dump_enabled_p ())
3973 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3974 			 "not considering SIMD clones; not yet supported"
3975 			 " for variable-width vectors.\n");
3976       return false;
3977     }
3978 
3979   unsigned int badness = 0;
3980   struct cgraph_node *bestn = NULL;
3981   if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3982     bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3983   else
3984     for (struct cgraph_node *n = node->simd_clones; n != NULL;
3985 	 n = n->simdclone->next_clone)
3986       {
3987 	unsigned int this_badness = 0;
3988 	if (n->simdclone->simdlen > vf
3989 	    || n->simdclone->nargs != nargs)
3990 	  continue;
3991 	if (n->simdclone->simdlen < vf)
3992 	  this_badness += (exact_log2 (vf)
3993 			   - exact_log2 (n->simdclone->simdlen)) * 1024;
3994 	if (n->simdclone->inbranch)
3995 	  this_badness += 2048;
3996 	int target_badness = targetm.simd_clone.usable (n);
3997 	if (target_badness < 0)
3998 	  continue;
3999 	this_badness += target_badness * 512;
4000 	/* FORNOW: Have to add code to add the mask argument.  */
4001 	if (n->simdclone->inbranch)
4002 	  continue;
4003 	for (i = 0; i < nargs; i++)
4004 	  {
4005 	    switch (n->simdclone->args[i].arg_type)
4006 	      {
4007 	      case SIMD_CLONE_ARG_TYPE_VECTOR:
4008 		if (!useless_type_conversion_p
4009 			(n->simdclone->args[i].orig_type,
4010 			 TREE_TYPE (gimple_call_arg (stmt, i))))
4011 		  i = -1;
4012 		else if (arginfo[i].dt == vect_constant_def
4013 			 || arginfo[i].dt == vect_external_def
4014 			 || arginfo[i].linear_step)
4015 		  this_badness += 64;
4016 		break;
4017 	      case SIMD_CLONE_ARG_TYPE_UNIFORM:
4018 		if (arginfo[i].dt != vect_constant_def
4019 		    && arginfo[i].dt != vect_external_def)
4020 		  i = -1;
4021 		break;
4022 	      case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4023 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4024 		if (arginfo[i].dt == vect_constant_def
4025 		    || arginfo[i].dt == vect_external_def
4026 		    || (arginfo[i].linear_step
4027 			!= n->simdclone->args[i].linear_step))
4028 		  i = -1;
4029 		break;
4030 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4031 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4032 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4033 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4034 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4035 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4036 		/* FORNOW */
4037 		i = -1;
4038 		break;
4039 	      case SIMD_CLONE_ARG_TYPE_MASK:
4040 		gcc_unreachable ();
4041 	      }
4042 	    if (i == (size_t) -1)
4043 	      break;
4044 	    if (n->simdclone->args[i].alignment > arginfo[i].align)
4045 	      {
4046 		i = -1;
4047 		break;
4048 	      }
4049 	    if (arginfo[i].align)
4050 	      this_badness += (exact_log2 (arginfo[i].align)
4051 			       - exact_log2 (n->simdclone->args[i].alignment));
4052 	  }
4053 	if (i == (size_t) -1)
4054 	  continue;
4055 	if (bestn == NULL || this_badness < badness)
4056 	  {
4057 	    bestn = n;
4058 	    badness = this_badness;
4059 	  }
4060       }
4061 
4062   if (bestn == NULL)
4063     return false;
4064 
4065   for (i = 0; i < nargs; i++)
4066     if ((arginfo[i].dt == vect_constant_def
4067 	 || arginfo[i].dt == vect_external_def)
4068 	&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4069       {
4070 	arginfo[i].vectype
4071 	  = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
4072 								     i)));
4073 	if (arginfo[i].vectype == NULL
4074 	    || (simd_clone_subparts (arginfo[i].vectype)
4075 		> bestn->simdclone->simdlen))
4076 	  return false;
4077       }
4078 
4079   fndecl = bestn->decl;
4080   nunits = bestn->simdclone->simdlen;
4081   ncopies = vf / nunits;
4082 
4083   /* If the function isn't const, only allow it in simd loops where the
4084      user has asserted that at least nunits consecutive iterations can be
4085      performed using SIMD instructions.  */
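  /* For example, a loop annotated with "#pragma omp simd safelen(8)" has
     loop->safelen == 8, so a non-const clone is usable as long as nunits
     does not exceed 8.  */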
4086   if ((loop == NULL || (unsigned) loop->safelen < nunits)
4087       && gimple_vuse (stmt))
4088     return false;
4089 
4090   /* Sanity check: make sure that at least one copy of the vectorized stmt
4091      needs to be generated.  */
4092   gcc_assert (ncopies >= 1);
4093 
4094   if (!vec_stmt) /* transformation not required.  */
4095     {
4096       STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4097       for (i = 0; i < nargs; i++)
4098 	if ((bestn->simdclone->args[i].arg_type
4099 	     == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4100 	    || (bestn->simdclone->args[i].arg_type
4101 		== SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4102 	  {
4103 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4104 									+ 1);
4105 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4106 	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4107 		       ? size_type_node : TREE_TYPE (arginfo[i].op);
4108 	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
4109 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4110 	    tree sll = arginfo[i].simd_lane_linear
4111 		       ? boolean_true_node : boolean_false_node;
4112 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4113 	  }
4114       STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4115       DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4116 /*      vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4117       return true;
4118     }
4119 
4120   /* Transform.  */
4121 
4122   if (dump_enabled_p ())
4123     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4124 
4125   /* Handle def.  */
4126   scalar_dest = gimple_call_lhs (stmt);
4127   vec_dest = NULL_TREE;
4128   rtype = NULL_TREE;
4129   ratype = NULL_TREE;
4130   if (scalar_dest)
4131     {
4132       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4133       rtype = TREE_TYPE (TREE_TYPE (fndecl));
4134       if (TREE_CODE (rtype) == ARRAY_TYPE)
4135 	{
4136 	  ratype = rtype;
4137 	  rtype = TREE_TYPE (ratype);
4138 	}
4139     }
4140 
4141   prev_stmt_info = NULL;
4142   for (j = 0; j < ncopies; ++j)
4143     {
4144       /* Build argument list for the vectorized call.  */
4145       if (j == 0)
4146 	vargs.create (nargs);
4147       else
4148 	vargs.truncate (0);
4149 
4150       for (i = 0; i < nargs; i++)
4151 	{
4152 	  unsigned int k, l, m, o;
4153 	  tree atype;
4154 	  op = gimple_call_arg (stmt, i);
4155 	  switch (bestn->simdclone->args[i].arg_type)
4156 	    {
4157 	    case SIMD_CLONE_ARG_TYPE_VECTOR:
4158 	      atype = bestn->simdclone->args[i].vector_type;
4159 	      o = nunits / simd_clone_subparts (atype);
4160 	      for (m = j * o; m < (j + 1) * o; m++)
4161 		{
4162 		  if (simd_clone_subparts (atype)
4163 		      < simd_clone_subparts (arginfo[i].vectype))
4164 		    {
4165 		      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4166 		      k = (simd_clone_subparts (arginfo[i].vectype)
4167 			   / simd_clone_subparts (atype));
4168 		      gcc_assert ((k & (k - 1)) == 0);
4169 		      if (m == 0)
4170 			vec_oprnd0
4171 			  = vect_get_vec_def_for_operand (op, stmt_info);
4172 		      else
4173 			{
4174 			  vec_oprnd0 = arginfo[i].op;
4175 			  if ((m & (k - 1)) == 0)
4176 			    vec_oprnd0
4177 			      = vect_get_vec_def_for_stmt_copy (vinfo,
4178 								vec_oprnd0);
4179 			}
4180 		      arginfo[i].op = vec_oprnd0;
4181 		      vec_oprnd0
4182 			= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4183 				  bitsize_int (prec),
4184 				  bitsize_int ((m & (k - 1)) * prec));
4185 		      gassign *new_stmt
4186 			= gimple_build_assign (make_ssa_name (atype),
4187 					       vec_oprnd0);
4188 		      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4189 		      vargs.safe_push (gimple_assign_lhs (new_stmt));
4190 		    }
4191 		  else
4192 		    {
4193 		      k = (simd_clone_subparts (atype)
4194 			   / simd_clone_subparts (arginfo[i].vectype));
4195 		      gcc_assert ((k & (k - 1)) == 0);
4196 		      vec<constructor_elt, va_gc> *ctor_elts;
4197 		      if (k != 1)
4198 			vec_alloc (ctor_elts, k);
4199 		      else
4200 			ctor_elts = NULL;
4201 		      for (l = 0; l < k; l++)
4202 			{
4203 			  if (m == 0 && l == 0)
4204 			    vec_oprnd0
4205 			      = vect_get_vec_def_for_operand (op, stmt_info);
4206 			  else
4207 			    vec_oprnd0
4208 			      = vect_get_vec_def_for_stmt_copy (vinfo,
4209 								arginfo[i].op);
4210 			  arginfo[i].op = vec_oprnd0;
4211 			  if (k == 1)
4212 			    break;
4213 			  CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4214 						  vec_oprnd0);
4215 			}
4216 		      if (k == 1)
4217 			vargs.safe_push (vec_oprnd0);
4218 		      else
4219 			{
4220 			  vec_oprnd0 = build_constructor (atype, ctor_elts);
4221 			  gassign *new_stmt
4222 			    = gimple_build_assign (make_ssa_name (atype),
4223 						   vec_oprnd0);
4224 			  vect_finish_stmt_generation (stmt_info, new_stmt,
4225 						       gsi);
4226 			  vargs.safe_push (gimple_assign_lhs (new_stmt));
4227 			}
4228 		    }
4229 		}
4230 	      break;
4231 	    case SIMD_CLONE_ARG_TYPE_UNIFORM:
4232 	      vargs.safe_push (op);
4233 	      break;
4234 	    case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4235 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4236 	      if (j == 0)
4237 		{
4238 		  gimple_seq stmts;
4239 		  arginfo[i].op
4240 		    = force_gimple_operand (arginfo[i].op, &stmts, true,
4241 					    NULL_TREE);
4242 		  if (stmts != NULL)
4243 		    {
4244 		      basic_block new_bb;
4245 		      edge pe = loop_preheader_edge (loop);
4246 		      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4247 		      gcc_assert (!new_bb);
4248 		    }
4249 		  if (arginfo[i].simd_lane_linear)
4250 		    {
4251 		      vargs.safe_push (arginfo[i].op);
4252 		      break;
4253 		    }
4254 		  tree phi_res = copy_ssa_name (op);
4255 		  gphi *new_phi = create_phi_node (phi_res, loop->header);
4256 		  loop_vinfo->add_stmt (new_phi);
4257 		  add_phi_arg (new_phi, arginfo[i].op,
4258 			       loop_preheader_edge (loop), UNKNOWN_LOCATION);
4259 		  enum tree_code code
4260 		    = POINTER_TYPE_P (TREE_TYPE (op))
4261 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
4262 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
4263 			      ? sizetype : TREE_TYPE (op);
4264 		  widest_int cst
4265 		    = wi::mul (bestn->simdclone->args[i].linear_step,
4266 			       ncopies * nunits);
4267 		  tree tcst = wide_int_to_tree (type, cst);
4268 		  tree phi_arg = copy_ssa_name (op);
4269 		  gassign *new_stmt
4270 		    = gimple_build_assign (phi_arg, code, phi_res, tcst);
4271 		  gimple_stmt_iterator si = gsi_after_labels (loop->header);
4272 		  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4273 		  loop_vinfo->add_stmt (new_stmt);
4274 		  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4275 			       UNKNOWN_LOCATION);
4276 		  arginfo[i].op = phi_res;
4277 		  vargs.safe_push (phi_res);
4278 		}
4279 	      else
4280 		{
4281 		  enum tree_code code
4282 		    = POINTER_TYPE_P (TREE_TYPE (op))
4283 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
4284 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
4285 			      ? sizetype : TREE_TYPE (op);
4286 		  widest_int cst
4287 		    = wi::mul (bestn->simdclone->args[i].linear_step,
4288 			       j * nunits);
4289 		  tree tcst = wide_int_to_tree (type, cst);
4290 		  new_temp = make_ssa_name (TREE_TYPE (op));
4291 		  gassign *new_stmt
4292 		    = gimple_build_assign (new_temp, code,
4293 					   arginfo[i].op, tcst);
4294 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4295 		  vargs.safe_push (new_temp);
4296 		}
4297 	      break;
4298 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4299 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4300 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4301 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4302 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4303 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4304 	    default:
4305 	      gcc_unreachable ();
4306 	    }
4307 	}
4308 
4309       gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4310       if (vec_dest)
4311 	{
4312 	  gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4313 	  if (ratype)
4314 	    new_temp = create_tmp_var (ratype);
4315 	  else if (simd_clone_subparts (vectype)
4316 		   == simd_clone_subparts (rtype))
4317 	    new_temp = make_ssa_name (vec_dest, new_call);
4318 	  else
4319 	    new_temp = make_ssa_name (rtype, new_call);
4320 	  gimple_call_set_lhs (new_call, new_temp);
4321 	}
4322       stmt_vec_info new_stmt_info
4323 	= vect_finish_stmt_generation (stmt_info, new_call, gsi);
4324 
4325       if (vec_dest)
4326 	{
4327 	  if (simd_clone_subparts (vectype) < nunits)
4328 	    {
4329 	      unsigned int k, l;
4330 	      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4331 	      poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4332 	      k = nunits / simd_clone_subparts (vectype);
4333 	      gcc_assert ((k & (k - 1)) == 0);
4334 	      for (l = 0; l < k; l++)
4335 		{
4336 		  tree t;
4337 		  if (ratype)
4338 		    {
4339 		      t = build_fold_addr_expr (new_temp);
4340 		      t = build2 (MEM_REF, vectype, t,
4341 				  build_int_cst (TREE_TYPE (t), l * bytes));
4342 		    }
4343 		  else
4344 		    t = build3 (BIT_FIELD_REF, vectype, new_temp,
4345 				bitsize_int (prec), bitsize_int (l * prec));
4346 		  gimple *new_stmt
4347 		    = gimple_build_assign (make_ssa_name (vectype), t);
4348 		  new_stmt_info
4349 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4350 
4351 		  if (j == 0 && l == 0)
4352 		    STMT_VINFO_VEC_STMT (stmt_info)
4353 		      = *vec_stmt = new_stmt_info;
4354 		  else
4355 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4356 
4357 		  prev_stmt_info = new_stmt_info;
4358 		}
4359 
4360 	      if (ratype)
4361 		vect_clobber_variable (stmt_info, gsi, new_temp);
4362 	      continue;
4363 	    }
4364 	  else if (simd_clone_subparts (vectype) > nunits)
4365 	    {
4366 	      unsigned int k = (simd_clone_subparts (vectype)
4367 				/ simd_clone_subparts (rtype));
4368 	      gcc_assert ((k & (k - 1)) == 0);
4369 	      if ((j & (k - 1)) == 0)
4370 		vec_alloc (ret_ctor_elts, k);
4371 	      if (ratype)
4372 		{
4373 		  unsigned int m, o = nunits / simd_clone_subparts (rtype);
4374 		  for (m = 0; m < o; m++)
4375 		    {
4376 		      tree tem = build4 (ARRAY_REF, rtype, new_temp,
4377 					 size_int (m), NULL_TREE, NULL_TREE);
4378 		      gimple *new_stmt
4379 			= gimple_build_assign (make_ssa_name (rtype), tem);
4380 		      new_stmt_info
4381 			= vect_finish_stmt_generation (stmt_info, new_stmt,
4382 						       gsi);
4383 		      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4384 					      gimple_assign_lhs (new_stmt));
4385 		    }
4386 		  vect_clobber_variable (stmt_info, gsi, new_temp);
4387 		}
4388 	      else
4389 		CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4390 	      if ((j & (k - 1)) != k - 1)
4391 		continue;
4392 	      vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4393 	      gimple *new_stmt
4394 		= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4395 	      new_stmt_info
4396 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4397 
4398 	      if ((unsigned) j == k - 1)
4399 		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4400 	      else
4401 		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4402 
4403 	      prev_stmt_info = new_stmt_info;
4404 	      continue;
4405 	    }
4406 	  else if (ratype)
4407 	    {
4408 	      tree t = build_fold_addr_expr (new_temp);
4409 	      t = build2 (MEM_REF, vectype, t,
4410 			  build_int_cst (TREE_TYPE (t), 0));
4411 	      gimple *new_stmt
4412 		= gimple_build_assign (make_ssa_name (vec_dest), t);
4413 	      new_stmt_info
4414 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4415 	      vect_clobber_variable (stmt_info, gsi, new_temp);
4416 	    }
4417 	}
4418 
4419       if (j == 0)
4420 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4421       else
4422 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4423 
4424       prev_stmt_info = new_stmt_info;
4425     }
4426 
4427   vargs.release ();
4428 
4429   /* The call in STMT might prevent it from being removed in DCE.
4430      We however cannot remove it here, due to the way the SSA name
4431      it defines is mapped to the new definition.  So just replace the
4432      rhs of the statement with something harmless.  */
4433 
4434   if (slp_node)
4435     return true;
4436 
4437   gimple *new_stmt;
4438   if (scalar_dest)
4439     {
4440       type = TREE_TYPE (scalar_dest);
4441       lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4442       new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4443     }
4444   else
4445     new_stmt = gimple_build_nop ();
4446   vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4447   unlink_stmt_vdef (stmt);
4448 
4449   return true;
4450 }
4451 
4452 
4453 /* Function vect_gen_widened_results_half
4454 
4455    Create a vector stmt whose code, number of arguments, and result
4456    variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
4457    VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
4458    In the case that CODE is a CALL_EXPR, this means that a call to DECL
4459    needs to be created (DECL is a function-decl of a target builtin).
4460    STMT_INFO is the original scalar stmt that we are vectorizing.  */
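/* For example (an illustrative sketch only; the vector modes are assumed,
   not taken from any particular target): widening a multiplication of two
   V8HI operands into V4SI results is emitted as two halves,

     vres_lo = VEC_WIDEN_MULT_LO_EXPR <vop0, vop1>;   // CODE1
     vres_hi = VEC_WIDEN_MULT_HI_EXPR <vop0, vop1>;   // CODE2

   and this function builds one such half per call.  */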
4461 
4462 static gimple *
4463 vect_gen_widened_results_half (enum tree_code code,
4464 			       tree decl,
4465                                tree vec_oprnd0, tree vec_oprnd1, int op_type,
4466 			       tree vec_dest, gimple_stmt_iterator *gsi,
4467 			       stmt_vec_info stmt_info)
4468 {
4469   gimple *new_stmt;
4470   tree new_temp;
4471 
4472   /* Generate half of the widened result:  */
4473   if (code == CALL_EXPR)
4474     {
4475       /* Target specific support  */
4476       if (op_type == binary_op)
4477 	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4478       else
4479 	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4480       new_temp = make_ssa_name (vec_dest, new_stmt);
4481       gimple_call_set_lhs (new_stmt, new_temp);
4482     }
4483   else
4484     {
4485       /* Generic support */
4486       gcc_assert (op_type == TREE_CODE_LENGTH (code));
4487       if (op_type != binary_op)
4488 	vec_oprnd1 = NULL;
4489       new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4490       new_temp = make_ssa_name (vec_dest, new_stmt);
4491       gimple_assign_set_lhs (new_stmt, new_temp);
4492     }
4493   vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4494 
4495   return new_stmt;
4496 }
4497 
4498 
4499 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4500    For the first operand we call vect_get_vec_def_for_operand (with OPRND
4501    containing the scalar operand), and for the rest we get a copy with
4502    vect_get_vec_def_for_stmt_copy() using the previous vector definition
4503    (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4504    The vectors are collected into VEC_OPRNDS.  */
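/* Illustrative sketch (loop vectorization, no SLP): when called with
   MULTI_STEP_CVT == 1, one invocation collects four vector defs into
   VEC_OPRNDS:

     def0   <- def for the scalar *OPRND
     def1   <- stmt copy of def0
     def2   <- stmt copy of def1   (pushed by the recursive call)
     def3   <- stmt copy of def2   (pushed by the recursive call)

   which the caller then combines pairwise with a demotion operation.  */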
4505 
4506 static void
4507 vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
4508 			  vec<tree> *vec_oprnds, int multi_step_cvt)
4509 {
4510   vec_info *vinfo = stmt_info->vinfo;
4511   tree vec_oprnd;
4512 
4513   /* Get first vector operand.  */
4514   /* All the vector operands except the very first one (that is scalar oprnd)
4515      are stmt copies.  */
4516   if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4517     vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
4518   else
4519     vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4520 
4521   vec_oprnds->quick_push (vec_oprnd);
4522 
4523   /* Get second vector operand.  */
4524   vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4525   vec_oprnds->quick_push (vec_oprnd);
4526 
4527   *oprnd = vec_oprnd;
4528 
4529   /* For conversion in multiple steps, continue to get operands
4530      recursively.  */
4531   if (multi_step_cvt)
4532     vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
4533 			      multi_step_cvt - 1);
4534 }
4535 
4536 
4537 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4538    For multi-step conversions store the resulting vectors and call the function
4539    recursively.  */
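/* Illustrative sketch (the vector widths are assumed for the example):
   demoting int to char on 128-bit vectors takes two steps.  With four V4SI
   inputs in VEC_OPRNDS, the first invocation packs them pairwise,

     t0 = VEC_PACK_TRUNC_EXPR <v0, v1>;   // V8HI
     t1 = VEC_PACK_TRUNC_EXPR <v2, v3>;   // V8HI

   and the recursive call produces the final V16QI result,

     r = VEC_PACK_TRUNC_EXPR <t0, t1>;  */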
4540 
4541 static void
4542 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4543 				       int multi_step_cvt,
4544 				       stmt_vec_info stmt_info,
4545 				       vec<tree> vec_dsts,
4546 				       gimple_stmt_iterator *gsi,
4547 				       slp_tree slp_node, enum tree_code code,
4548 				       stmt_vec_info *prev_stmt_info)
4549 {
4550   unsigned int i;
4551   tree vop0, vop1, new_tmp, vec_dest;
4552 
4553   vec_dest = vec_dsts.pop ();
4554 
4555   for (i = 0; i < vec_oprnds->length (); i += 2)
4556     {
4557       /* Create demotion operation.  */
4558       vop0 = (*vec_oprnds)[i];
4559       vop1 = (*vec_oprnds)[i + 1];
4560       gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4561       new_tmp = make_ssa_name (vec_dest, new_stmt);
4562       gimple_assign_set_lhs (new_stmt, new_tmp);
4563       stmt_vec_info new_stmt_info
4564 	= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4565 
4566       if (multi_step_cvt)
4567 	/* Store the resulting vector for next recursive call.  */
4568 	(*vec_oprnds)[i/2] = new_tmp;
4569       else
4570 	{
4571 	  /* This is the last step of the conversion sequence. Store the
4572 	     vectors in SLP_NODE or in vector info of the scalar statement
4573 	     (or in STMT_VINFO_RELATED_STMT chain).  */
4574 	  if (slp_node)
4575 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4576 	  else
4577 	    {
4578 	      if (!*prev_stmt_info)
4579 		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4580 	      else
4581 		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4582 
4583 	      *prev_stmt_info = new_stmt_info;
4584 	    }
4585 	}
4586     }
4587 
4588   /* For multi-step demotion operations we first generate demotion operations
4589      from the source type to the intermediate types, and then combine the
4590      results (stored in VEC_OPRNDS) in a demotion operation to the destination
4591      type.  */
4592   if (multi_step_cvt)
4593     {
4594       /* At each level of recursion we have half of the operands we had at the
4595 	 previous level.  */
4596       vec_oprnds->truncate ((i+1)/2);
4597       vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4598 					     stmt_info, vec_dsts, gsi,
4599 					     slp_node, VEC_PACK_TRUNC_EXPR,
4600 					     prev_stmt_info);
4601     }
4602 
4603   vec_dsts.quick_push (vec_dest);
4604 }
4605 
4606 
4607 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4608    and VEC_OPRNDS1, for a binary operation associated with scalar statement
4609    STMT_INFO.  For multi-step conversions store the resulting vectors and
4610    call the function recursively.  */
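/* Illustrative sketch (vector modes assumed): for a unary promotion each
   V8HI operand in VEC_OPRNDS0 yields two V4SI results,

     lo = VEC_UNPACK_LO_EXPR <vop0>;   // CODE1
     hi = VEC_UNPACK_HI_EXPR <vop0>;   // CODE2

   both of which are pushed back, so the operand vector doubles in length
   after each invocation.  */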
4611 
4612 static void
4613 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4614 					vec<tree> *vec_oprnds1,
4615 					stmt_vec_info stmt_info, tree vec_dest,
4616 					gimple_stmt_iterator *gsi,
4617 					enum tree_code code1,
4618 					enum tree_code code2, tree decl1,
4619 					tree decl2, int op_type)
4620 {
4621   int i;
4622   tree vop0, vop1, new_tmp1, new_tmp2;
4623   gimple *new_stmt1, *new_stmt2;
4624   vec<tree> vec_tmp = vNULL;
4625 
4626   vec_tmp.create (vec_oprnds0->length () * 2);
4627   FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4628     {
4629       if (op_type == binary_op)
4630 	vop1 = (*vec_oprnds1)[i];
4631       else
4632 	vop1 = NULL_TREE;
4633 
4634       /* Generate the two halves of promotion operation.  */
4635       new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4636 						 op_type, vec_dest, gsi,
4637 						 stmt_info);
4638       new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4639 						 op_type, vec_dest, gsi,
4640 						 stmt_info);
4641       if (is_gimple_call (new_stmt1))
4642 	{
4643 	  new_tmp1 = gimple_call_lhs (new_stmt1);
4644 	  new_tmp2 = gimple_call_lhs (new_stmt2);
4645 	}
4646       else
4647 	{
4648 	  new_tmp1 = gimple_assign_lhs (new_stmt1);
4649 	  new_tmp2 = gimple_assign_lhs (new_stmt2);
4650 	}
4651 
4652       /* Store the results for the next step.  */
4653       vec_tmp.quick_push (new_tmp1);
4654       vec_tmp.quick_push (new_tmp2);
4655     }
4656 
4657   vec_oprnds0->release ();
4658   *vec_oprnds0 = vec_tmp;
4659 }
4660 
4661 
4662 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4663    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4664    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4665    Return true if STMT_INFO is vectorizable in this way.  */
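/* For example (illustrative only; the vector types are assumed):

     short s; int i; float f;
     i = (int) s;     // nunits_in (V8HI) > nunits_out (V4SI)  -> WIDEN
     s = (short) i;   // nunits_out (V8HI) > nunits_in (V4SI)  -> NARROW
     f = (float) i;   // same number of elements (V4SI, V4SF)  -> NONE  */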
4666 
4667 static bool
4668 vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4669 			 stmt_vec_info *vec_stmt, slp_tree slp_node,
4670 			 stmt_vector_for_cost *cost_vec)
4671 {
4672   tree vec_dest;
4673   tree scalar_dest;
4674   tree op0, op1 = NULL_TREE;
4675   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4676   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4677   enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4678   enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4679   tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4680   tree new_temp;
4681   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4682   int ndts = 2;
4683   stmt_vec_info prev_stmt_info;
4684   poly_uint64 nunits_in;
4685   poly_uint64 nunits_out;
4686   tree vectype_out, vectype_in;
4687   int ncopies, i, j;
4688   tree lhs_type, rhs_type;
4689   enum { NARROW, NONE, WIDEN } modifier;
4690   vec<tree> vec_oprnds0 = vNULL;
4691   vec<tree> vec_oprnds1 = vNULL;
4692   tree vop0;
4693   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4694   vec_info *vinfo = stmt_info->vinfo;
4695   int multi_step_cvt = 0;
4696   vec<tree> interm_types = vNULL;
4697   tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4698   int op_type;
4699   unsigned short fltsz;
4700 
4701   /* Is STMT a vectorizable conversion?   */
4702 
4703   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4704     return false;
4705 
4706   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4707       && ! vec_stmt)
4708     return false;
4709 
4710   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4711   if (!stmt)
4712     return false;
4713 
4714   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4715     return false;
4716 
4717   code = gimple_assign_rhs_code (stmt);
4718   if (!CONVERT_EXPR_CODE_P (code)
4719       && code != FIX_TRUNC_EXPR
4720       && code != FLOAT_EXPR
4721       && code != WIDEN_MULT_EXPR
4722       && code != WIDEN_LSHIFT_EXPR)
4723     return false;
4724 
4725   op_type = TREE_CODE_LENGTH (code);
4726 
4727   /* Check types of lhs and rhs.  */
4728   scalar_dest = gimple_assign_lhs (stmt);
4729   lhs_type = TREE_TYPE (scalar_dest);
4730   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4731 
4732   op0 = gimple_assign_rhs1 (stmt);
4733   rhs_type = TREE_TYPE (op0);
4734 
4735   if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4736       && !((INTEGRAL_TYPE_P (lhs_type)
4737 	    && INTEGRAL_TYPE_P (rhs_type))
4738 	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
4739 	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
4740     return false;
4741 
4742   if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4743       && ((INTEGRAL_TYPE_P (lhs_type)
4744 	   && !type_has_mode_precision_p (lhs_type))
4745 	  || (INTEGRAL_TYPE_P (rhs_type)
4746 	      && !type_has_mode_precision_p (rhs_type))))
4747     {
4748       if (dump_enabled_p ())
4749 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4750                          "type conversion to/from bit-precision unsupported."
4751                          "\n");
4752       return false;
4753     }
4754 
4755   /* Check the operands of the operation.  */
4756   if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
4757     {
4758       if (dump_enabled_p ())
4759 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4760                          "use not simple.\n");
4761       return false;
4762     }
4763   if (op_type == binary_op)
4764     {
4765       bool ok;
4766 
4767       op1 = gimple_assign_rhs2 (stmt);
4768       gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4769       /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4770 	 OP1.  */
4771       if (CONSTANT_CLASS_P (op0))
4772 	ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4773       else
4774 	ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4775 
4776       if (!ok)
4777 	{
4778           if (dump_enabled_p ())
4779             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4780                              "use not simple.\n");
4781 	  return false;
4782 	}
4783     }
4784 
4785   /* If op0 is an external or constant def, use a vector type of
4786      the same size as the output vector type.  */
4787   if (!vectype_in)
4788     vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4789   if (vec_stmt)
4790     gcc_assert (vectype_in);
4791   if (!vectype_in)
4792     {
4793       if (dump_enabled_p ())
4794 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4795 			 "no vectype for scalar type %T\n", rhs_type);
4796 
4797       return false;
4798     }
4799 
4800   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4801       && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4802     {
4803       if (dump_enabled_p ())
4804 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4805 			 "can't convert between boolean and non "
4806 			 "boolean vectors %T\n", rhs_type);
4807 
4808       return false;
4809     }
4810 
4811   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4812   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4813   if (known_eq (nunits_out, nunits_in))
4814     modifier = NONE;
4815   else if (multiple_p (nunits_out, nunits_in))
4816     modifier = NARROW;
4817   else
4818     {
4819       gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4820       modifier = WIDEN;
4821     }
4822 
4823   /* Multiple types in SLP are handled by creating the appropriate number of
4824      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
4825      case of SLP.  */
4826   if (slp_node)
4827     ncopies = 1;
4828   else if (modifier == NARROW)
4829     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4830   else
4831     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4832 
4833   /* Sanity check: make sure that at least one copy of the vectorized stmt
4834      needs to be generated.  */
4835   gcc_assert (ncopies >= 1);
4836 
4837   bool found_mode = false;
4838   scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4839   scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4840   opt_scalar_mode rhs_mode_iter;
4841 
4842   /* Supportable by target?  */
4843   switch (modifier)
4844     {
4845     case NONE:
4846       if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4847 	return false;
4848       if (supportable_convert_operation (code, vectype_out, vectype_in,
4849 					 &decl1, &code1))
4850 	break;
4851       /* FALLTHRU */
4852     unsupported:
4853       if (dump_enabled_p ())
4854 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4855                          "conversion not supported by target.\n");
4856       return false;
4857 
4858     case WIDEN:
4859       if (supportable_widening_operation (code, stmt_info, vectype_out,
4860 					  vectype_in, &code1, &code2,
4861 					  &multi_step_cvt, &interm_types))
4862 	{
4863 	  /* Binary widening operation can only be supported directly by the
4864 	     architecture.  */
4865 	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
4866 	  break;
4867 	}
4868 
4869       if (code != FLOAT_EXPR
4870 	  || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4871 	goto unsupported;
4872 
4873       fltsz = GET_MODE_SIZE (lhs_mode);
4874       FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4875 	{
4876 	  rhs_mode = rhs_mode_iter.require ();
4877 	  if (GET_MODE_SIZE (rhs_mode) > fltsz)
4878 	    break;
4879 
4880 	  cvt_type
4881 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4882 	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4883 	  if (cvt_type == NULL_TREE)
4884 	    goto unsupported;
4885 
4886 	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
4887 	    {
4888 	      if (!supportable_convert_operation (code, vectype_out,
4889 						  cvt_type, &decl1, &codecvt1))
4890 		goto unsupported;
4891 	    }
4892 	  else if (!supportable_widening_operation (code, stmt_info,
4893 						    vectype_out, cvt_type,
4894 						    &codecvt1, &codecvt2,
4895 						    &multi_step_cvt,
4896 						    &interm_types))
4897 	    continue;
4898 	  else
4899 	    gcc_assert (multi_step_cvt == 0);
4900 
4901 	  if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
4902 					      vectype_in, &code1, &code2,
4903 					      &multi_step_cvt, &interm_types))
4904 	    {
4905 	      found_mode = true;
4906 	      break;
4907 	    }
4908 	}
4909 
4910       if (!found_mode)
4911 	goto unsupported;
4912 
4913       if (GET_MODE_SIZE (rhs_mode) == fltsz)
4914 	codecvt2 = ERROR_MARK;
4915       else
4916 	{
4917 	  multi_step_cvt++;
4918 	  interm_types.safe_push (cvt_type);
4919 	  cvt_type = NULL_TREE;
4920 	}
4921       break;
4922 
4923     case NARROW:
4924       gcc_assert (op_type == unary_op);
4925       if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4926 					   &code1, &multi_step_cvt,
4927 					   &interm_types))
4928 	break;
4929 
4930       if (code != FIX_TRUNC_EXPR
4931 	  || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4932 	goto unsupported;
4933 
4934       cvt_type
4935 	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4936       cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4937       if (cvt_type == NULL_TREE)
4938 	goto unsupported;
4939       if (!supportable_convert_operation (code, cvt_type, vectype_in,
4940 					  &decl1, &codecvt1))
4941 	goto unsupported;
4942       if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4943 					   &code1, &multi_step_cvt,
4944 					   &interm_types))
4945 	break;
4946       goto unsupported;
4947 
4948     default:
4949       gcc_unreachable ();
4950     }
4951 
4952   if (!vec_stmt)		/* transformation not required.  */
4953     {
4954       DUMP_VECT_SCOPE ("vectorizable_conversion");
4955       if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4956         {
4957 	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4958 	  vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4959 				  cost_vec);
4960 	}
4961       else if (modifier == NARROW)
4962 	{
4963 	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4964 	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4965 					      cost_vec);
4966 	}
4967       else
4968 	{
4969 	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4970 	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4971 					      cost_vec);
4972 	}
4973       interm_types.release ();
4974       return true;
4975     }
4976 
4977   /* Transform.  */
4978   if (dump_enabled_p ())
4979     dump_printf_loc (MSG_NOTE, vect_location,
4980                      "transform conversion. ncopies = %d.\n", ncopies);
4981 
4982   if (op_type == binary_op)
4983     {
4984       if (CONSTANT_CLASS_P (op0))
4985 	op0 = fold_convert (TREE_TYPE (op1), op0);
4986       else if (CONSTANT_CLASS_P (op1))
4987 	op1 = fold_convert (TREE_TYPE (op0), op1);
4988     }
4989 
4990   /* In case of multi-step conversion, we first generate conversion operations
4991      to the intermediate types, and then from those types to the final one.
4992      We create vector destinations for the intermediate type (TYPES) received
4993      from supportable_*_operation, and store them in the correct order
4994      for future use in vect_create_vectorized_*_stmts ().  */
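  /* Illustrative sketch (vector widths assumed): for a two-step narrowing
     from int to char, INTERM_TYPES holds { V8HI } and VEC_DSTS is filled
     as { V16QI dest, V8HI dest }; the demotion helper pops the V8HI
     destination for the first packing step and the V16QI destination for
     the final one.  */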
4995   auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4996   vec_dest = vect_create_destination_var (scalar_dest,
4997 					  (cvt_type && modifier == WIDEN)
4998 					  ? cvt_type : vectype_out);
4999   vec_dsts.quick_push (vec_dest);
5000 
5001   if (multi_step_cvt)
5002     {
5003       for (i = interm_types.length () - 1;
5004 	   interm_types.iterate (i, &intermediate_type); i--)
5005 	{
5006 	  vec_dest = vect_create_destination_var (scalar_dest,
5007 						  intermediate_type);
5008 	  vec_dsts.quick_push (vec_dest);
5009 	}
5010     }
5011 
5012   if (cvt_type)
5013     vec_dest = vect_create_destination_var (scalar_dest,
5014 					    modifier == WIDEN
5015 					    ? vectype_out : cvt_type);
5016 
5017   if (!slp_node)
5018     {
5019       if (modifier == WIDEN)
5020 	{
5021 	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
5022 	  if (op_type == binary_op)
5023 	    vec_oprnds1.create (1);
5024 	}
5025       else if (modifier == NARROW)
5026 	vec_oprnds0.create (
5027 		   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
5028     }
5029   else if (code == WIDEN_LSHIFT_EXPR)
5030     vec_oprnds1.create (slp_node->vec_stmts_size);
5031 
5032   last_oprnd = op0;
5033   prev_stmt_info = NULL;
5034   switch (modifier)
5035     {
5036     case NONE:
5037       for (j = 0; j < ncopies; j++)
5038 	{
5039 	  if (j == 0)
5040 	    vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
5041 			       NULL, slp_node);
5042 	  else
5043 	    vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
5044 
5045 	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5046 	    {
5047 	      stmt_vec_info new_stmt_info;
5048 	      /* Arguments are ready, create the new vector stmt.  */
5049 	      if (code1 == CALL_EXPR)
5050 		{
5051 		  gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5052 		  new_temp = make_ssa_name (vec_dest, new_stmt);
5053 		  gimple_call_set_lhs (new_stmt, new_temp);
5054 		  new_stmt_info
5055 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5056 		}
5057 	      else
5058 		{
5059 		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5060 		  gassign *new_stmt
5061 		    = gimple_build_assign (vec_dest, code1, vop0);
5062 		  new_temp = make_ssa_name (vec_dest, new_stmt);
5063 		  gimple_assign_set_lhs (new_stmt, new_temp);
5064 		  new_stmt_info
5065 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5066 		}
5067 
5068 	      if (slp_node)
5069 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5070 	      else
5071 		{
5072 		  if (!prev_stmt_info)
5073 		    STMT_VINFO_VEC_STMT (stmt_info)
5074 		      = *vec_stmt = new_stmt_info;
5075 		  else
5076 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5077 		  prev_stmt_info = new_stmt_info;
5078 		}
5079 	    }
5080 	}
5081       break;
5082 
5083     case WIDEN:
5084       /* In case the vectorization factor (VF) is bigger than the number
5085 	 of elements that we can fit in a vectype (nunits), we have to
5086 	 generate more than one vector stmt - i.e., we need to "unroll"
5087 	 the vector stmt by a factor VF/nunits.  */
5088       for (j = 0; j < ncopies; j++)
5089 	{
5090 	  /* Handle uses.  */
5091 	  if (j == 0)
5092 	    {
5093 	      if (slp_node)
5094 		{
5095 		  if (code == WIDEN_LSHIFT_EXPR)
5096 		    {
5097 		      unsigned int k;
5098 
5099 		      vec_oprnd1 = op1;
5100 		      /* Store vec_oprnd1 for every vector stmt to be created
5101 			 for SLP_NODE.  We check during the analysis that all
5102 			 the shift arguments are the same.  */
5103 		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5104 			vec_oprnds1.quick_push (vec_oprnd1);
5105 
5106 		      vect_get_vec_defs (op0, NULL_TREE, stmt_info,
5107 					 &vec_oprnds0, NULL, slp_node);
5108 		    }
5109 		  else
5110 		    vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
5111 				       &vec_oprnds1, slp_node);
5112 		}
5113 	      else
5114 		{
5115 		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
5116 		  vec_oprnds0.quick_push (vec_oprnd0);
5117 		  if (op_type == binary_op)
5118 		    {
5119 		      if (code == WIDEN_LSHIFT_EXPR)
5120 			vec_oprnd1 = op1;
5121 		      else
5122 			vec_oprnd1
5123 			  = vect_get_vec_def_for_operand (op1, stmt_info);
5124 		      vec_oprnds1.quick_push (vec_oprnd1);
5125 		    }
5126 		}
5127 	    }
5128 	  else
5129 	    {
5130 	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5131 	      vec_oprnds0.truncate (0);
5132 	      vec_oprnds0.quick_push (vec_oprnd0);
5133 	      if (op_type == binary_op)
5134 		{
5135 		  if (code == WIDEN_LSHIFT_EXPR)
5136 		    vec_oprnd1 = op1;
5137 		  else
5138 		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5139 								 vec_oprnd1);
5140 		  vec_oprnds1.truncate (0);
5141 		  vec_oprnds1.quick_push (vec_oprnd1);
5142 		}
5143 	    }
5144 
5145 	  /* Arguments are ready.  Create the new vector stmts.  */
5146 	  for (i = multi_step_cvt; i >= 0; i--)
5147 	    {
5148 	      tree this_dest = vec_dsts[i];
5149 	      enum tree_code c1 = code1, c2 = code2;
5150 	      if (i == 0 && codecvt2 != ERROR_MARK)
5151 		{
5152 		  c1 = codecvt1;
5153 		  c2 = codecvt2;
5154 		}
5155 	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5156 						      &vec_oprnds1, stmt_info,
5157 						      this_dest, gsi,
5158 						      c1, c2, decl1, decl2,
5159 						      op_type);
5160 	    }
5161 
5162 	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5163 	    {
5164 	      stmt_vec_info new_stmt_info;
5165 	      if (cvt_type)
5166 		{
5167 		  if (codecvt1 == CALL_EXPR)
5168 		    {
5169 		      gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5170 		      new_temp = make_ssa_name (vec_dest, new_stmt);
5171 		      gimple_call_set_lhs (new_stmt, new_temp);
5172 		      new_stmt_info
5173 			= vect_finish_stmt_generation (stmt_info, new_stmt,
5174 						       gsi);
5175 		    }
5176 		  else
5177 		    {
5178 		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5179 		      new_temp = make_ssa_name (vec_dest);
5180 		      gassign *new_stmt
5181 			= gimple_build_assign (new_temp, codecvt1, vop0);
5182 		      new_stmt_info
5183 			= vect_finish_stmt_generation (stmt_info, new_stmt,
5184 						       gsi);
5185 		    }
5186 		}
5187 	      else
5188 		new_stmt_info = vinfo->lookup_def (vop0);
5189 
5190 	      if (slp_node)
5191 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5192 	      else
5193 		{
5194 		  if (!prev_stmt_info)
5195 		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5196 		  else
5197 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5198 		  prev_stmt_info = new_stmt_info;
5199 		}
5200 	    }
5201 	}
5202 
5203       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5204       break;
5205 
5206     case NARROW:
5207       /* In case the vectorization factor (VF) is bigger than the number
5208 	 of elements that we can fit in a vectype (nunits), we have to
5209 	 generate more than one vector stmt - i.e., we need to "unroll"
5210 	 the vector stmt by a factor VF/nunits.  */
5211       for (j = 0; j < ncopies; j++)
5212 	{
5213 	  /* Handle uses.  */
5214 	  if (slp_node)
5215 	    vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5216 			       slp_node);
5217 	  else
5218 	    {
5219 	      vec_oprnds0.truncate (0);
5220 	      vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
5221 					vect_pow2 (multi_step_cvt) - 1);
5222 	    }
5223 
5224 	  /* Arguments are ready.  Create the new vector stmts.  */
5225 	  if (cvt_type)
5226 	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5227 	      {
5228 		if (codecvt1 == CALL_EXPR)
5229 		  {
5230 		    gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5231 		    new_temp = make_ssa_name (vec_dest, new_stmt);
5232 		    gimple_call_set_lhs (new_stmt, new_temp);
5233 		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5234 		  }
5235 		else
5236 		  {
5237 		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5238 		    new_temp = make_ssa_name (vec_dest);
5239 		    gassign *new_stmt
5240 		      = gimple_build_assign (new_temp, codecvt1, vop0);
5241 		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5242 		  }
5243 
5244 		vec_oprnds0[i] = new_temp;
5245 	      }
5246 
5247 	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5248 						 stmt_info, vec_dsts, gsi,
5249 						 slp_node, code1,
5250 						 &prev_stmt_info);
5251 	}
5252 
5253       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5254       break;
5255     }
5256 
5257   vec_oprnds0.release ();
5258   vec_oprnds1.release ();
5259   interm_types.release ();
5260 
5261   return true;
5262 }
5263 
5264 
5265 /* Function vectorizable_assignment.
5266 
5267    Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5268    If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5269    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5270    Return true if STMT_INFO is vectorizable in this way.  */
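/* Typical cases handled here (an illustrative sketch, not an exhaustive
   list):

     x_1 = y_2;                   // plain SSA copy
     x_1 = (unsigned int) y_2;    // conversion that changes neither the
                                  // element count nor the vector size;
                                  // emitted as a VIEW_CONVERT_EXPR  */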
5271 
5272 static bool
5273 vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5274 			 stmt_vec_info *vec_stmt, slp_tree slp_node,
5275 			 stmt_vector_for_cost *cost_vec)
5276 {
5277   tree vec_dest;
5278   tree scalar_dest;
5279   tree op;
5280   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5281   tree new_temp;
5282   enum vect_def_type dt[1] = {vect_unknown_def_type};
5283   int ndts = 1;
5284   int ncopies;
5285   int i, j;
5286   vec<tree> vec_oprnds = vNULL;
5287   tree vop;
5288   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5289   vec_info *vinfo = stmt_info->vinfo;
5290   stmt_vec_info prev_stmt_info = NULL;
5291   enum tree_code code;
5292   tree vectype_in;
5293 
5294   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5295     return false;
5296 
5297   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5298       && ! vec_stmt)
5299     return false;
5300 
5301   /* Is vectorizable assignment?  */
5302   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5303   if (!stmt)
5304     return false;
5305 
5306   scalar_dest = gimple_assign_lhs (stmt);
5307   if (TREE_CODE (scalar_dest) != SSA_NAME)
5308     return false;
5309 
5310   code = gimple_assign_rhs_code (stmt);
5311   if (gimple_assign_single_p (stmt)
5312       || code == PAREN_EXPR
5313       || CONVERT_EXPR_CODE_P (code))
5314     op = gimple_assign_rhs1 (stmt);
5315   else
5316     return false;
5317 
5318   if (code == VIEW_CONVERT_EXPR)
5319     op = TREE_OPERAND (op, 0);
5320 
5321   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5322   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5323 
5324   /* Multiple types in SLP are handled by creating the appropriate number of
5325      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5326      case of SLP.  */
5327   if (slp_node)
5328     ncopies = 1;
5329   else
5330     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5331 
5332   gcc_assert (ncopies >= 1);
5333 
5334   if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
5335     {
5336       if (dump_enabled_p ())
5337         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5338                          "use not simple.\n");
5339       return false;
5340     }
5341 
5342   /* We can handle NOP_EXPR conversions that do not change the number
5343      of elements or the vector size.  */
5344   if ((CONVERT_EXPR_CODE_P (code)
5345        || code == VIEW_CONVERT_EXPR)
5346       && (!vectype_in
5347 	  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5348 	  || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5349 		       GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5350     return false;
5351 
5352   /* We do not handle bit-precision changes.  */
5353   if ((CONVERT_EXPR_CODE_P (code)
5354        || code == VIEW_CONVERT_EXPR)
5355       && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5356       && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5357 	  || !type_has_mode_precision_p (TREE_TYPE (op)))
5358       /* But a conversion that does not change the bit-pattern is ok.  */
5359       && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5360 	    > TYPE_PRECISION (TREE_TYPE (op)))
5361 	   && TYPE_UNSIGNED (TREE_TYPE (op)))
5362       /* Conversion between boolean types of different sizes is
5363 	 a simple assignment in case their vectypes are the same
5364 	 boolean vectors.  */
5365       && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5366 	  || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5367     {
5368       if (dump_enabled_p ())
5369         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5370                          "type conversion to/from bit-precision "
5371                          "unsupported.\n");
5372       return false;
5373     }
5374 
5375   if (!vec_stmt) /* transformation not required.  */
5376     {
5377       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5378       DUMP_VECT_SCOPE ("vectorizable_assignment");
5379       vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5380       return true;
5381     }
5382 
5383   /* Transform.  */
5384   if (dump_enabled_p ())
5385     dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5386 
5387   /* Handle def.  */
5388   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5389 
5390   /* Handle use.  */
5391   for (j = 0; j < ncopies; j++)
5392     {
5393       /* Handle uses.  */
5394       if (j == 0)
5395 	vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
5396       else
5397 	vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5398 
5399       /* Arguments are ready.  Create the new vector stmt.  */
5400       stmt_vec_info new_stmt_info = NULL;
5401       FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5402        {
5403 	 if (CONVERT_EXPR_CODE_P (code)
5404 	     || code == VIEW_CONVERT_EXPR)
5405 	   vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5406 	 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5407          new_temp = make_ssa_name (vec_dest, new_stmt);
5408          gimple_assign_set_lhs (new_stmt, new_temp);
5409 	 new_stmt_info
5410 	   = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5411          if (slp_node)
5412 	   SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5413        }
5414 
5415       if (slp_node)
5416         continue;
5417 
5418       if (j == 0)
5419 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5420       else
5421 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5422 
5423       prev_stmt_info = new_stmt_info;
5424     }
5425 
5426   vec_oprnds.release ();
5427   return true;
5428 }
5429 
5430 
5431 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5432    either as shift by a scalar or by a vector.  */
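/* For example, a caller interested in vectorizing  x << n  for int
   elements may simply check (illustrative use only)

     if (vect_supportable_shift (LSHIFT_EXPR, integer_type_node))
       ...

   without caring whether the target provides the by-scalar or the
   by-vector shift pattern.  */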
5433 
5434 bool
5435 vect_supportable_shift (enum tree_code code, tree scalar_type)
5436 {
5437 
5438   machine_mode vec_mode;
5439   optab optab;
5440   int icode;
5441   tree vectype;
5442 
5443   vectype = get_vectype_for_scalar_type (scalar_type);
5444   if (!vectype)
5445     return false;
5446 
5447   optab = optab_for_tree_code (code, vectype, optab_scalar);
5448   if (!optab
5449       || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5450     {
5451       optab = optab_for_tree_code (code, vectype, optab_vector);
5452       if (!optab
5453           || (optab_handler (optab, TYPE_MODE (vectype))
5454                       == CODE_FOR_nothing))
5455         return false;
5456     }
5457 
5458   vec_mode = TYPE_MODE (vectype);
5459   icode = (int) optab_handler (optab, vec_mode);
5460   if (icode == CODE_FOR_nothing)
5461     return false;
5462 
5463   return true;
5464 }
5465 
5466 
5467 /* Function vectorizable_shift.
5468 
5469    Check if STMT_INFO performs a shift operation that can be vectorized.
5470    If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5471    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5472    Return true if STMT_INFO is vectorizable in this way.  */
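/* For example (illustrative):

     for (i = 0; i < n; i++)
       a[i] = b[i] << k;      // invariant K: vector shifted by a scalar
     for (i = 0; i < n; i++)
       a[i] = b[i] << c[i];   // varying count: vector shifted by a vector  */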
5473 
5474 bool
5475 vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5476 		    stmt_vec_info *vec_stmt, slp_tree slp_node,
5477 		    stmt_vector_for_cost *cost_vec)
5478 {
5479   tree vec_dest;
5480   tree scalar_dest;
5481   tree op0, op1 = NULL;
5482   tree vec_oprnd1 = NULL_TREE;
5483   tree vectype;
5484   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5485   enum tree_code code;
5486   machine_mode vec_mode;
5487   tree new_temp;
5488   optab optab;
5489   int icode;
5490   machine_mode optab_op2_mode;
5491   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5492   int ndts = 2;
5493   stmt_vec_info prev_stmt_info;
5494   poly_uint64 nunits_in;
5495   poly_uint64 nunits_out;
5496   tree vectype_out;
5497   tree op1_vectype;
5498   int ncopies;
5499   int j, i;
5500   vec<tree> vec_oprnds0 = vNULL;
5501   vec<tree> vec_oprnds1 = vNULL;
5502   tree vop0, vop1;
5503   unsigned int k;
5504   bool scalar_shift_arg = true;
5505   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5506   vec_info *vinfo = stmt_info->vinfo;
5507 
5508   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5509     return false;
5510 
5511   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5512       && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5513       && ! vec_stmt)
5514     return false;
5515 
5516   /* Is STMT a vectorizable binary/unary operation?   */
5517   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5518   if (!stmt)
5519     return false;
5520 
5521   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5522     return false;
5523 
5524   code = gimple_assign_rhs_code (stmt);
5525 
5526   if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5527       || code == RROTATE_EXPR))
5528     return false;
5529 
5530   scalar_dest = gimple_assign_lhs (stmt);
5531   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5532   if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5533     {
5534       if (dump_enabled_p ())
5535         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5536                          "bit-precision shifts not supported.\n");
5537       return false;
5538     }
5539 
5540   op0 = gimple_assign_rhs1 (stmt);
5541   if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5542     {
5543       if (dump_enabled_p ())
5544         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5545                          "use not simple.\n");
5546       return false;
5547     }
5548   /* If op0 is an external or constant def, use a vector type with
5549      the same size as the output vector type.  */
5550   if (!vectype)
5551     vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5552   if (vec_stmt)
5553     gcc_assert (vectype);
5554   if (!vectype)
5555     {
5556       if (dump_enabled_p ())
5557         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5558                          "no vectype for scalar type\n");
5559       return false;
5560     }
5561 
5562   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5563   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5564   if (maybe_ne (nunits_out, nunits_in))
5565     return false;
5566 
5567   op1 = gimple_assign_rhs2 (stmt);
5568   stmt_vec_info op1_def_stmt_info;
5569   if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5570 			   &op1_def_stmt_info))
5571     {
5572       if (dump_enabled_p ())
5573         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5574                          "use not simple.\n");
5575       return false;
5576     }
5577 
5578   /* Multiple types in SLP are handled by creating the appropriate number of
5579      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5580      case of SLP.  */
5581   if (slp_node)
5582     ncopies = 1;
5583   else
5584     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5585 
5586   gcc_assert (ncopies >= 1);
5587 
5588   /* Determine whether the shift amount is a vector, or scalar.  If the
5589      shift/rotate amount is a vector, use the vector/vector shift optabs.  */
5590 
5591   if ((dt[1] == vect_internal_def
5592        || dt[1] == vect_induction_def
5593        || dt[1] == vect_nested_cycle)
5594       && !slp_node)
5595     scalar_shift_arg = false;
5596   else if (dt[1] == vect_constant_def
5597 	   || dt[1] == vect_external_def
5598 	   || dt[1] == vect_internal_def)
5599     {
5600       /* In SLP we need to check whether the shift count is the same
5601 	 for all statements; in loops, if it is a constant or invariant,
5602 	 it is always a scalar shift.  */
5603       if (slp_node)
5604 	{
5605 	  vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5606 	  stmt_vec_info slpstmt_info;
5607 
5608 	  FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5609 	    {
5610 	      gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5611 	      if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5612 		scalar_shift_arg = false;
5613 	    }
5614 
5615 	  /* For internal SLP defs we have to make sure we see scalar stmts
5616 	     for all vector elements.
5617 	     ???  For different vectors we could resort to a different
5618 	     scalar shift operand but code-generation below simply always
5619 	     takes the first.  */
5620 	  if (dt[1] == vect_internal_def
5621 	      && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5622 			   stmts.length ()))
5623 	    scalar_shift_arg = false;
5624 	}
5625 
5626       /* If the shift amount is computed by a pattern stmt we cannot
5627          use the scalar amount directly thus give up and use a vector
5628 	 shift.  */
5629       if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5630 	scalar_shift_arg = false;
5631     }
5632   else
5633     {
5634       if (dump_enabled_p ())
5635         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5636                          "operand mode requires invariant argument.\n");
5637       return false;
5638     }
5639 
5640   /* Vector shifted by vector.  */
5641   if (!scalar_shift_arg)
5642     {
5643       optab = optab_for_tree_code (code, vectype, optab_vector);
5644       if (dump_enabled_p ())
5645         dump_printf_loc (MSG_NOTE, vect_location,
5646                          "vector/vector shift/rotate found.\n");
5647 
5648       if (!op1_vectype)
5649 	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5650       if (op1_vectype == NULL_TREE
5651 	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5652 	{
5653 	  if (dump_enabled_p ())
5654 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5655                              "unusable type for last operand in"
5656                              " vector/vector shift/rotate.\n");
5657 	  return false;
5658 	}
5659     }
5660   /* See if the machine has a vector shifted by scalar insn and if not
5661      then see if it has a vector shifted by vector insn.  */
5662   else
5663     {
5664       optab = optab_for_tree_code (code, vectype, optab_scalar);
5665       if (optab
5666           && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5667         {
5668           if (dump_enabled_p ())
5669             dump_printf_loc (MSG_NOTE, vect_location,
5670                              "vector/scalar shift/rotate found.\n");
5671         }
5672       else
5673         {
5674           optab = optab_for_tree_code (code, vectype, optab_vector);
5675           if (optab
5676                && (optab_handler (optab, TYPE_MODE (vectype))
5677                       != CODE_FOR_nothing))
5678             {
5679 	      scalar_shift_arg = false;
5680 
5681               if (dump_enabled_p ())
5682                 dump_printf_loc (MSG_NOTE, vect_location,
5683                                  "vector/vector shift/rotate found.\n");
5684 
5685               /* Unlike the other binary operators, shifts/rotates take
5686                  an int rhs rather than one of the same type as the lhs,
5687                  so make sure the scalar is of the right type if we are
5688 		 dealing with vectors of long long/long/short/char.  */
5689               if (dt[1] == vect_constant_def)
5690                 op1 = fold_convert (TREE_TYPE (vectype), op1);
5691 	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5692 						   TREE_TYPE (op1)))
5693 		{
5694 		  if (slp_node
5695 		      && TYPE_MODE (TREE_TYPE (vectype))
5696 			 != TYPE_MODE (TREE_TYPE (op1)))
5697 		    {
5698                       if (dump_enabled_p ())
5699                         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5700                                          "unusable type for last operand in"
5701                                          " vector/vector shift/rotate.\n");
5702 		      return false;
5703 		    }
5704 		  if (vec_stmt && !slp_node)
5705 		    {
5706 		      op1 = fold_convert (TREE_TYPE (vectype), op1);
5707 		      op1 = vect_init_vector (stmt_info, op1,
5708 					      TREE_TYPE (vectype), NULL);
5709 		    }
5710 		}
5711             }
5712         }
5713     }
5714 
5715   /* Supportable by target?  */
5716   if (!optab)
5717     {
5718       if (dump_enabled_p ())
5719         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5720                          "no optab.\n");
5721       return false;
5722     }
5723   vec_mode = TYPE_MODE (vectype);
5724   icode = (int) optab_handler (optab, vec_mode);
5725   if (icode == CODE_FOR_nothing)
5726     {
5727       if (dump_enabled_p ())
5728         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5729                          "op not supported by target.\n");
5730       /* Check only during analysis.  */
5731       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5732 	  || (!vec_stmt
5733 	      && !vect_worthwhile_without_simd_p (vinfo, code)))
5734         return false;
5735       if (dump_enabled_p ())
5736         dump_printf_loc (MSG_NOTE, vect_location,
5737                          "proceeding using word mode.\n");
5738     }
5739 
5740   /* Worthwhile without SIMD support?  Check only during analysis.  */
5741   if (!vec_stmt
5742       && !VECTOR_MODE_P (TYPE_MODE (vectype))
5743       && !vect_worthwhile_without_simd_p (vinfo, code))
5744     {
5745       if (dump_enabled_p ())
5746         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5747                          "not worthwhile without SIMD support.\n");
5748       return false;
5749     }
5750 
5751   if (!vec_stmt) /* transformation not required.  */
5752     {
5753       STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5754       DUMP_VECT_SCOPE ("vectorizable_shift");
5755       vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5756       return true;
5757     }
5758 
5759   /* Transform.  */
5760 
5761   if (dump_enabled_p ())
5762     dump_printf_loc (MSG_NOTE, vect_location,
5763                      "transform binary/unary operation.\n");
5764 
5765   /* Handle def.  */
5766   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5767 
5768   prev_stmt_info = NULL;
5769   for (j = 0; j < ncopies; j++)
5770     {
5771       /* Handle uses.  */
5772       if (j == 0)
5773         {
5774           if (scalar_shift_arg)
5775             {
5776               /* Vector shl and shr insn patterns can be defined with scalar
5777                  operand 2 (shift operand).  In this case, use constant or loop
5778                  invariant op1 directly, without extending it to vector mode
5779                  first.  */
5780               optab_op2_mode = insn_data[icode].operand[2].mode;
5781               if (!VECTOR_MODE_P (optab_op2_mode))
5782                 {
5783                   if (dump_enabled_p ())
5784                     dump_printf_loc (MSG_NOTE, vect_location,
5785                                      "operand 1 using scalar mode.\n");
5786                   vec_oprnd1 = op1;
5787                   vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5788                   vec_oprnds1.quick_push (vec_oprnd1);
5789                   if (slp_node)
5790                     {
5791                       /* Store vec_oprnd1 for every vector stmt to be created
5792                          for SLP_NODE.  We check during the analysis that all
5793                          the shift arguments are the same.
5794                          TODO: Allow different constants for different vector
5795                          stmts generated for an SLP instance.  */
5796                       for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5797                         vec_oprnds1.quick_push (vec_oprnd1);
5798                     }
5799                 }
5800             }
5801 
5802           /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5803              (a special case for certain kinds of vector shifts); otherwise,
5804              operand 1 should be of a vector type (the usual case).  */
5805           if (vec_oprnd1)
5806 	    vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5807 			       slp_node);
5808           else
5809 	    vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
5810 			       slp_node);
5811         }
5812       else
5813 	vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5814 
5815       /* Arguments are ready.  Create the new vector stmt.  */
5816       stmt_vec_info new_stmt_info = NULL;
5817       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5818         {
5819           vop1 = vec_oprnds1[i];
5820 	  gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5821           new_temp = make_ssa_name (vec_dest, new_stmt);
5822           gimple_assign_set_lhs (new_stmt, new_temp);
5823 	  new_stmt_info
5824 	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5825           if (slp_node)
5826 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5827         }
5828 
5829       if (slp_node)
5830         continue;
5831 
5832       if (j == 0)
5833 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5834       else
5835 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5836       prev_stmt_info = new_stmt_info;
5837     }
5838 
5839   vec_oprnds0.release ();
5840   vec_oprnds1.release ();
5841 
5842   return true;
5843 }
5844 
5845 
5846 /* Function vectorizable_operation.
5847 
5848    Check if STMT_INFO performs a binary, unary or ternary operation that can
5849    be vectorized.
5850    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5851    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5852    Return true if STMT_INFO is vectorizable in this way.  */
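/* For example (illustrative), the scalar statement

     a_1 = b_2 + c_3;

   is vectorized into NCOPIES statements of the form

     va_1 = vb_2 + vc_3;

   one per vector copy needed to cover the vectorization factor.  */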
5853 
5854 static bool
5855 vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5856 			stmt_vec_info *vec_stmt, slp_tree slp_node,
5857 			stmt_vector_for_cost *cost_vec)
5858 {
5859   tree vec_dest;
5860   tree scalar_dest;
5861   tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5862   tree vectype;
5863   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5864   enum tree_code code, orig_code;
5865   machine_mode vec_mode;
5866   tree new_temp;
5867   int op_type;
5868   optab optab;
5869   bool target_support_p;
5870   enum vect_def_type dt[3]
5871     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5872   int ndts = 3;
5873   stmt_vec_info prev_stmt_info;
5874   poly_uint64 nunits_in;
5875   poly_uint64 nunits_out;
5876   tree vectype_out;
5877   int ncopies;
5878   int j, i;
5879   vec<tree> vec_oprnds0 = vNULL;
5880   vec<tree> vec_oprnds1 = vNULL;
5881   vec<tree> vec_oprnds2 = vNULL;
5882   tree vop0, vop1, vop2;
5883   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5884   vec_info *vinfo = stmt_info->vinfo;
5885 
5886   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5887     return false;
5888 
5889   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5890       && ! vec_stmt)
5891     return false;
5892 
5893   /* Is STMT a vectorizable binary/unary operation?   */
5894   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5895   if (!stmt)
5896     return false;
5897 
5898   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5899     return false;
5900 
5901   orig_code = code = gimple_assign_rhs_code (stmt);
5902 
5903   /* For pointer addition and subtraction, we should use the normal
5904      plus and minus for the vector operation.  */
5905   if (code == POINTER_PLUS_EXPR)
5906     code = PLUS_EXPR;
5907   if (code == POINTER_DIFF_EXPR)
5908     code = MINUS_EXPR;
5909 
5910   /* Support only unary, binary and ternary operations.  */
5911   op_type = TREE_CODE_LENGTH (code);
5912   if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5913     {
5914       if (dump_enabled_p ())
5915         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5916                          "num. args = %d (not unary/binary/ternary op).\n",
5917                          op_type);
5918       return false;
5919     }
5920 
5921   scalar_dest = gimple_assign_lhs (stmt);
5922   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5923 
5924   /* Most operations cannot handle bit-precision types without extra
5925      truncations.  */
5926   if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5927       && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5928       /* Exceptions are bitwise binary operations.  */
5929       && code != BIT_IOR_EXPR
5930       && code != BIT_XOR_EXPR
5931       && code != BIT_AND_EXPR)
5932     {
5933       if (dump_enabled_p ())
5934         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5935                          "bit-precision arithmetic not supported.\n");
5936       return false;
5937     }
5938 
5939   op0 = gimple_assign_rhs1 (stmt);
5940   if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5941     {
5942       if (dump_enabled_p ())
5943         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5944                          "use not simple.\n");
5945       return false;
5946     }
5947   /* If op0 is an external or constant def use a vector type with
5948      the same size as the output vector type.  */
5949   if (!vectype)
5950     {
5951       /* For boolean type we cannot determine vectype by
5952 	 invariant value (don't know whether it is a vector
5953 	 of booleans or vector of integers).  We use output
5954 	 vectype because operations on boolean don't change
5955 	 type.  */
5956       if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5957 	{
5958 	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5959 	    {
5960 	      if (dump_enabled_p ())
5961 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5962 				 "not supported operation on bool value.\n");
5963 	      return false;
5964 	    }
5965 	  vectype = vectype_out;
5966 	}
5967       else
5968 	vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5969     }
5970   if (vec_stmt)
5971     gcc_assert (vectype);
5972   if (!vectype)
5973     {
5974       if (dump_enabled_p ())
5975 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5976 			 "no vectype for scalar type %T\n",
5977 			 TREE_TYPE (op0));
5978 
5979       return false;
5980     }
5981 
5982   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5983   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5984   if (maybe_ne (nunits_out, nunits_in))
5985     return false;
5986 
5987   if (op_type == binary_op || op_type == ternary_op)
5988     {
5989       op1 = gimple_assign_rhs2 (stmt);
5990       if (!vect_is_simple_use (op1, vinfo, &dt[1]))
5991 	{
5992 	  if (dump_enabled_p ())
5993 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5994                              "use not simple.\n");
5995 	  return false;
5996 	}
5997     }
5998   if (op_type == ternary_op)
5999     {
6000       op2 = gimple_assign_rhs3 (stmt);
6001       if (!vect_is_simple_use (op2, vinfo, &dt[2]))
6002 	{
6003 	  if (dump_enabled_p ())
6004 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6005                              "use not simple.\n");
6006 	  return false;
6007 	}
6008     }
6009 
6010   /* Multiple types in SLP are handled by creating the appropriate number of
6011      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
6012      case of SLP.  */
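  /* For example, with a vectorization factor of 16 and a four-element
     VECTYPE, vect_get_num_copies below returns 4 in the non-SLP case.  */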
6013   if (slp_node)
6014     ncopies = 1;
6015   else
6016     ncopies = vect_get_num_copies (loop_vinfo, vectype);
6017 
6018   gcc_assert (ncopies >= 1);
6019 
6020   /* Shifts are handled in vectorizable_shift ().  */
6021   if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6022       || code == RROTATE_EXPR)
6023    return false;
6024 
6025   /* Supportable by target?  */
6026 
6027   vec_mode = TYPE_MODE (vectype);
6028   if (code == MULT_HIGHPART_EXPR)
6029     target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6030   else
6031     {
6032       optab = optab_for_tree_code (code, vectype, optab_default);
6033       if (!optab)
6034 	{
6035           if (dump_enabled_p ())
6036             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6037                              "no optab.\n");
6038 	  return false;
6039 	}
6040       target_support_p = (optab_handler (optab, vec_mode)
6041 			  != CODE_FOR_nothing);
6042     }
6043 
6044   if (!target_support_p)
6045     {
6046       if (dump_enabled_p ())
6047 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6048                          "op not supported by target.\n");
6049       /* Check only during analysis.  */
6050       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6051 	  || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
6052         return false;
6053       if (dump_enabled_p ())
6054 	dump_printf_loc (MSG_NOTE, vect_location,
6055                          "proceeding using word mode.\n");
6056     }
6057 
6058   /* Worthwhile without SIMD support?  Check only during analysis.  */
6059   if (!VECTOR_MODE_P (vec_mode)
6060       && !vec_stmt
6061       && !vect_worthwhile_without_simd_p (vinfo, code))
6062     {
6063       if (dump_enabled_p ())
6064         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6065                          "not worthwhile without SIMD support.\n");
6066       return false;
6067     }
6068 
6069   if (!vec_stmt) /* transformation not required.  */
6070     {
6071       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6072       DUMP_VECT_SCOPE ("vectorizable_operation");
6073       vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
6074       return true;
6075     }
6076 
6077   /* Transform.  */
6078 
6079   if (dump_enabled_p ())
6080     dump_printf_loc (MSG_NOTE, vect_location,
6081                      "transform binary/unary operation.\n");
6082 
6083   /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6084      vectors with unsigned elements, but the result is signed.  So, we
6085      need to compute the MINUS_EXPR into vectype temporary and
6086      VIEW_CONVERT_EXPR it into the final vectype_out result.  */
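  /* That is, for each copy generated below we emit roughly:
	 vect_tmp = vop0 - vop1;                        (unsigned VECTYPE)
	 vect_res = VIEW_CONVERT_EXPR <vectype_out> (vect_tmp);  */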
6087   tree vec_cvt_dest = NULL_TREE;
6088   if (orig_code == POINTER_DIFF_EXPR)
6089     {
6090       vec_dest = vect_create_destination_var (scalar_dest, vectype);
6091       vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6092     }
6093   /* Handle def.  */
6094   else
6095     vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6096 
6097   /* In case the vectorization factor (VF) is bigger than the number
6098      of elements that we can fit in a vectype (nunits), we have to generate
6099      more than one vector stmt - i.e - we need to "unroll" the
6100      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
6101      from one copy of the vector stmt to the next, in the field
6102      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
6103      stages to find the correct vector defs to be used when vectorizing
6104      stmts that use the defs of the current stmt.  The example below
6105      illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6106      we need to create 4 vectorized stmts):
6107 
6108      before vectorization:
6109                                 RELATED_STMT    VEC_STMT
6110         S1:     x = memref      -               -
6111         S2:     z = x + 1       -               -
6112 
6113      step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6114              there):
6115                                 RELATED_STMT    VEC_STMT
6116         VS1_0:  vx0 = memref0   VS1_1           -
6117         VS1_1:  vx1 = memref1   VS1_2           -
6118         VS1_2:  vx2 = memref2   VS1_3           -
6119         VS1_3:  vx3 = memref3   -               -
6120         S1:     x = load        -               VS1_0
6121         S2:     z = x + 1       -               -
6122 
6123      step2: vectorize stmt S2 (done here):
6124         To vectorize stmt S2 we first need to find the relevant vector
6125         def for the first operand 'x'.  This is, as usual, obtained from
6126         the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6127         that defines 'x' (S1).  This way we find the stmt VS1_0, and the
6128         relevant vector def 'vx0'.  Having found 'vx0' we can generate
6129         the vector stmt VS2_0, and as usual, record it in the
6130         STMT_VINFO_VEC_STMT of stmt S2.
6131         When creating the second copy (VS2_1), we obtain the relevant vector
6132         def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6133         stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
6134         vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
6135         pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6136         Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
6137         chain of stmts and pointers:
6138                                 RELATED_STMT    VEC_STMT
6139         VS1_0:  vx0 = memref0   VS1_1           -
6140         VS1_1:  vx1 = memref1   VS1_2           -
6141         VS1_2:  vx2 = memref2   VS1_3           -
6142         VS1_3:  vx3 = memref3   -               -
6143         S1:     x = load        -               VS1_0
6144         VS2_0:  vz0 = vx0 + v1  VS2_1           -
6145         VS2_1:  vz1 = vx1 + v1  VS2_2           -
6146         VS2_2:  vz2 = vx2 + v1  VS2_3           -
6147         VS2_3:  vz3 = vx3 + v1  -               -
6148         S2:     z = x + 1       -               VS2_0  */
6149 
6150   prev_stmt_info = NULL;
6151   for (j = 0; j < ncopies; j++)
6152     {
6153       /* Handle uses.  */
6154       if (j == 0)
6155 	{
6156 	  if (op_type == binary_op)
6157 	    vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
6158 			       slp_node);
6159 	  else if (op_type == ternary_op)
6160 	    {
6161 	      if (slp_node)
6162 		{
6163 		  auto_vec<tree> ops(3);
6164 		  ops.quick_push (op0);
6165 		  ops.quick_push (op1);
6166 		  ops.quick_push (op2);
6167 		  auto_vec<vec<tree> > vec_defs(3);
6168 		  vect_get_slp_defs (ops, slp_node, &vec_defs);
6169 		  vec_oprnds0 = vec_defs[0];
6170 		  vec_oprnds1 = vec_defs[1];
6171 		  vec_oprnds2 = vec_defs[2];
6172 		}
6173 	      else
6174 		{
6175 		  vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
6176 				     &vec_oprnds1, NULL);
6177 		  vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
6178 				     NULL, NULL);
6179 		}
6180 	    }
6181 	  else
6182 	    vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
6183 			       slp_node);
6184 	}
6185       else
6186 	{
6187 	  vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6188 	  if (op_type == ternary_op)
6189 	    {
6190 	      tree vec_oprnd = vec_oprnds2.pop ();
6191 	      vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6192 							           vec_oprnd));
6193 	    }
6194 	}
6195 
6196       /* Arguments are ready.  Create the new vector stmt.  */
6197       stmt_vec_info new_stmt_info = NULL;
6198       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6199         {
6200 	  vop1 = ((op_type == binary_op || op_type == ternary_op)
6201 		  ? vec_oprnds1[i] : NULL_TREE);
6202 	  vop2 = ((op_type == ternary_op)
6203 		  ? vec_oprnds2[i] : NULL_TREE);
6204 	  gassign *new_stmt = gimple_build_assign (vec_dest, code,
6205 						   vop0, vop1, vop2);
6206 	  new_temp = make_ssa_name (vec_dest, new_stmt);
6207 	  gimple_assign_set_lhs (new_stmt, new_temp);
6208 	  new_stmt_info
6209 	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6210 	  if (vec_cvt_dest)
6211 	    {
6212 	      new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6213 	      gassign *new_stmt
6214 		= gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6215 				       new_temp);
6216 	      new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6217 	      gimple_assign_set_lhs (new_stmt, new_temp);
6218 	      new_stmt_info
6219 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6220 	    }
6221           if (slp_node)
6222 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6223         }
6224 
6225       if (slp_node)
6226         continue;
6227 
6228       if (j == 0)
6229 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6230       else
6231 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6232       prev_stmt_info = new_stmt_info;
6233     }
6234 
6235   vec_oprnds0.release ();
6236   vec_oprnds1.release ();
6237   vec_oprnds2.release ();
6238 
6239   return true;
6240 }
6241 
6242 /* A helper function to ensure data reference DR_INFO's base alignment.  */
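/* If the base declaration of the data reference was found to be
   insufficiently aligned, raise its alignment (via the symbol table for
   global symbols, or DECL_ALIGN otherwise) to the target alignment
   recorded in DR_TARGET_ALIGNMENT, so the alignment assumed during
   analysis actually holds.  */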
6243 
6244 static void
6245 ensure_base_align (dr_vec_info *dr_info)
6246 {
6247   if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6248     return;
6249 
6250   if (dr_info->base_misaligned)
6251     {
6252       tree base_decl = dr_info->base_decl;
6253 
6254       /* We should only be able to increase the alignment of a base object
6255 	 if we know what its new alignment should be at compile time.  */
6256       unsigned HOST_WIDE_INT align_base_to =
6257 	DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6258 
6259       if (decl_in_symtab_p (base_decl))
6260 	symtab_node::get (base_decl)->increase_alignment (align_base_to);
6261       else
6262 	{
6263 	  SET_DECL_ALIGN (base_decl, align_base_to);
6264           DECL_USER_ALIGN (base_decl) = 1;
6265 	}
6266       dr_info->base_misaligned = false;
6267     }
6268 }
6269 
6270 
6271 /* Function get_group_alias_ptr_type.
6272 
6273    Return the alias type for the group starting at FIRST_STMT_INFO.  */
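/* If members of the group access memory through conflicting alias sets,
   the function conservatively falls back to ptr_type_node so that the
   single vector access generated for the group is considered to alias
   all of them.  */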
6274 
6275 static tree
6276 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6277 {
6278   struct data_reference *first_dr, *next_dr;
6279 
6280   first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6281   stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6282   while (next_stmt_info)
6283     {
6284       next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6285       if (get_alias_set (DR_REF (first_dr))
6286 	  != get_alias_set (DR_REF (next_dr)))
6287 	{
6288 	  if (dump_enabled_p ())
6289 	    dump_printf_loc (MSG_NOTE, vect_location,
6290 			     "conflicting alias set types.\n");
6291 	  return ptr_type_node;
6292 	}
6293       next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6294     }
6295   return reference_alias_ptr_type (DR_REF (first_dr));
6296 }
6297 
6298 
6299 /* Function vectorizable_store.
6300 
6301    Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
6302    that can be vectorized.
6303    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6304    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6305    Return true if STMT_INFO is vectorizable in this way.  */
6306 
6307 static bool
6308 vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6309 		    stmt_vec_info *vec_stmt, slp_tree slp_node,
6310 		    stmt_vector_for_cost *cost_vec)
6311 {
6312   tree data_ref;
6313   tree op;
6314   tree vec_oprnd = NULL_TREE;
6315   tree elem_type;
6316   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6317   struct loop *loop = NULL;
6318   machine_mode vec_mode;
6319   tree dummy;
6320   enum dr_alignment_support alignment_support_scheme;
6321   enum vect_def_type rhs_dt = vect_unknown_def_type;
6322   enum vect_def_type mask_dt = vect_unknown_def_type;
6323   stmt_vec_info prev_stmt_info = NULL;
6324   tree dataref_ptr = NULL_TREE;
6325   tree dataref_offset = NULL_TREE;
6326   gimple *ptr_incr = NULL;
6327   int ncopies;
6328   int j;
6329   stmt_vec_info first_stmt_info;
6330   bool grouped_store;
6331   unsigned int group_size, i;
6332   vec<tree> oprnds = vNULL;
6333   vec<tree> result_chain = vNULL;
6334   tree offset = NULL_TREE;
6335   vec<tree> vec_oprnds = vNULL;
6336   bool slp = (slp_node != NULL);
6337   unsigned int vec_num;
6338   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6339   vec_info *vinfo = stmt_info->vinfo;
6340   tree aggr_type;
6341   gather_scatter_info gs_info;
6342   poly_uint64 vf;
6343   vec_load_store_type vls_type;
6344   tree ref_type;
6345 
6346   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6347     return false;
6348 
6349   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6350       && ! vec_stmt)
6351     return false;
6352 
6353   /* Is vectorizable store? */
6354 
6355   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6356   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
6357     {
6358       tree scalar_dest = gimple_assign_lhs (assign);
6359       if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6360 	  && is_pattern_stmt_p (stmt_info))
6361 	scalar_dest = TREE_OPERAND (scalar_dest, 0);
6362       if (TREE_CODE (scalar_dest) != ARRAY_REF
6363 	  && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6364 	  && TREE_CODE (scalar_dest) != INDIRECT_REF
6365 	  && TREE_CODE (scalar_dest) != COMPONENT_REF
6366 	  && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6367 	  && TREE_CODE (scalar_dest) != REALPART_EXPR
6368 	  && TREE_CODE (scalar_dest) != MEM_REF)
6369 	return false;
6370     }
6371   else
6372     {
6373       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
6374       if (!call || !gimple_call_internal_p (call))
6375 	return false;
6376 
6377       internal_fn ifn = gimple_call_internal_fn (call);
6378       if (!internal_store_fn_p (ifn))
6379 	return false;
6380 
6381       if (slp_node != NULL)
6382 	{
6383 	  if (dump_enabled_p ())
6384 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6385 			     "SLP of masked stores not supported.\n");
6386 	  return false;
6387 	}
6388 
6389       int mask_index = internal_fn_mask_index (ifn);
6390       if (mask_index >= 0)
6391 	{
6392 	  mask = gimple_call_arg (call, mask_index);
6393 	  if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
6394 					   &mask_vectype))
6395 	    return false;
6396 	}
6397     }
6398 
6399   op = vect_get_store_rhs (stmt_info);
6400 
6401   /* Cannot have hybrid store SLP -- that would mean storing to the
6402      same location twice.  */
6403   gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6404 
6405   tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
6406   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6407 
6408   if (loop_vinfo)
6409     {
6410       loop = LOOP_VINFO_LOOP (loop_vinfo);
6411       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6412     }
6413   else
6414     vf = 1;
6415 
6416   /* Multiple types in SLP are handled by creating the appropriate number of
6417      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
6418      case of SLP.  */
6419   if (slp)
6420     ncopies = 1;
6421   else
6422     ncopies = vect_get_num_copies (loop_vinfo, vectype);
6423 
6424   gcc_assert (ncopies >= 1);
6425 
6426   /* FORNOW.  This restriction should be relaxed.  */
6427   if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
6428     {
6429       if (dump_enabled_p ())
6430 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6431 			 "multiple types in nested loop.\n");
6432       return false;
6433     }
6434 
6435   if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
6436     return false;
6437 
6438   elem_type = TREE_TYPE (vectype);
6439   vec_mode = TYPE_MODE (vectype);
6440 
6441   if (!STMT_VINFO_DATA_REF (stmt_info))
6442     return false;
6443 
6444   vect_memory_access_type memory_access_type;
6445   if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
6446 			    &memory_access_type, &gs_info))
6447     return false;
6448 
6449   if (mask)
6450     {
6451       if (memory_access_type == VMAT_CONTIGUOUS)
6452 	{
6453 	  if (!VECTOR_MODE_P (vec_mode)
6454 	      || !can_vec_mask_load_store_p (vec_mode,
6455 					     TYPE_MODE (mask_vectype), false))
6456 	    return false;
6457 	}
6458       else if (memory_access_type != VMAT_LOAD_STORE_LANES
6459 	       && (memory_access_type != VMAT_GATHER_SCATTER
6460 		   || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
6461 	{
6462 	  if (dump_enabled_p ())
6463 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6464 			     "unsupported access type for masked store.\n");
6465 	  return false;
6466 	}
6467     }
6468   else
6469     {
6470       /* FORNOW.  In some cases we can vectorize even if the data type is
6471 	 not supported (e.g. array initialization with 0).  */
6472       if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6473 	return false;
6474     }
6475 
6476   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
6477   grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
6478 		   && memory_access_type != VMAT_GATHER_SCATTER
6479 		   && (slp || memory_access_type != VMAT_CONTIGUOUS));
6480   if (grouped_store)
6481     {
6482       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
6483       first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
6484       group_size = DR_GROUP_SIZE (first_stmt_info);
6485     }
6486   else
6487     {
6488       first_stmt_info = stmt_info;
6489       first_dr_info = dr_info;
6490       group_size = vec_num = 1;
6491     }
6492 
6493   if (!vec_stmt) /* transformation not required.  */
6494     {
6495       STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6496 
6497       if (loop_vinfo
6498 	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6499 	check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
6500 				  memory_access_type, &gs_info);
6501 
6502       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
6503       vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6504 			     vls_type, slp_node, cost_vec);
6505       return true;
6506     }
6507   gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6508 
6509   /* Transform.  */
6510 
6511   ensure_base_align (dr_info);
6512 
6513   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
6514     {
6515       tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
6516       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6517       tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6518       tree ptr, var, scale, vec_mask;
6519       tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
6520       tree mask_halfvectype = mask_vectype;
6521       edge pe = loop_preheader_edge (loop);
6522       gimple_seq seq;
6523       basic_block new_bb;
6524       enum { NARROW, NONE, WIDEN } modifier;
6525       poly_uint64 scatter_off_nunits
6526 	= TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6527 
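      /* MODIFIER records how the data vector relates to the offset vector
	 of the scatter builtin: NONE if both have the same number of
	 elements, WIDEN if the offset vector has twice as many elements
	 (a permutation then supplies its second half for the odd copies),
	 and NARROW if the data vector has twice as many elements (each
	 data vector then feeds two scatter calls and NCOPIES is doubled).  */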
6528       if (known_eq (nunits, scatter_off_nunits))
6529 	modifier = NONE;
6530       else if (known_eq (nunits * 2, scatter_off_nunits))
6531 	{
6532 	  modifier = WIDEN;
6533 
6534 	  /* Currently gathers and scatters are only supported for
6535 	     fixed-length vectors.  */
6536 	  unsigned int count = scatter_off_nunits.to_constant ();
6537 	  vec_perm_builder sel (count, count, 1);
6538 	  for (i = 0; i < (unsigned int) count; ++i)
6539 	    sel.quick_push (i | (count / 2));
6540 
6541 	  vec_perm_indices indices (sel, 1, count);
6542 	  perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6543 						  indices);
6544 	  gcc_assert (perm_mask != NULL_TREE);
6545 	}
6546       else if (known_eq (nunits, scatter_off_nunits * 2))
6547 	{
6548 	  modifier = NARROW;
6549 
6550 	  /* Currently gathers and scatters are only supported for
6551 	     fixed-length vectors.  */
6552 	  unsigned int count = nunits.to_constant ();
6553 	  vec_perm_builder sel (count, count, 1);
6554 	  for (i = 0; i < (unsigned int) count; ++i)
6555 	    sel.quick_push (i | (count / 2));
6556 
6557 	  vec_perm_indices indices (sel, 2, count);
6558 	  perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6559 	  gcc_assert (perm_mask != NULL_TREE);
6560 	  ncopies *= 2;
6561 
6562 	  if (mask)
6563 	    mask_halfvectype
6564 	      = build_same_sized_truth_vector_type (gs_info.offset_vectype);
6565 	}
6566       else
6567 	gcc_unreachable ();
6568 
6569       rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6570       ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6571       masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6572       idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6573       srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6574       scaletype = TREE_VALUE (arglist);
6575 
6576       gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6577 			   && TREE_CODE (rettype) == VOID_TYPE);
6578 
6579       ptr = fold_convert (ptrtype, gs_info.base);
6580       if (!is_gimple_min_invariant (ptr))
6581 	{
6582 	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6583 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6584 	  gcc_assert (!new_bb);
6585 	}
6586 
6587       if (mask == NULL_TREE)
6588 	{
6589 	  mask_arg = build_int_cst (masktype, -1);
6590 	  mask_arg = vect_init_vector (stmt_info, mask_arg, masktype, NULL);
6591 	}
6592 
6593       scale = build_int_cst (scaletype, gs_info.scale);
6594 
6595       prev_stmt_info = NULL;
6596       for (j = 0; j < ncopies; ++j)
6597 	{
6598 	  if (j == 0)
6599 	    {
6600 	      src = vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt_info);
6601 	      op = vec_oprnd0 = vect_get_vec_def_for_operand (gs_info.offset,
6602 							      stmt_info);
6603 	      if (mask)
6604 		mask_op = vec_mask = vect_get_vec_def_for_operand (mask,
6605 								   stmt_info);
6606 	    }
6607 	  else if (modifier != NONE && (j & 1))
6608 	    {
6609 	      if (modifier == WIDEN)
6610 		{
6611 		  src
6612 		    = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
6613 								   vec_oprnd1);
6614 		  op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6615 					     stmt_info, gsi);
6616 		  if (mask)
6617 		    mask_op
6618 		      = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
6619 								   vec_mask);
6620 		}
6621 	      else if (modifier == NARROW)
6622 		{
6623 		  src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6624 					      stmt_info, gsi);
6625 		  op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
6626 								    vec_oprnd0);
6627 		}
6628 	      else
6629 		gcc_unreachable ();
6630 	    }
6631 	  else
6632 	    {
6633 	      src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
6634 								 vec_oprnd1);
6635 	      op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
6636 								vec_oprnd0);
6637 	      if (mask)
6638 		mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
6639 								     vec_mask);
6640 	    }
6641 
6642 	  if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6643 	    {
6644 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6645 				    TYPE_VECTOR_SUBPARTS (srctype)));
6646 	      var = vect_get_new_ssa_name (srctype, vect_simple_var);
6647 	      src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6648 	      gassign *new_stmt
6649 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6650 	      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6651 	      src = var;
6652 	    }
6653 
6654 	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6655 	    {
6656 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6657 				    TYPE_VECTOR_SUBPARTS (idxtype)));
6658 	      var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6659 	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6660 	      gassign *new_stmt
6661 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6662 	      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6663 	      op = var;
6664 	    }
6665 
6666 	  if (mask)
6667 	    {
6668 	      tree utype;
6669 	      mask_arg = mask_op;
6670 	      if (modifier == NARROW)
6671 		{
6672 		  var = vect_get_new_ssa_name (mask_halfvectype,
6673 					       vect_simple_var);
6674 		  gassign *new_stmt
6675 		    = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
6676 							: VEC_UNPACK_LO_EXPR,
6677 					   mask_op);
6678 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6679 		  mask_arg = var;
6680 		}
6681 	      tree optype = TREE_TYPE (mask_arg);
6682 	      if (TYPE_MODE (masktype) == TYPE_MODE (optype))
6683 		utype = masktype;
6684 	      else
6685 		utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
6686 	      var = vect_get_new_ssa_name (utype, vect_scalar_var);
6687 	      mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
6688 	      gassign *new_stmt
6689 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
6690 	      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6691 	      mask_arg = var;
6692 	      if (!useless_type_conversion_p (masktype, utype))
6693 		{
6694 		  gcc_assert (TYPE_PRECISION (utype)
6695 			      <= TYPE_PRECISION (masktype));
6696 		  var = vect_get_new_ssa_name (masktype, vect_scalar_var);
6697 		  new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
6698 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6699 		  mask_arg = var;
6700 		}
6701 	    }
6702 
6703 	  gcall *new_stmt
6704 	    = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
6705 	  stmt_vec_info new_stmt_info
6706 	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6707 
6708 	  if (prev_stmt_info == NULL)
6709 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6710 	  else
6711 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6712 	  prev_stmt_info = new_stmt_info;
6713 	}
6714       return true;
6715     }
6716 
6717   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6718     DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
6719 
6720   if (grouped_store)
6721     {
6722       /* FORNOW */
6723       gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
6724 
6725       /* We vectorize all the stmts of the interleaving group when we
6726 	 reach the last stmt in the group.  */
6727       if (DR_GROUP_STORE_COUNT (first_stmt_info)
6728 	  < DR_GROUP_SIZE (first_stmt_info)
6729 	  && !slp)
6730 	{
6731 	  *vec_stmt = NULL;
6732 	  return true;
6733 	}
6734 
6735       if (slp)
6736         {
6737           grouped_store = false;
6738           /* VEC_NUM is the number of vect stmts to be created for this
6739              group.  */
6740           vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6741 	  first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6742 	  gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
6743 		      == first_stmt_info);
6744 	  first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
6745 	  op = vect_get_store_rhs (first_stmt_info);
6746         }
6747       else
6748         /* VEC_NUM is the number of vect stmts to be created for this
6749            group.  */
6750 	vec_num = group_size;
6751 
6752       ref_type = get_group_alias_ptr_type (first_stmt_info);
6753     }
6754   else
6755     ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
6756 
6757   if (dump_enabled_p ())
6758     dump_printf_loc (MSG_NOTE, vect_location,
6759                      "transform store. ncopies = %d\n", ncopies);
6760 
6761   if (memory_access_type == VMAT_ELEMENTWISE
6762       || memory_access_type == VMAT_STRIDED_SLP)
6763     {
6764       gimple_stmt_iterator incr_gsi;
6765       bool insert_after;
6766       gimple *incr;
6767       tree offvar;
6768       tree ivstep;
6769       tree running_off;
6770       tree stride_base, stride_step, alias_off;
6771       tree vec_oprnd;
6772       unsigned int g;
6773       /* Checked by get_load_store_type.  */
6774       unsigned int const_nunits = nunits.to_constant ();
6775 
6776       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6777       gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
6778 
6779       stride_base
6780 	= fold_build_pointer_plus
6781 	    (DR_BASE_ADDRESS (first_dr_info->dr),
6782 	     size_binop (PLUS_EXPR,
6783 			 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
6784 			 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
6785       stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
6786 
6787       /* For a store with loop-invariant (but other than power-of-2)
6788          stride (i.e. not a grouped access) like so:
6789 
6790 	   for (i = 0; i < n; i += stride)
6791 	     array[i] = ...;
6792 
6793 	 we generate a new induction variable and new stores from
6794 	 the components of the (vectorized) rhs:
6795 
6796 	   for (j = 0; ; j += VF*stride)
6797 	     vectemp = ...;
6798 	     tmp1 = vectemp[0];
6799 	     array[j] = tmp1;
6800 	     tmp2 = vectemp[1];
6801 	     array[j + stride] = tmp2;
6802 	     ...
6803          */
6804 
6805       unsigned nstores = const_nunits;
6806       unsigned lnel = 1;
6807       tree ltype = elem_type;
6808       tree lvectype = vectype;
6809       if (slp)
6810 	{
6811 	  if (group_size < const_nunits
6812 	      && const_nunits % group_size == 0)
6813 	    {
6814 	      nstores = const_nunits / group_size;
6815 	      lnel = group_size;
6816 	      ltype = build_vector_type (elem_type, group_size);
6817 	      lvectype = vectype;
6818 
6819 	      /* First check whether the vec_extract optab supports extracting
6820 		 these GROUP_SIZE-element subvectors directly; if not, try the
		 fallbacks below.  */
6821 	      scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6822 	      machine_mode vmode;
6823 	      if (!mode_for_vector (elmode, group_size).exists (&vmode)
6824 		  || !VECTOR_MODE_P (vmode)
6825 		  || !targetm.vector_mode_supported_p (vmode)
6826 		  || (convert_optab_handler (vec_extract_optab,
6827 					     TYPE_MODE (vectype), vmode)
6828 		      == CODE_FOR_nothing))
6829 		{
6830 		  /* Try to avoid emitting an extract of vector elements
6831 		     by performing the extracts using an integer type of the
6832 		     same size, extracting from a vector of those and then
6833 		     re-interpreting it as the original vector type if
6834 		     supported.  */
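		  /* For instance, with a V4SF vector and GROUP_SIZE 2 on a
		     target with no V2SF vec_extract pattern, LSIZE is 64,
		     so we would extract two DImode elements from a V2DI
		     view of the vector and store those instead, assuming
		     the target supports V2DI and the DImode extract.  */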
6835 		  unsigned lsize
6836 		    = group_size * GET_MODE_BITSIZE (elmode);
6837 		  unsigned int lnunits = const_nunits / group_size;
6838 		  /* If we can't construct such a vector fall back to
6839 		     element extracts from the original vector type and
6840 		     element size stores.  */
6841 		  if (int_mode_for_size (lsize, 0).exists (&elmode)
6842 		      && mode_for_vector (elmode, lnunits).exists (&vmode)
6843 		      && VECTOR_MODE_P (vmode)
6844 		      && targetm.vector_mode_supported_p (vmode)
6845 		      && (convert_optab_handler (vec_extract_optab,
6846 						 vmode, elmode)
6847 			  != CODE_FOR_nothing))
6848 		    {
6849 		      nstores = lnunits;
6850 		      lnel = group_size;
6851 		      ltype = build_nonstandard_integer_type (lsize, 1);
6852 		      lvectype = build_vector_type (ltype, nstores);
6853 		    }
6854 		  /* Else fall back to vector extraction anyway.
6855 		     Fewer stores are more important than avoiding spilling
6856 		     of the vector we extract from.  Compared to the
6857 		     construction case in vectorizable_load no store-forwarding
6858 		     issue exists here for reasonable archs.  */
6859 		}
6860 	    }
6861 	  else if (group_size >= const_nunits
6862 		   && group_size % const_nunits == 0)
6863 	    {
6864 	      nstores = 1;
6865 	      lnel = const_nunits;
6866 	      ltype = vectype;
6867 	      lvectype = vectype;
6868 	    }
6869 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6870 	  ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6871 	}
6872 
6873       ivstep = stride_step;
6874       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6875 			    build_int_cst (TREE_TYPE (ivstep), vf));
6876 
6877       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6878 
6879       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6880       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
6881       create_iv (stride_base, ivstep, NULL,
6882 		 loop, &incr_gsi, insert_after,
6883 		 &offvar, NULL);
6884       incr = gsi_stmt (incr_gsi);
6885       loop_vinfo->add_stmt (incr);
6886 
6887       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
6888 
6889       prev_stmt_info = NULL;
6890       alias_off = build_int_cst (ref_type, 0);
6891       stmt_vec_info next_stmt_info = first_stmt_info;
6892       for (g = 0; g < group_size; g++)
6893 	{
6894 	  running_off = offvar;
6895 	  if (g)
6896 	    {
6897 	      tree size = TYPE_SIZE_UNIT (ltype);
6898 	      tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6899 				      size);
6900 	      tree newoff = copy_ssa_name (running_off, NULL);
6901 	      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6902 					  running_off, pos);
6903 	      vect_finish_stmt_generation (stmt_info, incr, gsi);
6904 	      running_off = newoff;
6905 	    }
6906 	  unsigned int group_el = 0;
6907 	  unsigned HOST_WIDE_INT
6908 	    elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6909 	  for (j = 0; j < ncopies; j++)
6910 	    {
6911 	      /* We've set op and dt above, from vect_get_store_rhs,
6912 		 and first_stmt_info == stmt_info.  */
6913 	      if (j == 0)
6914 		{
6915 		  if (slp)
6916 		    {
6917 		      vect_get_vec_defs (op, NULL_TREE, stmt_info,
6918 					 &vec_oprnds, NULL, slp_node);
6919 		      vec_oprnd = vec_oprnds[0];
6920 		    }
6921 		  else
6922 		    {
6923 		      op = vect_get_store_rhs (next_stmt_info);
6924 		      vec_oprnd = vect_get_vec_def_for_operand
6925 			(op, next_stmt_info);
6926 		    }
6927 		}
6928 	      else
6929 		{
6930 		  if (slp)
6931 		    vec_oprnd = vec_oprnds[j];
6932 		  else
6933 		    vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
6934 								vec_oprnd);
6935 		}
6936 	      /* Pun the vector to extract from if necessary.  */
6937 	      if (lvectype != vectype)
6938 		{
6939 		  tree tem = make_ssa_name (lvectype);
6940 		  gimple *pun
6941 		    = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6942 							lvectype, vec_oprnd));
6943 		  vect_finish_stmt_generation (stmt_info, pun, gsi);
6944 		  vec_oprnd = tem;
6945 		}
6946 	      for (i = 0; i < nstores; i++)
6947 		{
6948 		  tree newref, newoff;
6949 		  gimple *incr, *assign;
6950 		  tree size = TYPE_SIZE (ltype);
6951 		  /* Extract the i'th component.  */
6952 		  tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6953 					  bitsize_int (i), size);
6954 		  tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6955 					   size, pos);
6956 
6957 		  elem = force_gimple_operand_gsi (gsi, elem, true,
6958 						   NULL_TREE, true,
6959 						   GSI_SAME_STMT);
6960 
6961 		  tree this_off = build_int_cst (TREE_TYPE (alias_off),
6962 						 group_el * elsz);
6963 		  newref = build2 (MEM_REF, ltype,
6964 				   running_off, this_off);
6965 		  vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
6966 
6967 		  /* And store it to *running_off.  */
6968 		  assign = gimple_build_assign (newref, elem);
6969 		  stmt_vec_info assign_info
6970 		    = vect_finish_stmt_generation (stmt_info, assign, gsi);
6971 
6972 		  group_el += lnel;
6973 		  if (! slp
6974 		      || group_el == group_size)
6975 		    {
6976 		      newoff = copy_ssa_name (running_off, NULL);
6977 		      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6978 						  running_off, stride_step);
6979 		      vect_finish_stmt_generation (stmt_info, incr, gsi);
6980 
6981 		      running_off = newoff;
6982 		      group_el = 0;
6983 		    }
6984 		  if (g == group_size - 1
6985 		      && !slp)
6986 		    {
6987 		      if (j == 0 && i == 0)
6988 			STMT_VINFO_VEC_STMT (stmt_info)
6989 			    = *vec_stmt = assign_info;
6990 		      else
6991 			STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
6992 		      prev_stmt_info = assign_info;
6993 		    }
6994 		}
6995 	    }
6996 	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6997 	  if (slp)
6998 	    break;
6999 	}
7000 
7001       vec_oprnds.release ();
7002       return true;
7003     }
7004 
7005   auto_vec<tree> dr_chain (group_size);
7006   oprnds.create (group_size);
7007 
7008   alignment_support_scheme
7009     = vect_supportable_dr_alignment (first_dr_info, false);
7010   gcc_assert (alignment_support_scheme);
7011   vec_loop_masks *loop_masks
7012     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7013        ? &LOOP_VINFO_MASKS (loop_vinfo)
7014        : NULL);
7015   /* Targets with store-lane instructions must not require explicit
7016      realignment.  vect_supportable_dr_alignment always returns either
7017      dr_aligned or dr_unaligned_supported for masked operations.  */
7018   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7019 	       && !mask
7020 	       && !loop_masks)
7021 	      || alignment_support_scheme == dr_aligned
7022 	      || alignment_support_scheme == dr_unaligned_supported);
7023 
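  /* For a downward or reversed contiguous access the scalar data
     reference points at the highest-addressed element of the first
     vector, so the vector pointer is biased by -(NUNITS - 1) elements
     below to reach its lowest-addressed element.  */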
7024   if (memory_access_type == VMAT_CONTIGUOUS_DOWN
7025       || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7026     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7027 
7028   tree bump;
7029   tree vec_offset = NULL_TREE;
7030   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7031     {
7032       aggr_type = NULL_TREE;
7033       bump = NULL_TREE;
7034     }
7035   else if (memory_access_type == VMAT_GATHER_SCATTER)
7036     {
7037       aggr_type = elem_type;
7038       vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
7039 				       &bump, &vec_offset);
7040     }
7041   else
7042     {
7043       if (memory_access_type == VMAT_LOAD_STORE_LANES)
7044 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7045       else
7046 	aggr_type = vectype;
7047       bump = vect_get_data_ptr_increment (dr_info, aggr_type,
7048 					  memory_access_type);
7049     }
7050 
7051   if (mask)
7052     LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
7053 
7054   /* In case the vectorization factor (VF) is bigger than the number
7055      of elements that we can fit in a vectype (nunits), we have to generate
7056      more than one vector stmt - i.e - we need to "unroll" the
7057      vector stmt by a factor VF/nunits.  For more details see documentation in
7058      vect_get_vec_def_for_stmt_copy.  */
7059 
7060   /* In case of interleaving (non-unit grouped access):
7061 
7062         S1:  &base + 2 = x2
7063         S2:  &base = x0
7064         S3:  &base + 1 = x1
7065         S4:  &base + 3 = x3
7066 
7067      We create vectorized stores starting from base address (the access of the
7068      first stmt in the chain (S2 in the above example), when the last store stmt
7069      of the chain (S4) is reached:
7070 
7071         VS1: &base = vx2
7072 	VS2: &base + vec_size*1 = vx0
7073 	VS3: &base + vec_size*2 = vx1
7074 	VS4: &base + vec_size*3 = vx3
7075 
7076      Then permutation statements are generated:
7077 
7078 	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
7079 	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
7080 	...
7081 
7082      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7083      (the order of the data-refs in the output of vect_permute_store_chain
7084      corresponds to the order of scalar stmts in the interleaving chain - see
7085      the documentation of vect_permute_store_chain()).
7086 
7087      In case of both multiple types and interleaving, above vector stores and
7088      permutation stmts are created for every copy.  The result vector stmts are
7089      put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
7090      STMT_VINFO_RELATED_STMT for the next copies.
7091   */
7092 
7093   prev_stmt_info = NULL;
7094   tree vec_mask = NULL_TREE;
7095   for (j = 0; j < ncopies; j++)
7096     {
7097       stmt_vec_info new_stmt_info;
7098       if (j == 0)
7099 	{
7100           if (slp)
7101             {
7102 	      /* Get vectorized arguments for SLP_NODE.  */
7103 	      vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
7104 				 NULL, slp_node);
7105 
7106               vec_oprnd = vec_oprnds[0];
7107             }
7108           else
7109             {
7110 	      /* For interleaved stores we collect vectorized defs for all the
7111 		 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
7112 		 used as an input to vect_permute_store_chain(), and OPRNDS as
7113 		 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
7114 
7115 		 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7116 		 OPRNDS are of size 1.  */
7117 	      stmt_vec_info next_stmt_info = first_stmt_info;
7118 	      for (i = 0; i < group_size; i++)
7119 		{
7120 		  /* Since gaps are not supported for interleaved stores,
7121 		     DR_GROUP_SIZE is the exact number of stmts in the chain.
7122 		     Therefore, NEXT_STMT_INFO can't be NULL.  In case
7123 		     that there is no interleaving, DR_GROUP_SIZE is 1,
7124 		     and only one iteration of the loop will be executed.  */
7125 		  op = vect_get_store_rhs (next_stmt_info);
7126 		  vec_oprnd = vect_get_vec_def_for_operand
7127 		    (op, next_stmt_info);
7128 		  dr_chain.quick_push (vec_oprnd);
7129 		  oprnds.quick_push (vec_oprnd);
7130 		  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7131 		}
7132 	      if (mask)
7133 		vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
7134 							 mask_vectype);
7135 	    }
7136 
7137 	  /* We should have caught mismatched types earlier.  */
7138 	  gcc_assert (useless_type_conversion_p (vectype,
7139 						 TREE_TYPE (vec_oprnd)));
7140 	  bool simd_lane_access_p
7141 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7142 	  if (simd_lane_access_p
7143 	      && !loop_masks
7144 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
7145 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
7146 	      && integer_zerop (DR_OFFSET (first_dr_info->dr))
7147 	      && integer_zerop (DR_INIT (first_dr_info->dr))
7148 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
7149 					get_alias_set (TREE_TYPE (ref_type))))
7150 	    {
7151 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
7152 	      dataref_offset = build_int_cst (ref_type, 0);
7153 	    }
7154 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7155 	    vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
7156 					 &dataref_ptr, &vec_offset);
7157 	  else
7158 	    dataref_ptr
7159 	      = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
7160 					  simd_lane_access_p ? loop : NULL,
7161 					  offset, &dummy, gsi, &ptr_incr,
7162 					  simd_lane_access_p, NULL_TREE, bump);
7163 	}
7164       else
7165 	{
7166 	  /* For interleaved stores we created vectorized defs for all the
7167 	     defs stored in OPRNDS in the previous iteration (previous copy).
7168 	     DR_CHAIN is then used as an input to vect_permute_store_chain(),
7169 	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
7170 	     next copy.
7171 	     If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7172 	     OPRNDS are of size 1.  */
7173 	  for (i = 0; i < group_size; i++)
7174 	    {
7175 	      op = oprnds[i];
7176 	      vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
7177 	      dr_chain[i] = vec_oprnd;
7178 	      oprnds[i] = vec_oprnd;
7179 	    }
7180 	  if (mask)
7181 	    vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
7182 	  if (dataref_offset)
7183 	    dataref_offset
7184 	      = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7185 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7186 	    vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
7187 	  else
7188 	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7189 					   stmt_info, bump);
7190 	}
7191 
7192       if (memory_access_type == VMAT_LOAD_STORE_LANES)
7193 	{
7194 	  tree vec_array;
7195 
7196 	  /* Get an array into which we can store the individual vectors.  */
7197 	  vec_array = create_vector_array (vectype, vec_num);
7198 
7199 	  /* Invalidate the current contents of VEC_ARRAY.  This should
7200 	     become an RTL clobber too, which prevents the vector registers
7201 	     from being upward-exposed.  */
7202 	  vect_clobber_variable (stmt_info, gsi, vec_array);
7203 
7204 	  /* Store the individual vectors into the array.  */
7205 	  for (i = 0; i < vec_num; i++)
7206 	    {
7207 	      vec_oprnd = dr_chain[i];
7208 	      write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
7209 	    }
7210 
7211 	  tree final_mask = NULL;
7212 	  if (loop_masks)
7213 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7214 					     vectype, j);
7215 	  if (vec_mask)
7216 	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7217 						  vec_mask, gsi);
7218 
7219 	  gcall *call;
7220 	  if (final_mask)
7221 	    {
7222 	      /* Emit:
7223 		   MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7224 				     VEC_ARRAY).  */
7225 	      unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7226 	      tree alias_ptr = build_int_cst (ref_type, align);
7227 	      call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7228 						 dataref_ptr, alias_ptr,
7229 						 final_mask, vec_array);
7230 	    }
7231 	  else
7232 	    {
7233 	      /* Emit:
7234 		   MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
7235 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7236 	      call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7237 						 vec_array);
7238 	      gimple_call_set_lhs (call, data_ref);
7239 	    }
7240 	  gimple_call_set_nothrow (call, true);
7241 	  new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
7242 
7243 	  /* Record that VEC_ARRAY is now dead.  */
7244 	  vect_clobber_variable (stmt_info, gsi, vec_array);
7245 	}
7246       else
7247 	{
7248 	  new_stmt_info = NULL;
7249 	  if (grouped_store)
7250 	    {
7251 	      if (j == 0)
7252 		result_chain.create (group_size);
7253 	      /* Permute.  */
7254 	      vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
7255 					&result_chain);
7256 	    }
7257 
7258 	  stmt_vec_info next_stmt_info = first_stmt_info;
7259 	  for (i = 0; i < vec_num; i++)
7260 	    {
7261 	      unsigned misalign;
7262 	      unsigned HOST_WIDE_INT align;
7263 
7264 	      tree final_mask = NULL_TREE;
7265 	      if (loop_masks)
7266 		final_mask = vect_get_loop_mask (gsi, loop_masks,
7267 						 vec_num * ncopies,
7268 						 vectype, vec_num * j + i);
7269 	      if (vec_mask)
7270 		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7271 						      vec_mask, gsi);
7272 
7273 	      if (memory_access_type == VMAT_GATHER_SCATTER)
7274 		{
7275 		  tree scale = size_int (gs_info.scale);
7276 		  gcall *call;
7277 		  if (loop_masks)
7278 		    call = gimple_build_call_internal
7279 		      (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7280 		       scale, vec_oprnd, final_mask);
7281 		  else
7282 		    call = gimple_build_call_internal
7283 		      (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7284 		       scale, vec_oprnd);
7285 		  gimple_call_set_nothrow (call, true);
7286 		  new_stmt_info
7287 		    = vect_finish_stmt_generation (stmt_info, call, gsi);
7288 		  break;
7289 		}
7290 
7291 	      if (i > 0)
7292 		/* Bump the vector pointer.  */
7293 		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7294 					       stmt_info, bump);
7295 
7296 	      if (slp)
7297 		vec_oprnd = vec_oprnds[i];
7298 	      else if (grouped_store)
7299 		/* For grouped stores vectorized defs are interleaved in
7300 		   vect_permute_store_chain().  */
7301 		vec_oprnd = result_chain[i];
7302 
7303 	      align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
7304 	      if (aligned_access_p (first_dr_info))
7305 		misalign = 0;
7306 	      else if (DR_MISALIGNMENT (first_dr_info) == -1)
7307 		{
7308 		  align = dr_alignment (vect_dr_behavior (first_dr_info));
7309 		  misalign = 0;
7310 		}
7311 	      else
7312 		misalign = DR_MISALIGNMENT (first_dr_info);
7313 	      if (dataref_offset == NULL_TREE
7314 		  && TREE_CODE (dataref_ptr) == SSA_NAME)
7315 		set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7316 					misalign);
7317 
7318 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7319 		{
7320 		  tree perm_mask = perm_mask_for_reverse (vectype);
7321 		  tree perm_dest = vect_create_destination_var
7322 		    (vect_get_store_rhs (stmt_info), vectype);
7323 		  tree new_temp = make_ssa_name (perm_dest);
7324 
7325 		  /* Generate the permute statement.  */
7326 		  gimple *perm_stmt
7327 		    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7328 					   vec_oprnd, perm_mask);
7329 		  vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
7330 
7331 		  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7332 		  vec_oprnd = new_temp;
7333 		}
7334 
7335 	      /* Arguments are ready.  Create the new vector stmt.  */
7336 	      if (final_mask)
7337 		{
7338 		  align = least_bit_hwi (misalign | align);
7339 		  tree ptr = build_int_cst (ref_type, align);
7340 		  gcall *call
7341 		    = gimple_build_call_internal (IFN_MASK_STORE, 4,
7342 						  dataref_ptr, ptr,
7343 						  final_mask, vec_oprnd);
7344 		  gimple_call_set_nothrow (call, true);
7345 		  new_stmt_info
7346 		    = vect_finish_stmt_generation (stmt_info, call, gsi);
7347 		}
7348 	      else
7349 		{
7350 		  data_ref = fold_build2 (MEM_REF, vectype,
7351 					  dataref_ptr,
7352 					  dataref_offset
7353 					  ? dataref_offset
7354 					  : build_int_cst (ref_type, 0));
7355 		  if (aligned_access_p (first_dr_info))
7356 		    ;
7357 		  else if (DR_MISALIGNMENT (first_dr_info) == -1)
7358 		    TREE_TYPE (data_ref)
7359 		      = build_aligned_type (TREE_TYPE (data_ref),
7360 					    align * BITS_PER_UNIT);
7361 		  else
7362 		    TREE_TYPE (data_ref)
7363 		      = build_aligned_type (TREE_TYPE (data_ref),
7364 					    TYPE_ALIGN (elem_type));
7365 		  vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
7366 		  gassign *new_stmt
7367 		    = gimple_build_assign (data_ref, vec_oprnd);
7368 		  new_stmt_info
7369 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7370 		}
7371 
7372 	      if (slp)
7373 		continue;
7374 
7375 	      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7376 	      if (!next_stmt_info)
7377 		break;
7378 	    }
7379 	}
7380       if (!slp)
7381 	{
7382 	  if (j == 0)
7383 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7384 	  else
7385 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7386 	  prev_stmt_info = new_stmt_info;
7387 	}
7388     }
7389 
7390   oprnds.release ();
7391   result_chain.release ();
7392   vec_oprnds.release ();
7393 
7394   return true;
7395 }
7396 
7397 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7398    VECTOR_CST mask.  No checks are made that the target platform supports the
7399    mask, so callers may wish to test can_vec_perm_const_p separately, or use
7400    vect_gen_perm_mask_checked.  */
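/* For example, with a V4SI VECTYPE and SEL = { 0, 4, 1, 5 } this returns
   the VECTOR_CST { 0, 4, 1, 5 } with ssizetype elements, i.e. the mask a
   VEC_PERM_EXPR would use to interleave the low halves of two inputs
   (purely illustrative).  */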
7401 
7402 tree
7403 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
7404 {
7405   tree mask_type;
7406 
7407   poly_uint64 nunits = sel.length ();
7408   gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
7409 
7410   mask_type = build_vector_type (ssizetype, nunits);
7411   return vec_perm_indices_to_tree (mask_type, sel);
7412 }
7413 
7414 /* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
7415    i.e. that the target supports the pattern _for arbitrary input vectors_.  */
7416 
7417 tree
7418 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
7419 {
7420   gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
7421   return vect_gen_perm_mask_any (vectype, sel);
7422 }
7423 
7424 /* Given vector variables X and Y that were generated for the scalar
7425    STMT_INFO, generate instructions to permute the vector elements of X and Y
7426    using permutation mask MASK_VEC, insert them at *GSI and return the
7427    permuted vector variable.  */
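/* In other words, it emits
     perm_dest = VEC_PERM_EXPR <X, Y, MASK_VEC>;
   at *GSI and returns the SSA name holding the permuted result.  */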
7428 
7429 static tree
7430 permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
7431 		      gimple_stmt_iterator *gsi)
7432 {
7433   tree vectype = TREE_TYPE (x);
7434   tree perm_dest, data_ref;
7435   gimple *perm_stmt;
7436 
7437   tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
7438   if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
7439     perm_dest = vect_create_destination_var (scalar_dest, vectype);
7440   else
7441     perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
7442   data_ref = make_ssa_name (perm_dest);
7443 
7444   /* Generate the permute statement.  */
7445   perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
7446   vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
7447 
7448   return data_ref;
7449 }
7450 
7451 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
7452    inserting them on the loop's preheader edge.  Returns true if we
7453    were successful in doing so (and thus STMT_INFO can then be moved),
7454    otherwise returns false.  */
7455 
7456 static bool
7457 hoist_defs_of_uses (stmt_vec_info stmt_info, struct loop *loop)
7458 {
7459   ssa_op_iter i;
7460   tree op;
7461   bool any = false;
7462 
7463   FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
7464     {
7465       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7466       if (!gimple_nop_p (def_stmt)
7467 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7468 	{
7469 	  /* Make sure we don't need to recurse.  While we could do
7470 	     so in simple cases, for more complex use webs we don't have
7471 	     an easy way to preserve stmt order to fulfil dependencies
7472 	     within them.  */
7473 	  tree op2;
7474 	  ssa_op_iter i2;
7475 	  if (gimple_code (def_stmt) == GIMPLE_PHI)
7476 	    return false;
7477 	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7478 	    {
7479 	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
7480 	      if (!gimple_nop_p (def_stmt2)
7481 		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7482 		return false;
7483 	    }
7484 	  any = true;
7485 	}
7486     }
7487 
7488   if (!any)
7489     return true;
7490 
7491   FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
7492     {
7493       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7494       if (!gimple_nop_p (def_stmt)
7495 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7496 	{
7497 	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7498 	  gsi_remove (&gsi, false);
7499 	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7500 	}
7501     }
7502 
7503   return true;
7504 }
7505 
7506 /* vectorizable_load.
7507 
7508    Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
7509    that can be vectorized.
7510    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7511    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7512    Return true if STMT_INFO is vectorizable in this way.  */
7513 
7514 static bool
7515 vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7516 		   stmt_vec_info *vec_stmt, slp_tree slp_node,
7517 		   slp_instance slp_node_instance,
7518 		   stmt_vector_for_cost *cost_vec)
7519 {
7520   tree scalar_dest;
7521   tree vec_dest = NULL;
7522   tree data_ref = NULL;
7523   stmt_vec_info prev_stmt_info;
7524   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7525   struct loop *loop = NULL;
7526   struct loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
7527   bool nested_in_vect_loop = false;
7528   tree elem_type;
7529   tree new_temp;
7530   machine_mode mode;
7531   tree dummy;
7532   enum dr_alignment_support alignment_support_scheme;
7533   tree dataref_ptr = NULL_TREE;
7534   tree dataref_offset = NULL_TREE;
7535   gimple *ptr_incr = NULL;
7536   int ncopies;
7537   int i, j;
7538   unsigned int group_size;
7539   poly_uint64 group_gap_adj;
7540   tree msq = NULL_TREE, lsq;
7541   tree offset = NULL_TREE;
7542   tree byte_offset = NULL_TREE;
7543   tree realignment_token = NULL_TREE;
7544   gphi *phi = NULL;
7545   vec<tree> dr_chain = vNULL;
7546   bool grouped_load = false;
7547   stmt_vec_info first_stmt_info;
7548   stmt_vec_info first_stmt_info_for_drptr = NULL;
7549   bool compute_in_loop = false;
7550   struct loop *at_loop;
7551   int vec_num;
7552   bool slp = (slp_node != NULL);
7553   bool slp_perm = false;
7554   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7555   poly_uint64 vf;
7556   tree aggr_type;
7557   gather_scatter_info gs_info;
7558   vec_info *vinfo = stmt_info->vinfo;
7559   tree ref_type;
7560   enum vect_def_type mask_dt = vect_unknown_def_type;
7561 
7562   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7563     return false;
7564 
7565   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7566       && ! vec_stmt)
7567     return false;
7568 
7569   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7570   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7571     {
7572       scalar_dest = gimple_assign_lhs (assign);
7573       if (TREE_CODE (scalar_dest) != SSA_NAME)
7574 	return false;
7575 
7576       tree_code code = gimple_assign_rhs_code (assign);
7577       if (code != ARRAY_REF
7578 	  && code != BIT_FIELD_REF
7579 	  && code != INDIRECT_REF
7580 	  && code != COMPONENT_REF
7581 	  && code != IMAGPART_EXPR
7582 	  && code != REALPART_EXPR
7583 	  && code != MEM_REF
7584 	  && TREE_CODE_CLASS (code) != tcc_declaration)
7585 	return false;
7586     }
7587   else
7588     {
7589       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7590       if (!call || !gimple_call_internal_p (call))
7591 	return false;
7592 
7593       internal_fn ifn = gimple_call_internal_fn (call);
7594       if (!internal_load_fn_p (ifn))
7595 	return false;
7596 
7597       scalar_dest = gimple_call_lhs (call);
7598       if (!scalar_dest)
7599 	return false;
7600 
7601       if (slp_node != NULL)
7602 	{
7603 	  if (dump_enabled_p ())
7604 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7605 			     "SLP of masked loads not supported.\n");
7606 	  return false;
7607 	}
7608 
7609       int mask_index = internal_fn_mask_index (ifn);
7610       if (mask_index >= 0)
7611 	{
7612 	  mask = gimple_call_arg (call, mask_index);
7613 	  if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
7614 					   &mask_vectype))
7615 	    return false;
7616 	}
7617     }
7618 
7619   if (!STMT_VINFO_DATA_REF (stmt_info))
7620     return false;
7621 
7622   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7623   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7624 
7625   if (loop_vinfo)
7626     {
7627       loop = LOOP_VINFO_LOOP (loop_vinfo);
7628       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
7629       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7630     }
7631   else
7632     vf = 1;
7633 
7634   /* Multiple types in SLP are handled by creating the appropriate number of
7635      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
7636      case of SLP.  */
7637   if (slp)
7638     ncopies = 1;
7639   else
7640     ncopies = vect_get_num_copies (loop_vinfo, vectype);
7641 
7642   gcc_assert (ncopies >= 1);
7643 
7644   /* FORNOW. This restriction should be relaxed.  */
7645   if (nested_in_vect_loop && ncopies > 1)
7646     {
7647       if (dump_enabled_p ())
7648         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7649                          "multiple types in nested loop.\n");
7650       return false;
7651     }
7652 
7653   /* Invalidate assumptions made by dependence analysis when vectorization
7654      on the unrolled body effectively re-orders stmts.  */
7655   if (ncopies > 1
7656       && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7657       && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7658 		   STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7659     {
7660       if (dump_enabled_p ())
7661 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7662 			 "cannot perform implicit CSE when unrolling "
7663 			 "with negative dependence distance\n");
7664       return false;
7665     }
7666 
7667   elem_type = TREE_TYPE (vectype);
7668   mode = TYPE_MODE (vectype);
7669 
7670   /* FORNOW.  In some cases we can vectorize even if the data type is not
7671      supported (e.g. data copies).  */
7672   if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7673     {
7674       if (dump_enabled_p ())
7675         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7676                          "Aligned load, but unsupported type.\n");
7677       return false;
7678     }
7679 
7680   /* Check if the load is a part of an interleaving chain.  */
7681   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7682     {
7683       grouped_load = true;
7684       /* FORNOW */
7685       gcc_assert (!nested_in_vect_loop);
7686       gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7687 
7688       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7689       group_size = DR_GROUP_SIZE (first_stmt_info);
7690 
7691       if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7692 	slp_perm = true;
7693 
7694       /* Invalidate assumptions made by dependence analysis when vectorization
7695 	 on the unrolled body effectively re-orders stmts.  */
7696       if (!PURE_SLP_STMT (stmt_info)
7697 	  && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7698 	  && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7699 		       STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7700 	{
7701 	  if (dump_enabled_p ())
7702 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7703 			     "cannot perform implicit CSE when performing "
7704 			     "group loads with negative dependence distance\n");
7705 	  return false;
7706 	}
7707     }
7708   else
7709     group_size = 1;
7710 
7711   vect_memory_access_type memory_access_type;
7712   if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
7713 			    &memory_access_type, &gs_info))
7714     return false;
7715 
7716   if (mask)
7717     {
7718       if (memory_access_type == VMAT_CONTIGUOUS)
7719 	{
7720 	  machine_mode vec_mode = TYPE_MODE (vectype);
7721 	  if (!VECTOR_MODE_P (vec_mode)
7722 	      || !can_vec_mask_load_store_p (vec_mode,
7723 					     TYPE_MODE (mask_vectype), true))
7724 	    return false;
7725 	}
7726       else if (memory_access_type != VMAT_LOAD_STORE_LANES
7727 	       && memory_access_type != VMAT_GATHER_SCATTER)
7728 	{
7729 	  if (dump_enabled_p ())
7730 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7731 			     "unsupported access type for masked load.\n");
7732 	  return false;
7733 	}
7734     }
7735 
7736   if (!vec_stmt) /* transformation not required.  */
7737     {
7738       if (!slp)
7739 	STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7740 
7741       if (loop_vinfo
7742 	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7743 	check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7744 				  memory_access_type, &gs_info);
7745 
7746       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7747       vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7748 			    slp_node_instance, slp_node, cost_vec);
7749       return true;
7750     }
7751 
7752   if (!slp)
7753     gcc_assert (memory_access_type
7754 		== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7755 
7756   if (dump_enabled_p ())
7757     dump_printf_loc (MSG_NOTE, vect_location,
7758                      "transform load. ncopies = %d\n", ncopies);
7759 
7760   /* Transform.  */
7761 
7762   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7763   ensure_base_align (dr_info);
7764 
7765   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7766     {
7767       vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
7768       return true;
7769     }
7770 
7771   if (memory_access_type == VMAT_INVARIANT)
7772     {
7773       gcc_assert (!grouped_load && !mask && !bb_vinfo);
7774       /* If we have versioned for aliasing or the loop doesn't
7775 	 have any data dependencies that would preclude this,
7776 	 then we are sure this is a loop invariant load and
7777 	 thus we can insert it on the preheader edge.  */
7778       bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7779 		      && !nested_in_vect_loop
7780 		      && hoist_defs_of_uses (stmt_info, loop));
7781       if (hoist_p)
7782 	{
7783 	  gassign *stmt = as_a <gassign *> (stmt_info->stmt);
7784 	  if (dump_enabled_p ())
7785 	    dump_printf_loc (MSG_NOTE, vect_location,
7786 			     "hoisting out of the vectorized loop: %G", stmt);
7787 	  scalar_dest = copy_ssa_name (scalar_dest);
7788 	  tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
7789 	  gsi_insert_on_edge_immediate
7790 	    (loop_preheader_edge (loop),
7791 	     gimple_build_assign (scalar_dest, rhs));
7792 	}
7793       /* These copies are all equivalent, but currently the representation
7794 	 requires a separate STMT_VINFO_VEC_STMT for each one.  */
7795       prev_stmt_info = NULL;
7796       gimple_stmt_iterator gsi2 = *gsi;
7797       gsi_next (&gsi2);
7798       for (j = 0; j < ncopies; j++)
7799 	{
7800 	  stmt_vec_info new_stmt_info;
7801 	  if (hoist_p)
7802 	    {
7803 	      new_temp = vect_init_vector (stmt_info, scalar_dest,
7804 					   vectype, NULL);
7805 	      gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
7806 	      new_stmt_info = vinfo->add_stmt (new_stmt);
7807 	    }
7808 	  else
7809 	    {
7810 	      new_temp = vect_init_vector (stmt_info, scalar_dest,
7811 					   vectype, &gsi2);
7812 	      new_stmt_info = vinfo->lookup_def (new_temp);
7813 	    }
7814 	  if (slp)
7815 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
7816 	  else if (j == 0)
7817 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7818 	  else
7819 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7820 	  prev_stmt_info = new_stmt_info;
7821 	}
7822       return true;
7823     }
7824 
7825   if (memory_access_type == VMAT_ELEMENTWISE
7826       || memory_access_type == VMAT_STRIDED_SLP)
7827     {
7828       gimple_stmt_iterator incr_gsi;
7829       bool insert_after;
7830       gimple *incr;
7831       tree offvar;
7832       tree ivstep;
7833       tree running_off;
7834       vec<constructor_elt, va_gc> *v = NULL;
7835       tree stride_base, stride_step, alias_off;
7836       /* Checked by get_load_store_type.  */
7837       unsigned int const_nunits = nunits.to_constant ();
7838       unsigned HOST_WIDE_INT cst_offset = 0;
7839 
7840       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7841       gcc_assert (!nested_in_vect_loop);
7842 
7843       if (grouped_load)
7844 	{
7845 	  first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7846 	  first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7847 	}
7848       else
7849 	{
7850 	  first_stmt_info = stmt_info;
7851 	  first_dr_info = dr_info;
7852 	}
7853       if (slp && grouped_load)
7854 	{
7855 	  group_size = DR_GROUP_SIZE (first_stmt_info);
7856 	  ref_type = get_group_alias_ptr_type (first_stmt_info);
7857 	}
7858       else
7859 	{
7860 	  if (grouped_load)
7861 	    cst_offset
7862 	      = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7863 		 * vect_get_place_in_interleaving_chain (stmt_info,
7864 							 first_stmt_info));
7865 	  group_size = 1;
7866 	  ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7867 	}
7868 
7869       stride_base
7870 	= fold_build_pointer_plus
7871 	    (DR_BASE_ADDRESS (first_dr_info->dr),
7872 	     size_binop (PLUS_EXPR,
7873 			 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
7874 			 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7875       stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7876 
7877       /* For a load with loop-invariant (but other than power-of-2)
7878          stride (i.e. not a grouped access) like so:
7879 
7880 	   for (i = 0; i < n; i += stride)
7881 	     ... = array[i];
7882 
7883 	 we generate a new induction variable and new accesses to
7884 	 form a new vector (or vectors, depending on ncopies):
7885 
7886 	   for (j = 0; ; j += VF*stride)
7887 	     tmp1 = array[j];
7888 	     tmp2 = array[j + stride];
7889 	     ...
7890 	     vectemp = {tmp1, tmp2, ...}
7891          */
7892 
7893       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7894 			    build_int_cst (TREE_TYPE (stride_step), vf));
7895 
7896       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7897 
7898       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7899       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7900       create_iv (stride_base, ivstep, NULL,
7901 		 loop, &incr_gsi, insert_after,
7902 		 &offvar, NULL);
7903       incr = gsi_stmt (incr_gsi);
7904       loop_vinfo->add_stmt (incr);
7905 
7906       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7907 
7908       prev_stmt_info = NULL;
7909       running_off = offvar;
7910       alias_off = build_int_cst (ref_type, 0);
7911       int nloads = const_nunits;
7912       int lnel = 1;
7913       tree ltype = TREE_TYPE (vectype);
7914       tree lvectype = vectype;
7915       auto_vec<tree> dr_chain;
7916       if (memory_access_type == VMAT_STRIDED_SLP)
7917 	{
7918 	  if (group_size < const_nunits)
7919 	    {
7920 	      /* First check if vec_init optab supports construction from
7921 		 vector elts directly.  */
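	      /* E.g. for a group_size of 2 and a V4SImode vectype this asks
		 whether a V4SImode vector can be built directly from
		 V2SImode subvectors.  */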
7922 	      scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7923 	      machine_mode vmode;
7924 	      if (mode_for_vector (elmode, group_size).exists (&vmode)
7925 		  && VECTOR_MODE_P (vmode)
7926 		  && targetm.vector_mode_supported_p (vmode)
7927 		  && (convert_optab_handler (vec_init_optab,
7928 					     TYPE_MODE (vectype), vmode)
7929 		      != CODE_FOR_nothing))
7930 		{
7931 		  nloads = const_nunits / group_size;
7932 		  lnel = group_size;
7933 		  ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7934 		}
7935 	      else
7936 		{
7937 		  /* Otherwise avoid emitting a constructor of vector elements
7938 		     by performing the loads using an integer type of the same
7939 		     size, constructing a vector of those and then
7940 		     re-interpreting it as the original vector type.
7941 		     This avoids a huge runtime penalty due to the general
7942 		     inability to perform store forwarding from smaller stores
7943 		     to a larger load.  */
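		  /* E.g. for a group_size of 2 and a V4SImode vectype this
		     loads two 64-bit integers, builds a V2DImode vector from
		     them and view-converts the result back to V4SImode.  */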
7944 		  unsigned lsize
7945 		    = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7946 		  unsigned int lnunits = const_nunits / group_size;
7947 		  /* If we can't construct such a vector fall back to
7948 		     element loads of the original vector type.  */
7949 		  if (int_mode_for_size (lsize, 0).exists (&elmode)
7950 		      && mode_for_vector (elmode, lnunits).exists (&vmode)
7951 		      && VECTOR_MODE_P (vmode)
7952 		      && targetm.vector_mode_supported_p (vmode)
7953 		      && (convert_optab_handler (vec_init_optab, vmode, elmode)
7954 			  != CODE_FOR_nothing))
7955 		    {
7956 		      nloads = lnunits;
7957 		      lnel = group_size;
7958 		      ltype = build_nonstandard_integer_type (lsize, 1);
7959 		      lvectype = build_vector_type (ltype, nloads);
7960 		    }
7961 		}
7962 	    }
7963 	  else
7964 	    {
7965 	      nloads = 1;
7966 	      lnel = const_nunits;
7967 	      ltype = vectype;
7968 	    }
7969 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7970 	}
7971       /* If the vectype has just a single element, the element-wise load
	 can read the whole vector(1) scalar_type at once.  */
7972       else if (nloads == 1)
7973 	ltype = vectype;
7974 
7975       if (slp)
7976 	{
7977 	  /* For SLP permutation support we need to load the whole group,
7978 	     not only the number of vector stmts the permutation result
7979 	     fits in.  */
7980 	  if (slp_perm)
7981 	    {
7982 	      /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7983 		 variable VF.  */
7984 	      unsigned int const_vf = vf.to_constant ();
7985 	      ncopies = CEIL (group_size * const_vf, const_nunits);
7986 	      dr_chain.create (ncopies);
7987 	    }
7988 	  else
7989 	    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7990 	}
7991       unsigned int group_el = 0;
7992       unsigned HOST_WIDE_INT
7993 	elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7994       for (j = 0; j < ncopies; j++)
7995 	{
7996 	  if (nloads > 1)
7997 	    vec_alloc (v, nloads);
7998 	  stmt_vec_info new_stmt_info = NULL;
7999 	  for (i = 0; i < nloads; i++)
8000 	    {
8001 	      tree this_off = build_int_cst (TREE_TYPE (alias_off),
8002 					     group_el * elsz + cst_offset);
8003 	      tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
8004 	      vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8005 	      gassign *new_stmt
8006 		= gimple_build_assign (make_ssa_name (ltype), data_ref);
8007 	      new_stmt_info
8008 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8009 	      if (nloads > 1)
8010 		CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
8011 					gimple_assign_lhs (new_stmt));
8012 
8013 	      group_el += lnel;
8014 	      if (! slp
8015 		  || group_el == group_size)
8016 		{
8017 		  tree newoff = copy_ssa_name (running_off);
8018 		  gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8019 						      running_off, stride_step);
8020 		  vect_finish_stmt_generation (stmt_info, incr, gsi);
8021 
8022 		  running_off = newoff;
8023 		  group_el = 0;
8024 		}
8025 	    }
8026 	  if (nloads > 1)
8027 	    {
8028 	      tree vec_inv = build_constructor (lvectype, v);
8029 	      new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
8030 	      new_stmt_info = vinfo->lookup_def (new_temp);
8031 	      if (lvectype != vectype)
8032 		{
8033 		  gassign *new_stmt
8034 		    = gimple_build_assign (make_ssa_name (vectype),
8035 					   VIEW_CONVERT_EXPR,
8036 					   build1 (VIEW_CONVERT_EXPR,
8037 						   vectype, new_temp));
8038 		  new_stmt_info
8039 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8040 		}
8041 	    }
8042 
8043 	  if (slp)
8044 	    {
8045 	      if (slp_perm)
8046 		dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
8047 	      else
8048 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8049 	    }
8050 	  else
8051 	    {
8052 	      if (j == 0)
8053 		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8054 	      else
8055 		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8056 	      prev_stmt_info = new_stmt_info;
8057 	    }
8058 	}
8059       if (slp_perm)
8060 	{
8061 	  unsigned n_perms;
8062 	  vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8063 					slp_node_instance, false, &n_perms);
8064 	}
8065       return true;
8066     }
8067 
8068   if (memory_access_type == VMAT_GATHER_SCATTER
8069       || (!slp && memory_access_type == VMAT_CONTIGUOUS))
8070     grouped_load = false;
8071 
8072   if (grouped_load)
8073     {
8074       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8075       group_size = DR_GROUP_SIZE (first_stmt_info);
8076       /* For SLP vectorization we directly vectorize a subchain
8077          without permutation.  */
8078       if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8079 	first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8080       /* For BB vectorization always use the first stmt to base
8081 	 the data ref pointer on.  */
8082       if (bb_vinfo)
8083 	first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8084 
8085       /* Check if the chain of loads is already vectorized.  */
8086       if (STMT_VINFO_VEC_STMT (first_stmt_info)
8087 	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
8088 	     ???  But we can only do so if there is exactly one
8089 	     as we have no way to get at the rest.  Leave the CSE
8090 	     opportunity alone.
8091 	     ???  With the group load eventually participating
8092 	     in multiple different permutations (having multiple
8093 	     slp nodes which refer to the same group) the CSE
8094 	     is even wrong code.  See PR56270.  */
8095 	  && !slp)
8096 	{
8097 	  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8098 	  return true;
8099 	}
8100       first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8101       group_gap_adj = 0;
8102 
8103       /* VEC_NUM is the number of vect stmts to be created for this group.  */
8104       if (slp)
8105 	{
8106 	  grouped_load = false;
8107 	  /* If an SLP permutation is from N elements to N elements,
8108 	     and if one vector holds a whole number of N, we can load
8109 	     the inputs to the permutation in the same way as an
8110 	     unpermuted sequence.  In other cases we need to load the
8111 	     whole group, not only the number of vector stmts the
8112 	     permutation result fits in.  */
8113 	  if (slp_perm
8114 	      && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
8115 		  || !multiple_p (nunits, group_size)))
8116 	    {
8117 	      /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
8118 		 variable VF; see vect_transform_slp_perm_load.  */
8119 	      unsigned int const_vf = vf.to_constant ();
8120 	      unsigned int const_nunits = nunits.to_constant ();
8121 	      vec_num = CEIL (group_size * const_vf, const_nunits);
8122 	      group_gap_adj = vf * group_size - nunits * vec_num;
8123 	    }
8124 	  else
8125 	    {
8126 	      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8127 	      group_gap_adj
8128 		= group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
8129 	    }
8130     	}
8131       else
8132 	vec_num = group_size;
8133 
8134       ref_type = get_group_alias_ptr_type (first_stmt_info);
8135     }
8136   else
8137     {
8138       first_stmt_info = stmt_info;
8139       first_dr_info = dr_info;
8140       group_size = vec_num = 1;
8141       group_gap_adj = 0;
8142       ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8143     }
8144 
8145   alignment_support_scheme
8146     = vect_supportable_dr_alignment (first_dr_info, false);
8147   gcc_assert (alignment_support_scheme);
8148   vec_loop_masks *loop_masks
8149     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8150        ? &LOOP_VINFO_MASKS (loop_vinfo)
8151        : NULL);
8152   /* Targets with load-lanes instructions must not require explicit
8153      realignment.  vect_supportable_dr_alignment always returns either
8154      dr_aligned or dr_unaligned_supported for masked operations.  */
8155   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8156 	       && !mask
8157 	       && !loop_masks)
8158 	      || alignment_support_scheme == dr_aligned
8159 	      || alignment_support_scheme == dr_unaligned_supported);
8160 
8161   /* In case the vectorization factor (VF) is bigger than the number
8162      of elements that we can fit in a vectype (nunits), we have to generate
8163      more than one vector stmt - i.e., we need to "unroll" the
8164      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
8165      from one copy of the vector stmt to the next, in the field
8166      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
8167      stages to find the correct vector defs to be used when vectorizing
8168      stmts that use the defs of the current stmt.  The example below
8169      illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8170      need to create 4 vectorized stmts):
8171 
8172      before vectorization:
8173                                 RELATED_STMT    VEC_STMT
8174         S1:     x = memref      -               -
8175         S2:     z = x + 1       -               -
8176 
8177      step 1: vectorize stmt S1:
8178         We first create the vector stmt VS1_0, and, as usual, record a
8179         pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8180         Next, we create the vector stmt VS1_1, and record a pointer to
8181         it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
8182         Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
8183         stmts and pointers:
8184                                 RELATED_STMT    VEC_STMT
8185         VS1_0:  vx0 = memref0   VS1_1           -
8186         VS1_1:  vx1 = memref1   VS1_2           -
8187         VS1_2:  vx2 = memref2   VS1_3           -
8188         VS1_3:  vx3 = memref3   -               -
8189         S1:     x = load        -               VS1_0
8190         S2:     z = x + 1       -               -
8191 
8192      See in documentation in vect_get_vec_def_for_stmt_copy for how the
8193      information we recorded in RELATED_STMT field is used to vectorize
8194      stmt S2.  */
8195 
8196   /* In case of interleaving (non-unit grouped access):
8197 
8198      S1:  x2 = &base + 2
8199      S2:  x0 = &base
8200      S3:  x1 = &base + 1
8201      S4:  x3 = &base + 3
8202 
8203      Vectorized loads are created in the order of memory accesses
8204      starting from the access of the first stmt of the chain:
8205 
8206      VS1: vx0 = &base
8207      VS2: vx1 = &base + vec_size*1
8208      VS3: vx3 = &base + vec_size*2
8209      VS4: vx4 = &base + vec_size*3
8210 
8211      Then permutation statements are generated:
8212 
8213      VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8214      VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
8215        ...
8216 
8217      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8218      (the order of the data-refs in the output of vect_permute_load_chain
8219      corresponds to the order of scalar stmts in the interleaving chain - see
8220      the documentation of vect_permute_load_chain()).
8221      The generation of permutation stmts and recording them in
8222      STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
8223 
8224      In case of both multiple types and interleaving, the vector loads and
8225      permutation stmts above are created for every copy.  The result vector
8226      stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8227      corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
8228 
8229   /* If the data reference is aligned (dr_aligned) or potentially unaligned
8230      on a target that supports unaligned accesses (dr_unaligned_supported)
8231      we generate the following code:
8232          p = initial_addr;
8233          indx = 0;
8234          loop {
8235 	   p = p + indx * vectype_size;
8236            vec_dest = *(p);
8237            indx = indx + 1;
8238          }
8239 
8240      Otherwise, the data reference is potentially unaligned on a target that
8241      does not support unaligned accesses (dr_explicit_realign_optimized) -
8242      then generate the following code, in which the data in each iteration is
8243      obtained by two vector loads, one from the previous iteration, and one
8244      from the current iteration:
8245          p1 = initial_addr;
8246          msq_init = *(floor(p1))
8247          p2 = initial_addr + VS - 1;
8248          realignment_token = call target_builtin;
8249          indx = 0;
8250          loop {
8251            p2 = p2 + indx * vectype_size
8252            lsq = *(floor(p2))
8253            vec_dest = realign_load (msq, lsq, realignment_token)
8254            indx = indx + 1;
8255            msq = lsq;
8256          }   */
8257 
8258   /* If the misalignment remains the same throughout the execution of the
8259      loop, we can create the init_addr and permutation mask at the loop
8260      preheader.  Otherwise, it needs to be created inside the loop.
8261      This can only occur when vectorizing memory accesses in the inner-loop
8262      nested within an outer-loop that is being vectorized.  */
8263 
8264   if (nested_in_vect_loop
8265       && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
8266 		      GET_MODE_SIZE (TYPE_MODE (vectype))))
8267     {
8268       gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8269       compute_in_loop = true;
8270     }
8271 
8272   if ((alignment_support_scheme == dr_explicit_realign_optimized
8273        || alignment_support_scheme == dr_explicit_realign)
8274       && !compute_in_loop)
8275     {
8276       msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token,
8277 				    alignment_support_scheme, NULL_TREE,
8278 				    &at_loop);
8279       if (alignment_support_scheme == dr_explicit_realign_optimized)
8280 	{
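	  /* For the optimized scheme MSQ was loaded on the preheader edge by
	     vect_setup_realignment and flows into the loop via a PHI; the
	     in-loop load reads LSQ from an address biased by VS - 1 bytes,
	     as in the scheme above.  */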
8281 	  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
8282 	  byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8283 				    size_one_node);
8284 	}
8285     }
8286   else
8287     at_loop = loop;
8288 
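  /* For a reverse (negative-step) access each vector load must start at
     the lowest-addressed element of the group, so bias the initial
     address by -(nunits - 1) elements.  */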
8289   if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8290     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8291 
8292   tree bump;
8293   tree vec_offset = NULL_TREE;
8294   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8295     {
8296       aggr_type = NULL_TREE;
8297       bump = NULL_TREE;
8298     }
8299   else if (memory_access_type == VMAT_GATHER_SCATTER)
8300     {
8301       aggr_type = elem_type;
8302       vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8303 				       &bump, &vec_offset);
8304     }
8305   else
8306     {
8307       if (memory_access_type == VMAT_LOAD_STORE_LANES)
8308 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8309       else
8310 	aggr_type = vectype;
8311       bump = vect_get_data_ptr_increment (dr_info, aggr_type,
8312 					  memory_access_type);
8313     }
8314 
8315   tree vec_mask = NULL_TREE;
8316   prev_stmt_info = NULL;
8317   poly_uint64 group_elt = 0;
8318   for (j = 0; j < ncopies; j++)
8319     {
8320       stmt_vec_info new_stmt_info = NULL;
8321       /* 1. Create the vector or array pointer update chain.  */
8322       if (j == 0)
8323 	{
8324 	  bool simd_lane_access_p
8325 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8326 	  if (simd_lane_access_p
8327 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8328 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8329 	      && integer_zerop (DR_OFFSET (first_dr_info->dr))
8330 	      && integer_zerop (DR_INIT (first_dr_info->dr))
8331 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
8332 					get_alias_set (TREE_TYPE (ref_type)))
8333 	      && (alignment_support_scheme == dr_aligned
8334 		  || alignment_support_scheme == dr_unaligned_supported))
8335 	    {
8336 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8337 	      dataref_offset = build_int_cst (ref_type, 0);
8338 	    }
8339 	  else if (first_stmt_info_for_drptr
8340 		   && first_stmt_info != first_stmt_info_for_drptr)
8341 	    {
8342 	      dataref_ptr
8343 		= vect_create_data_ref_ptr (first_stmt_info_for_drptr,
8344 					    aggr_type, at_loop, offset, &dummy,
8345 					    gsi, &ptr_incr, simd_lane_access_p,
8346 					    byte_offset, bump);
8347 	      /* Adjust the pointer by the difference to first_stmt.  */
8348 	      data_reference_p ptrdr
8349 		= STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
8350 	      tree diff
8351 		= fold_convert (sizetype,
8352 				size_binop (MINUS_EXPR,
8353 					    DR_INIT (first_dr_info->dr),
8354 					    DR_INIT (ptrdr)));
8355 	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8356 					     stmt_info, diff);
8357 	    }
8358 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8359 	    vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
8360 					 &dataref_ptr, &vec_offset);
8361 	  else
8362 	    dataref_ptr
8363 	      = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
8364 					  offset, &dummy, gsi, &ptr_incr,
8365 					  simd_lane_access_p,
8366 					  byte_offset, bump);
8367 	  if (mask)
8368 	    vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
8369 						     mask_vectype);
8370 	}
8371       else
8372 	{
8373 	  if (dataref_offset)
8374 	    dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
8375 					      bump);
8376 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8377 	    vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8378 	  else
8379 	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8380 					   stmt_info, bump);
8381 	  if (mask)
8382 	    vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8383 	}
8384 
8385       if (grouped_load || slp_perm)
8386 	dr_chain.create (vec_num);
8387 
8388       if (memory_access_type == VMAT_LOAD_STORE_LANES)
8389 	{
8390 	  tree vec_array;
8391 
8392 	  vec_array = create_vector_array (vectype, vec_num);
8393 
8394 	  tree final_mask = NULL_TREE;
8395 	  if (loop_masks)
8396 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8397 					     vectype, j);
8398 	  if (vec_mask)
8399 	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8400 						  vec_mask, gsi);
8401 
8402 	  gcall *call;
8403 	  if (final_mask)
8404 	    {
8405 	      /* Emit:
8406 		   VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8407 		                                VEC_MASK).  */
8408 	      unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8409 	      tree alias_ptr = build_int_cst (ref_type, align);
8410 	      call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8411 						 dataref_ptr, alias_ptr,
8412 						 final_mask);
8413 	    }
8414 	  else
8415 	    {
8416 	      /* Emit:
8417 		   VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
8418 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8419 	      call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8420 	    }
8421 	  gimple_call_set_lhs (call, vec_array);
8422 	  gimple_call_set_nothrow (call, true);
8423 	  new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
8424 
8425 	  /* Extract each vector into an SSA_NAME.  */
8426 	  for (i = 0; i < vec_num; i++)
8427 	    {
8428 	      new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
8429 					    vec_array, i);
8430 	      dr_chain.quick_push (new_temp);
8431 	    }
8432 
8433 	  /* Record the mapping between SSA_NAMEs and statements.  */
8434 	  vect_record_grouped_load_vectors (stmt_info, dr_chain);
8435 
8436 	  /* Record that VEC_ARRAY is now dead.  */
8437 	  vect_clobber_variable (stmt_info, gsi, vec_array);
8438 	}
8439       else
8440 	{
8441 	  for (i = 0; i < vec_num; i++)
8442 	    {
8443 	      tree final_mask = NULL_TREE;
8444 	      if (loop_masks
8445 		  && memory_access_type != VMAT_INVARIANT)
8446 		final_mask = vect_get_loop_mask (gsi, loop_masks,
8447 						 vec_num * ncopies,
8448 						 vectype, vec_num * j + i);
8449 	      if (vec_mask)
8450 		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8451 						      vec_mask, gsi);
8452 
8453 	      if (i > 0)
8454 		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8455 					       stmt_info, bump);
8456 
8457 	      /* 2. Create the vector-load in the loop.  */
8458 	      gimple *new_stmt = NULL;
8459 	      switch (alignment_support_scheme)
8460 		{
8461 		case dr_aligned:
8462 		case dr_unaligned_supported:
8463 		  {
8464 		    unsigned int misalign;
8465 		    unsigned HOST_WIDE_INT align;
8466 
8467 		    if (memory_access_type == VMAT_GATHER_SCATTER)
8468 		      {
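			/* Emit either
			     LHS = GATHER_LOAD (DATAREF_PTR, VEC_OFFSET,
						SCALE)
			   or, when loop masking is used,
			     LHS = MASK_GATHER_LOAD (DATAREF_PTR, VEC_OFFSET,
						     SCALE, FINAL_MASK).  */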
8469 			tree scale = size_int (gs_info.scale);
8470 			gcall *call;
8471 			if (loop_masks)
8472 			  call = gimple_build_call_internal
8473 			    (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8474 			     vec_offset, scale, final_mask);
8475 			else
8476 			  call = gimple_build_call_internal
8477 			    (IFN_GATHER_LOAD, 3, dataref_ptr,
8478 			     vec_offset, scale);
8479 			gimple_call_set_nothrow (call, true);
8480 			new_stmt = call;
8481 			data_ref = NULL_TREE;
8482 			break;
8483 		      }
8484 
8485 		    align =
8486 		      known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8487 		    if (alignment_support_scheme == dr_aligned)
8488 		      {
8489 			gcc_assert (aligned_access_p (first_dr_info));
8490 			misalign = 0;
8491 		      }
8492 		    else if (DR_MISALIGNMENT (first_dr_info) == -1)
8493 		      {
8494 			align = dr_alignment
8495 			  (vect_dr_behavior (first_dr_info));
8496 			misalign = 0;
8497 		      }
8498 		    else
8499 		      misalign = DR_MISALIGNMENT (first_dr_info);
8500 		    if (dataref_offset == NULL_TREE
8501 			&& TREE_CODE (dataref_ptr) == SSA_NAME)
8502 		      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8503 					      align, misalign);
8504 
8505 		    if (final_mask)
8506 		      {
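			/* Emit:
			     LHS = MASK_LOAD (DATAREF_PTR, ALIAS_PTR,
					      FINAL_MASK).  */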
8507 			align = least_bit_hwi (misalign | align);
8508 			tree ptr = build_int_cst (ref_type, align);
8509 			gcall *call
8510 			  = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8511 							dataref_ptr, ptr,
8512 							final_mask);
8513 			gimple_call_set_nothrow (call, true);
8514 			new_stmt = call;
8515 			data_ref = NULL_TREE;
8516 		      }
8517 		    else
8518 		      {
8519 			data_ref
8520 			  = fold_build2 (MEM_REF, vectype, dataref_ptr,
8521 					 dataref_offset
8522 					 ? dataref_offset
8523 					 : build_int_cst (ref_type, 0));
8524 			if (alignment_support_scheme == dr_aligned)
8525 			  ;
8526 			else if (DR_MISALIGNMENT (first_dr_info) == -1)
8527 			  TREE_TYPE (data_ref)
8528 			    = build_aligned_type (TREE_TYPE (data_ref),
8529 						  align * BITS_PER_UNIT);
8530 			else
8531 			  TREE_TYPE (data_ref)
8532 			    = build_aligned_type (TREE_TYPE (data_ref),
8533 						  TYPE_ALIGN (elem_type));
8534 		      }
8535 		    break;
8536 		  }
8537 		case dr_explicit_realign:
8538 		  {
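		    /* For the non-optimized realignment scheme both aligned
		       loads happen inside the loop: MSQ from floor (ptr)
		       below, and LSQ from floor (ptr + VS - 1) via the
		       MEM_REF built at the end of this case; the
		       REALIGN_LOAD combining them is emitted after the
		       switch.  */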
8539 		    tree ptr, bump;
8540 
8541 		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8542 
8543 		    if (compute_in_loop)
8544 		      msq = vect_setup_realignment (first_stmt_info, gsi,
8545 						    &realignment_token,
8546 						    dr_explicit_realign,
8547 						    dataref_ptr, NULL);
8548 
8549 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
8550 		      ptr = copy_ssa_name (dataref_ptr);
8551 		    else
8552 		      ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
8553 		    // For explicit realign the target alignment should be
8554 		    // known at compile time.
8555 		    unsigned HOST_WIDE_INT align =
8556 		      DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
8557 		    new_stmt = gimple_build_assign
8558 				 (ptr, BIT_AND_EXPR, dataref_ptr,
8559 				  build_int_cst
8560 				  (TREE_TYPE (dataref_ptr),
8561 				   -(HOST_WIDE_INT) align));
8562 		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8563 		    data_ref
8564 		      = build2 (MEM_REF, vectype, ptr,
8565 				build_int_cst (ref_type, 0));
8566 		    vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8567 		    vec_dest = vect_create_destination_var (scalar_dest,
8568 							    vectype);
8569 		    new_stmt = gimple_build_assign (vec_dest, data_ref);
8570 		    new_temp = make_ssa_name (vec_dest, new_stmt);
8571 		    gimple_assign_set_lhs (new_stmt, new_temp);
8572 		    gimple_set_vdef (new_stmt, gimple_vdef (stmt_info->stmt));
8573 		    gimple_set_vuse (new_stmt, gimple_vuse (stmt_info->stmt));
8574 		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8575 		    msq = new_temp;
8576 
8577 		    bump = size_binop (MULT_EXPR, vs,
8578 				       TYPE_SIZE_UNIT (elem_type));
8579 		    bump = size_binop (MINUS_EXPR, bump, size_one_node);
8580 		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
8581 					   stmt_info, bump);
8582 		    new_stmt = gimple_build_assign
8583 				 (NULL_TREE, BIT_AND_EXPR, ptr,
8584 				  build_int_cst
8585 				  (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
8586 		    ptr = copy_ssa_name (ptr, new_stmt);
8587 		    gimple_assign_set_lhs (new_stmt, ptr);
8588 		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8589 		    data_ref
8590 		      = build2 (MEM_REF, vectype, ptr,
8591 				build_int_cst (ref_type, 0));
8592 		    break;
8593 		  }
8594 		case dr_explicit_realign_optimized:
8595 		  {
8596 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
8597 		      new_temp = copy_ssa_name (dataref_ptr);
8598 		    else
8599 		      new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8600 		    // We should only be doing this if we know the target
8601 		    // alignment at compile time.
8602 		    unsigned HOST_WIDE_INT align =
8603 		      DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
8604 		    new_stmt = gimple_build_assign
8605 		      (new_temp, BIT_AND_EXPR, dataref_ptr,
8606 		       build_int_cst (TREE_TYPE (dataref_ptr),
8607 				     -(HOST_WIDE_INT) align));
8608 		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8609 		    data_ref
8610 		      = build2 (MEM_REF, vectype, new_temp,
8611 				build_int_cst (ref_type, 0));
8612 		    break;
8613 		  }
8614 		default:
8615 		  gcc_unreachable ();
8616 		}
8617 	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
8618 	      /* DATA_REF is null if we've already built the statement.  */
8619 	      if (data_ref)
8620 		{
8621 		  vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8622 		  new_stmt = gimple_build_assign (vec_dest, data_ref);
8623 		}
8624 	      new_temp = make_ssa_name (vec_dest, new_stmt);
8625 	      gimple_set_lhs (new_stmt, new_temp);
8626 	      new_stmt_info
8627 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8628 
8629 	      /* 3. Handle explicit realignment if necessary/supported.
8630 		 Create in loop:
8631 		   vec_dest = realign_load (msq, lsq, realignment_token)  */
8632 	      if (alignment_support_scheme == dr_explicit_realign_optimized
8633 		  || alignment_support_scheme == dr_explicit_realign)
8634 		{
8635 		  lsq = gimple_assign_lhs (new_stmt);
8636 		  if (!realignment_token)
8637 		    realignment_token = dataref_ptr;
8638 		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
8639 		  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8640 						  msq, lsq, realignment_token);
8641 		  new_temp = make_ssa_name (vec_dest, new_stmt);
8642 		  gimple_assign_set_lhs (new_stmt, new_temp);
8643 		  new_stmt_info
8644 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8645 
8646 		  if (alignment_support_scheme == dr_explicit_realign_optimized)
8647 		    {
8648 		      gcc_assert (phi);
8649 		      if (i == vec_num - 1 && j == ncopies - 1)
8650 			add_phi_arg (phi, lsq,
8651 				     loop_latch_edge (containing_loop),
8652 				     UNKNOWN_LOCATION);
8653 		      msq = lsq;
8654 		    }
8655 		}
8656 
8657 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8658 		{
8659 		  tree perm_mask = perm_mask_for_reverse (vectype);
8660 		  new_temp = permute_vec_elements (new_temp, new_temp,
8661 						   perm_mask, stmt_info, gsi);
8662 		  new_stmt_info = vinfo->lookup_def (new_temp);
8663 		}
8664 
8665 	      /* Collect vector loads and later create their permutation in
8666 		 vect_transform_grouped_load ().  */
8667 	      if (grouped_load || slp_perm)
8668 		dr_chain.quick_push (new_temp);
8669 
8670 	      /* Store vector loads in the corresponding SLP_NODE.  */
8671 	      if (slp && !slp_perm)
8672 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8673 
8674 	      /* With SLP permutation we load the gaps as well; without it
8675 		 we need to skip the gaps after we have fully loaded all
8676 		 elements.  group_gap_adj is DR_GROUP_SIZE here.  */
8677 	      group_elt += nunits;
8678 	      if (maybe_ne (group_gap_adj, 0U)
8679 		  && !slp_perm
8680 		  && known_eq (group_elt, group_size - group_gap_adj))
8681 		{
8682 		  poly_wide_int bump_val
8683 		    = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8684 		       * group_gap_adj);
8685 		  tree bump = wide_int_to_tree (sizetype, bump_val);
8686 		  dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8687 						 stmt_info, bump);
8688 		  group_elt = 0;
8689 		}
8690 	    }
8691 	  /* Bump the vector pointer to account for a gap or for excess
8692 	     elements loaded for a permuted SLP load.  */
8693 	  if (maybe_ne (group_gap_adj, 0U) && slp_perm)
8694 	    {
8695 	      poly_wide_int bump_val
8696 		= (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8697 		   * group_gap_adj);
8698 	      tree bump = wide_int_to_tree (sizetype, bump_val);
8699 	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8700 					     stmt_info, bump);
8701 	    }
8702 	}
8703 
8704       if (slp && !slp_perm)
8705 	continue;
8706 
8707       if (slp_perm)
8708         {
8709 	  unsigned n_perms;
8710           if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8711                                              slp_node_instance, false,
8712 					     &n_perms))
8713             {
8714               dr_chain.release ();
8715               return false;
8716             }
8717         }
8718       else
8719         {
8720           if (grouped_load)
8721   	    {
8722 	      if (memory_access_type != VMAT_LOAD_STORE_LANES)
8723 		vect_transform_grouped_load (stmt_info, dr_chain,
8724 					     group_size, gsi);
8725 	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8726 	    }
8727           else
8728 	    {
8729 	      if (j == 0)
8730 	        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8731 	      else
8732 	        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8733 	      prev_stmt_info = new_stmt_info;
8734 	    }
8735         }
8736       dr_chain.release ();
8737     }
8738 
8739   return true;
8740 }
8741 
8742 /* Function vect_is_simple_cond.
8743 
8744    Input:
8745    LOOP - the loop that is being vectorized.
8746    COND - Condition that is checked for simple use.
8747 
8748    Output:
8749    *COMP_VECTYPE - the vector type for the comparison.
8750    *DTS - The def types for the arguments of the comparison.
8751 
8752    Returns whether a COND can be vectorized.  Checks whether
8753    condition operands are supportable using vect_is_simple_use.  */
8754 
8755 static bool
8756 vect_is_simple_cond (tree cond, vec_info *vinfo,
8757 		     tree *comp_vectype, enum vect_def_type *dts,
8758 		     tree vectype)
8759 {
8760   tree lhs, rhs;
8761   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8762 
8763   /* Mask case.  */
8764   if (TREE_CODE (cond) == SSA_NAME
8765       && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8766     {
8767       if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
8768 	  || !*comp_vectype
8769 	  || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8770 	return false;
8771       return true;
8772     }
8773 
8774   if (!COMPARISON_CLASS_P (cond))
8775     return false;
8776 
8777   lhs = TREE_OPERAND (cond, 0);
8778   rhs = TREE_OPERAND (cond, 1);
8779 
8780   if (TREE_CODE (lhs) == SSA_NAME)
8781     {
8782       if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
8783 	return false;
8784     }
8785   else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8786 	   || TREE_CODE (lhs) == FIXED_CST)
8787     dts[0] = vect_constant_def;
8788   else
8789     return false;
8790 
8791   if (TREE_CODE (rhs) == SSA_NAME)
8792     {
8793       if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
8794 	return false;
8795     }
8796   else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8797 	   || TREE_CODE (rhs) == FIXED_CST)
8798     dts[1] = vect_constant_def;
8799   else
8800     return false;
8801 
8802   if (vectype1 && vectype2
8803       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8804 		   TYPE_VECTOR_SUBPARTS (vectype2)))
8805     return false;
8806 
8807   *comp_vectype = vectype1 ? vectype1 : vectype2;
8808   /* Invariant comparison.  */
8809   if (! *comp_vectype && vectype)
8810     {
8811       tree scalar_type = TREE_TYPE (lhs);
8812       /* If we can widen the comparison to match vectype do so.  */
8813       if (INTEGRAL_TYPE_P (scalar_type)
8814 	  && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8815 			      TYPE_SIZE (TREE_TYPE (vectype))))
8816 	scalar_type = build_nonstandard_integer_type
8817 	  (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8818 	   TYPE_UNSIGNED (scalar_type));
8819       *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8820     }
8821 
8822   return true;
8823 }
8824 
8825 /* vectorizable_condition.
8826 
8827    Check if STMT_INFO is a conditional modify expression that can be vectorized.
8828    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8829    stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
8830    at GSI.
8831 
8832    When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
8833 
8834    Return true if STMT_INFO is vectorizable in this way.  */
8835 
8836 bool
8837 vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8838 			stmt_vec_info *vec_stmt, bool for_reduction,
8839 			slp_tree slp_node, stmt_vector_for_cost *cost_vec)
8840 {
8841   vec_info *vinfo = stmt_info->vinfo;
8842   tree scalar_dest = NULL_TREE;
8843   tree vec_dest = NULL_TREE;
8844   tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8845   tree then_clause, else_clause;
8846   tree comp_vectype = NULL_TREE;
8847   tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8848   tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8849   tree vec_compare;
8850   tree new_temp;
8851   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8852   enum vect_def_type dts[4]
8853     = {vect_unknown_def_type, vect_unknown_def_type,
8854        vect_unknown_def_type, vect_unknown_def_type};
8855   int ndts = 4;
8856   int ncopies;
8857   enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8858   stmt_vec_info prev_stmt_info = NULL;
8859   int i, j;
8860   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8861   vec<tree> vec_oprnds0 = vNULL;
8862   vec<tree> vec_oprnds1 = vNULL;
8863   vec<tree> vec_oprnds2 = vNULL;
8864   vec<tree> vec_oprnds3 = vNULL;
8865   tree vec_cmp_type;
8866   bool masked = false;
8867 
8868   if (for_reduction && STMT_SLP_TYPE (stmt_info))
8869     return false;
8870 
8871   vect_reduction_type reduction_type
8872     = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8873   if (reduction_type == TREE_CODE_REDUCTION)
8874     {
8875       if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8876 	return false;
8877 
8878       if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8879 	  && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8880 	       && for_reduction))
8881 	return false;
8882 
8883       /* FORNOW: not yet supported.  */
8884       if (STMT_VINFO_LIVE_P (stmt_info))
8885 	{
8886 	  if (dump_enabled_p ())
8887 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8888 			     "value used after loop.\n");
8889 	  return false;
8890 	}
8891     }
8892 
8893   /* Is vectorizable conditional operation?  */
8894   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
8895   if (!stmt)
8896     return false;
8897 
8898   code = gimple_assign_rhs_code (stmt);
8899 
8900   if (code != COND_EXPR)
8901     return false;
8902 
8903   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8904   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8905 
8906   if (slp_node)
8907     ncopies = 1;
8908   else
8909     ncopies = vect_get_num_copies (loop_vinfo, vectype);
8910 
8911   gcc_assert (ncopies >= 1);
8912   if (for_reduction && ncopies > 1)
8913     return false; /* FORNOW */
8914 
8915   cond_expr = gimple_assign_rhs1 (stmt);
8916   then_clause = gimple_assign_rhs2 (stmt);
8917   else_clause = gimple_assign_rhs3 (stmt);
8918 
8919   if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8920 			    &comp_vectype, &dts[0], slp_node ? NULL : vectype)
8921       || !comp_vectype)
8922     return false;
8923 
8924   if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
8925     return false;
8926   if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
8927     return false;
8928 
8929   if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8930     return false;
8931 
8932   if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8933     return false;
8934 
8935   masked = !COMPARISON_CLASS_P (cond_expr);
8936   vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8937 
8938   if (vec_cmp_type == NULL_TREE)
8939     return false;
8940 
8941   cond_code = TREE_CODE (cond_expr);
8942   if (!masked)
8943     {
8944       cond_expr0 = TREE_OPERAND (cond_expr, 0);
8945       cond_expr1 = TREE_OPERAND (cond_expr, 1);
8946     }
8947 
8948   if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8949     {
8950       /* Boolean values may have another representation in vectors
8951 	 and therefore we prefer bit operations over comparison for
8952 	 them (which also works for scalar masks).  We store opcodes
8953 	 to use in bitop1 and bitop2.  Statement is vectorized as
8954 	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8955 	 depending on bitop1 and bitop2 arity.  */
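      /* For instance, with one-bit boolean operands a and b this makes
	 a > b become a & ~b (bitop1 = BIT_NOT_EXPR applied to b,
	 bitop2 = BIT_AND_EXPR) and a == b become ~(a ^ b)
	 (bitop1 = BIT_XOR_EXPR, bitop2 = BIT_NOT_EXPR), matching the
	 mapping in the switch below.  */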
8956       switch (cond_code)
8957 	{
8958 	case GT_EXPR:
8959 	  bitop1 = BIT_NOT_EXPR;
8960 	  bitop2 = BIT_AND_EXPR;
8961 	  break;
8962 	case GE_EXPR:
8963 	  bitop1 = BIT_NOT_EXPR;
8964 	  bitop2 = BIT_IOR_EXPR;
8965 	  break;
8966 	case LT_EXPR:
8967 	  bitop1 = BIT_NOT_EXPR;
8968 	  bitop2 = BIT_AND_EXPR;
8969 	  std::swap (cond_expr0, cond_expr1);
8970 	  break;
8971 	case LE_EXPR:
8972 	  bitop1 = BIT_NOT_EXPR;
8973 	  bitop2 = BIT_IOR_EXPR;
8974 	  std::swap (cond_expr0, cond_expr1);
8975 	  break;
8976 	case NE_EXPR:
8977 	  bitop1 = BIT_XOR_EXPR;
8978 	  break;
8979 	case EQ_EXPR:
8980 	  bitop1 = BIT_XOR_EXPR;
8981 	  bitop2 = BIT_NOT_EXPR;
8982 	  break;
8983 	default:
8984 	  return false;
8985 	}
8986       cond_code = SSA_NAME;
8987     }
8988 
8989   if (!vec_stmt)
8990     {
8991       if (bitop1 != NOP_EXPR)
8992 	{
8993 	  machine_mode mode = TYPE_MODE (comp_vectype);
8994 	  optab optab;
8995 
8996 	  optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8997 	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8998 	    return false;
8999 
9000 	  if (bitop2 != NOP_EXPR)
9001 	    {
9002 	      optab = optab_for_tree_code (bitop2, comp_vectype,
9003 					   optab_default);
9004 	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9005 		return false;
9006 	    }
9007 	}
9008       if (expand_vec_cond_expr_p (vectype, comp_vectype,
9009 				     cond_code))
9010 	{
9011 	  STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
9012 	  vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
9013 				  cost_vec);
9014 	  return true;
9015 	}
9016       return false;
9017     }
9018 
9019   /* Transform.  */
9020 
9021   if (!slp_node)
9022     {
9023       vec_oprnds0.create (1);
9024       vec_oprnds1.create (1);
9025       vec_oprnds2.create (1);
9026       vec_oprnds3.create (1);
9027     }
9028 
9029   /* Handle def.  */
9030   scalar_dest = gimple_assign_lhs (stmt);
9031   if (reduction_type != EXTRACT_LAST_REDUCTION)
9032     vec_dest = vect_create_destination_var (scalar_dest, vectype);
9033 
9034   /* Handle cond expr.  */
9035   for (j = 0; j < ncopies; j++)
9036     {
9037       stmt_vec_info new_stmt_info = NULL;
9038       if (j == 0)
9039 	{
9040           if (slp_node)
9041             {
9042               auto_vec<tree, 4> ops;
9043 	      auto_vec<vec<tree>, 4> vec_defs;
9044 
9045 	      if (masked)
9046 		ops.safe_push (cond_expr);
9047 	      else
9048 		{
9049 		  ops.safe_push (cond_expr0);
9050 		  ops.safe_push (cond_expr1);
9051 		}
9052               ops.safe_push (then_clause);
9053               ops.safe_push (else_clause);
9054               vect_get_slp_defs (ops, slp_node, &vec_defs);
9055 	      vec_oprnds3 = vec_defs.pop ();
9056 	      vec_oprnds2 = vec_defs.pop ();
9057 	      if (!masked)
9058 		vec_oprnds1 = vec_defs.pop ();
9059 	      vec_oprnds0 = vec_defs.pop ();
9060             }
9061           else
9062             {
9063 	      if (masked)
9064 		{
9065 		  vec_cond_lhs
9066 		    = vect_get_vec_def_for_operand (cond_expr, stmt_info,
9067 						    comp_vectype);
9068 		}
9069 	      else
9070 		{
9071 		  vec_cond_lhs
9072 		    = vect_get_vec_def_for_operand (cond_expr0,
9073 						    stmt_info, comp_vectype);
9074 		  vec_cond_rhs
9075 		    = vect_get_vec_def_for_operand (cond_expr1,
9076 						    stmt_info, comp_vectype);
9077 		}
9078 	      vec_then_clause = vect_get_vec_def_for_operand (then_clause,
9079 							      stmt_info);
9080 	      if (reduction_type != EXTRACT_LAST_REDUCTION)
9081 		vec_else_clause = vect_get_vec_def_for_operand (else_clause,
9082 								stmt_info);
9083 	    }
9084 	}
9085       else
9086 	{
9087 	  vec_cond_lhs
9088 	    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
9089 	  if (!masked)
9090 	    vec_cond_rhs
9091 	      = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
9092 
9093 	  vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
9094 							    vec_oprnds2.pop ());
9095 	  vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
9096 							    vec_oprnds3.pop ());
9097 	}
9098 
9099       if (!slp_node)
9100         {
9101 	  vec_oprnds0.quick_push (vec_cond_lhs);
9102 	  if (!masked)
9103 	    vec_oprnds1.quick_push (vec_cond_rhs);
9104 	  vec_oprnds2.quick_push (vec_then_clause);
9105 	  vec_oprnds3.quick_push (vec_else_clause);
9106 	}
9107 
9108       /* Arguments are ready.  Create the new vector stmt.  */
9109       FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
9110         {
9111           vec_then_clause = vec_oprnds2[i];
9112           vec_else_clause = vec_oprnds3[i];
9113 
9114 	  if (masked)
9115 	    vec_compare = vec_cond_lhs;
9116 	  else
9117 	    {
9118 	      vec_cond_rhs = vec_oprnds1[i];
9119 	      if (bitop1 == NOP_EXPR)
9120 		vec_compare = build2 (cond_code, vec_cmp_type,
9121 				      vec_cond_lhs, vec_cond_rhs);
9122 	      else
9123 		{
9124 		  new_temp = make_ssa_name (vec_cmp_type);
9125 		  gassign *new_stmt;
9126 		  if (bitop1 == BIT_NOT_EXPR)
9127 		    new_stmt = gimple_build_assign (new_temp, bitop1,
9128 						    vec_cond_rhs);
9129 		  else
9130 		    new_stmt
9131 		      = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
9132 					     vec_cond_rhs);
9133 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9134 		  if (bitop2 == NOP_EXPR)
9135 		    vec_compare = new_temp;
9136 		  else if (bitop2 == BIT_NOT_EXPR)
9137 		    {
9138 		      /* Instead of doing ~x ? y : z do x ? z : y.  */
9139 		      vec_compare = new_temp;
9140 		      std::swap (vec_then_clause, vec_else_clause);
9141 		    }
9142 		  else
9143 		    {
9144 		      vec_compare = make_ssa_name (vec_cmp_type);
9145 		      new_stmt
9146 			= gimple_build_assign (vec_compare, bitop2,
9147 					       vec_cond_lhs, new_temp);
9148 		      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9149 		    }
9150 		}
9151 	    }
9152 	  if (reduction_type == EXTRACT_LAST_REDUCTION)
9153 	    {
9154 	      if (!is_gimple_val (vec_compare))
9155 		{
9156 		  tree vec_compare_name = make_ssa_name (vec_cmp_type);
9157 		  gassign *new_stmt = gimple_build_assign (vec_compare_name,
9158 							   vec_compare);
9159 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9160 		  vec_compare = vec_compare_name;
9161 		}
9162 	      gcall *new_stmt = gimple_build_call_internal
9163 		(IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
9164 		 vec_then_clause);
9165 	      gimple_call_set_lhs (new_stmt, scalar_dest);
9166 	      SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
9167 	      if (stmt_info->stmt == gsi_stmt (*gsi))
9168 		new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
9169 	      else
9170 		{
9171 		  /* In this case we're moving the definition to later in the
9172 		     block.  That doesn't matter because the only uses of the
9173 		     lhs are in phi statements.  */
9174 		  gimple_stmt_iterator old_gsi
9175 		    = gsi_for_stmt (stmt_info->stmt);
9176 		  gsi_remove (&old_gsi, true);
9177 		  new_stmt_info
9178 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9179 		}
9180 	    }
9181 	  else
9182 	    {
9183 	      new_temp = make_ssa_name (vec_dest);
9184 	      gassign *new_stmt
9185 		= gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
9186 				       vec_then_clause, vec_else_clause);
9187 	      new_stmt_info
9188 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9189 	    }
9190           if (slp_node)
9191 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9192         }
9193 
9194         if (slp_node)
9195           continue;
9196 
9197 	if (j == 0)
9198 	  STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9199 	else
9200 	  STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9201 
9202 	prev_stmt_info = new_stmt_info;
9203     }
9204 
9205   vec_oprnds0.release ();
9206   vec_oprnds1.release ();
9207   vec_oprnds2.release ();
9208   vec_oprnds3.release ();
9209 
9210   return true;
9211 }
9212 
9213 /* vectorizable_comparison.
9214 
9215    Check if STMT_INFO is a comparison expression that can be vectorized.
9216    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9217    comparison, put it in VEC_STMT, and insert it at GSI.
9218 
9219    Return true if STMT_INFO is vectorizable in this way.  */
9220 
9221 static bool
9222 vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9223 			 stmt_vec_info *vec_stmt,
9224 			 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9225 {
9226   vec_info *vinfo = stmt_info->vinfo;
9227   tree lhs, rhs1, rhs2;
9228   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9229   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9230   tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9231   tree new_temp;
9232   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9233   enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
9234   int ndts = 2;
9235   poly_uint64 nunits;
9236   int ncopies;
9237   enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9238   stmt_vec_info prev_stmt_info = NULL;
9239   int i, j;
9240   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9241   vec<tree> vec_oprnds0 = vNULL;
9242   vec<tree> vec_oprnds1 = vNULL;
9243   tree mask_type;
9244   tree mask;
9245 
9246   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9247     return false;
9248 
9249   if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
9250     return false;
9251 
9252   mask_type = vectype;
9253   nunits = TYPE_VECTOR_SUBPARTS (vectype);
9254 
9255   if (slp_node)
9256     ncopies = 1;
9257   else
9258     ncopies = vect_get_num_copies (loop_vinfo, vectype);
9259 
9260   gcc_assert (ncopies >= 1);
9261   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9262     return false;
9263 
9264   if (STMT_VINFO_LIVE_P (stmt_info))
9265     {
9266       if (dump_enabled_p ())
9267 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9268 			 "value used after loop.\n");
9269       return false;
9270     }
9271 
9272   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9273   if (!stmt)
9274     return false;
9275 
9276   code = gimple_assign_rhs_code (stmt);
9277 
9278   if (TREE_CODE_CLASS (code) != tcc_comparison)
9279     return false;
9280 
9281   rhs1 = gimple_assign_rhs1 (stmt);
9282   rhs2 = gimple_assign_rhs2 (stmt);
9283 
9284   if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
9285     return false;
9286 
9287   if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
9288     return false;
9289 
9290   if (vectype1 && vectype2
9291       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9292 		   TYPE_VECTOR_SUBPARTS (vectype2)))
9293     return false;
9294 
9295   vectype = vectype1 ? vectype1 : vectype2;
9296 
9297   /* Invariant comparison.  */
9298   if (!vectype)
9299     {
9300       vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
9301       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
9302 	return false;
9303     }
9304   else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
9305     return false;
9306 
9307   /* Can't compare mask and non-mask types.  */
9308   if (vectype1 && vectype2
9309       && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9310     return false;
9311 
9312   /* Boolean values may have another representation in vectors
9313      and therefore we prefer bit operations over comparison for
9314      them (which also works for scalar masks).  We store opcodes
9315      to use in bitop1 and bitop2.  Statement is vectorized as
9316        BITOP2 (rhs1 BITOP1 rhs2) or
9317        rhs1 BITOP2 (BITOP1 rhs2)
9318      depending on bitop1 and bitop2 arity.  */
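  /* For instance, with boolean elements rhs1 != rhs2 is computed as
     rhs1 ^ rhs2 (only bitop1 = BIT_XOR_EXPR is needed), while
     rhs1 == rhs2 is computed as ~(rhs1 ^ rhs2) (bitop1 = BIT_XOR_EXPR,
     bitop2 = BIT_NOT_EXPR), per the mapping just below.  */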
9319   bool swap_p = false;
9320   if (VECTOR_BOOLEAN_TYPE_P (vectype))
9321     {
9322       if (code == GT_EXPR)
9323 	{
9324 	  bitop1 = BIT_NOT_EXPR;
9325 	  bitop2 = BIT_AND_EXPR;
9326 	}
9327       else if (code == GE_EXPR)
9328 	{
9329 	  bitop1 = BIT_NOT_EXPR;
9330 	  bitop2 = BIT_IOR_EXPR;
9331 	}
9332       else if (code == LT_EXPR)
9333 	{
9334 	  bitop1 = BIT_NOT_EXPR;
9335 	  bitop2 = BIT_AND_EXPR;
9336 	  swap_p = true;
9337 	}
9338       else if (code == LE_EXPR)
9339 	{
9340 	  bitop1 = BIT_NOT_EXPR;
9341 	  bitop2 = BIT_IOR_EXPR;
9342 	  swap_p = true;
9343 	}
9344       else
9345 	{
9346 	  bitop1 = BIT_XOR_EXPR;
9347 	  if (code == EQ_EXPR)
9348 	    bitop2 = BIT_NOT_EXPR;
9349 	}
9350     }
9351 
9352   if (!vec_stmt)
9353     {
9354       if (bitop1 == NOP_EXPR)
9355 	{
9356 	  if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9357 	    return false;
9358 	}
9359       else
9360 	{
9361 	  machine_mode mode = TYPE_MODE (vectype);
9362 	  optab optab;
9363 
9364 	  optab = optab_for_tree_code (bitop1, vectype, optab_default);
9365 	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9366 	    return false;
9367 
9368 	  if (bitop2 != NOP_EXPR)
9369 	    {
9370 	      optab = optab_for_tree_code (bitop2, vectype, optab_default);
9371 	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9372 		return false;
9373 	    }
9374 	}
9375 
9376       STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9377       vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9378 			      dts, ndts, slp_node, cost_vec);
9379       return true;
9380     }
9381 
9382   /* Transform.  */
9383   if (!slp_node)
9384     {
9385       vec_oprnds0.create (1);
9386       vec_oprnds1.create (1);
9387     }
9388 
9389   /* Handle def.  */
9390   lhs = gimple_assign_lhs (stmt);
9391   mask = vect_create_destination_var (lhs, mask_type);
9392 
9393   /* Handle cmp expr.  */
9394   for (j = 0; j < ncopies; j++)
9395     {
9396       stmt_vec_info new_stmt_info = NULL;
9397       if (j == 0)
9398 	{
9399 	  if (slp_node)
9400 	    {
9401 	      auto_vec<tree, 2> ops;
9402 	      auto_vec<vec<tree>, 2> vec_defs;
9403 
9404 	      ops.safe_push (rhs1);
9405 	      ops.safe_push (rhs2);
9406 	      vect_get_slp_defs (ops, slp_node, &vec_defs);
9407 	      vec_oprnds1 = vec_defs.pop ();
9408 	      vec_oprnds0 = vec_defs.pop ();
9409 	      if (swap_p)
9410 		std::swap (vec_oprnds0, vec_oprnds1);
9411 	    }
9412 	  else
9413 	    {
9414 	      vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
9415 						       vectype);
9416 	      vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
9417 						       vectype);
9418 	    }
9419 	}
9420       else
9421 	{
9422 	  vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
9423 						     vec_oprnds0.pop ());
9424 	  vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
9425 						     vec_oprnds1.pop ());
9426 	}
9427 
9428       if (!slp_node)
9429 	{
9430 	  if (swap_p)
9431 	    std::swap (vec_rhs1, vec_rhs2);
9432 	  vec_oprnds0.quick_push (vec_rhs1);
9433 	  vec_oprnds1.quick_push (vec_rhs2);
9434 	}
9435 
9436       /* Arguments are ready.  Create the new vector stmt.  */
9437       FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9438 	{
9439 	  vec_rhs2 = vec_oprnds1[i];
9440 
9441 	  new_temp = make_ssa_name (mask);
9442 	  if (bitop1 == NOP_EXPR)
9443 	    {
9444 	      gassign *new_stmt = gimple_build_assign (new_temp, code,
9445 						       vec_rhs1, vec_rhs2);
9446 	      new_stmt_info
9447 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9448 	    }
9449 	  else
9450 	    {
9451 	      gassign *new_stmt;
9452 	      if (bitop1 == BIT_NOT_EXPR)
9453 		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9454 	      else
9455 		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9456 						vec_rhs2);
9457 	      new_stmt_info
9458 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9459 	      if (bitop2 != NOP_EXPR)
9460 		{
9461 		  tree res = make_ssa_name (mask);
9462 		  if (bitop2 == BIT_NOT_EXPR)
9463 		    new_stmt = gimple_build_assign (res, bitop2, new_temp);
9464 		  else
9465 		    new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9466 						    new_temp);
9467 		  new_stmt_info
9468 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9469 		}
9470 	    }
9471 	  if (slp_node)
9472 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9473 	}
9474 
9475       if (slp_node)
9476 	continue;
9477 
9478       if (j == 0)
9479 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9480       else
9481 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9482 
9483       prev_stmt_info = new_stmt_info;
9484     }
9485 
9486   vec_oprnds0.release ();
9487   vec_oprnds1.release ();
9488 
9489   return true;
9490 }
9491 
9492 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9493    can handle all live statements in the node.  Otherwise return true
9494    if STMT_INFO is not live or if vectorizable_live_operation can handle it.
9495    GSI and VEC_STMT are as for vectorizable_live_operation.  */
9496 
9497 static bool
9498 can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9499 			  slp_tree slp_node, stmt_vec_info *vec_stmt,
9500 			  stmt_vector_for_cost *cost_vec)
9501 {
9502   if (slp_node)
9503     {
9504       stmt_vec_info slp_stmt_info;
9505       unsigned int i;
9506       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
9507 	{
9508 	  if (STMT_VINFO_LIVE_P (slp_stmt_info)
9509 	      && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, i,
9510 					       vec_stmt, cost_vec))
9511 	    return false;
9512 	}
9513     }
9514   else if (STMT_VINFO_LIVE_P (stmt_info)
9515 	   && !vectorizable_live_operation (stmt_info, gsi, slp_node, -1,
9516 					    vec_stmt, cost_vec))
9517     return false;
9518 
9519   return true;
9520 }
9521 
9522 /* Make sure the statement is vectorizable.  */
9523 
9524 opt_result
9525 vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
9526 		   slp_tree node, slp_instance node_instance,
9527 		   stmt_vector_for_cost *cost_vec)
9528 {
9529   vec_info *vinfo = stmt_info->vinfo;
9530   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9531   enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
9532   bool ok;
9533   gimple_seq pattern_def_seq;
9534 
9535   if (dump_enabled_p ())
9536     dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
9537 		     stmt_info->stmt);
9538 
9539   if (gimple_has_volatile_ops (stmt_info->stmt))
9540     return opt_result::failure_at (stmt_info->stmt,
9541 				   "not vectorized:"
9542 				   " stmt has volatile operands: %G\n",
9543 				   stmt_info->stmt);
9544 
9545   if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9546       && node == NULL
9547       && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9548     {
9549       gimple_stmt_iterator si;
9550 
9551       for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9552 	{
9553 	  stmt_vec_info pattern_def_stmt_info
9554 	    = vinfo->lookup_stmt (gsi_stmt (si));
9555 	  if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
9556 	      || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
9557 	    {
9558 	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
9559 	      if (dump_enabled_p ())
9560 		dump_printf_loc (MSG_NOTE, vect_location,
9561 				 "==> examining pattern def statement: %G",
9562 				 pattern_def_stmt_info->stmt);
9563 
9564 	      opt_result res
9565 		= vect_analyze_stmt (pattern_def_stmt_info,
9566 				     need_to_vectorize, node, node_instance,
9567 				     cost_vec);
9568 	      if (!res)
9569 		return res;
9570 	    }
9571 	}
9572     }
9573 
9574   /* Skip stmts that do not need to be vectorized. In loops this is expected
9575      to include:
9576      - the COND_EXPR which is the loop exit condition
9577      - any LABEL_EXPRs in the loop
9578      - computations that are used only for array indexing or loop control.
9579      In basic blocks we only analyze statements that are a part of some SLP
9580      instance; therefore, all the statements are relevant.
9581 
9582      A pattern statement needs to be analyzed instead of the original statement
9583      if the original statement is not relevant.  Otherwise, we analyze both
9584      statements.  In basic blocks we are called from some SLP instance
9585      traversal, so don't analyze the pattern stmts instead; the pattern stmts
9586      will already be part of the SLP instance.  */
9587 
9588   stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
9589   if (!STMT_VINFO_RELEVANT_P (stmt_info)
9590       && !STMT_VINFO_LIVE_P (stmt_info))
9591     {
9592       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9593 	  && pattern_stmt_info
9594 	  && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9595 	      || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9596         {
9597           /* Analyze PATTERN_STMT instead of the original stmt.  */
9598 	  stmt_info = pattern_stmt_info;
9599           if (dump_enabled_p ())
9600 	    dump_printf_loc (MSG_NOTE, vect_location,
9601 			     "==> examining pattern statement: %G",
9602 			     stmt_info->stmt);
9603         }
9604       else
9605         {
9606           if (dump_enabled_p ())
9607             dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
9608 
9609           return opt_result::success ();
9610         }
9611     }
9612   else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9613 	   && node == NULL
9614 	   && pattern_stmt_info
9615 	   && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9616 	       || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9617     {
9618       /* Analyze PATTERN_STMT too.  */
9619       if (dump_enabled_p ())
9620 	dump_printf_loc (MSG_NOTE, vect_location,
9621 			 "==> examining pattern statement: %G",
9622 			 pattern_stmt_info->stmt);
9623 
9624       opt_result res
9625 	= vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
9626 			     node_instance, cost_vec);
9627       if (!res)
9628 	return res;
9629    }
9630 
9631   switch (STMT_VINFO_DEF_TYPE (stmt_info))
9632     {
9633       case vect_internal_def:
9634         break;
9635 
9636       case vect_reduction_def:
9637       case vect_nested_cycle:
9638          gcc_assert (!bb_vinfo
9639 		     && (relevance == vect_used_in_outer
9640 			 || relevance == vect_used_in_outer_by_reduction
9641 			 || relevance == vect_used_by_reduction
9642 			 || relevance == vect_unused_in_scope
9643 			 || relevance == vect_used_only_live));
9644          break;
9645 
9646       case vect_induction_def:
9647 	gcc_assert (!bb_vinfo);
9648 	break;
9649 
9650       case vect_constant_def:
9651       case vect_external_def:
9652       case vect_unknown_def_type:
9653       default:
9654         gcc_unreachable ();
9655     }
9656 
9657   if (STMT_VINFO_RELEVANT_P (stmt_info))
9658     {
9659       tree type = gimple_expr_type (stmt_info->stmt);
9660       gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
9661       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9662       gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9663 		  || (call && gimple_call_lhs (call) == NULL_TREE));
9664       *need_to_vectorize = true;
9665     }
9666 
9667   if (PURE_SLP_STMT (stmt_info) && !node)
9668     {
9669       if (dump_enabled_p ())
9670 	dump_printf_loc (MSG_NOTE, vect_location,
9671 			 "handled only by SLP analysis\n");
9672       return opt_result::success ();
9673     }
9674 
9675   ok = true;
9676   if (!bb_vinfo
9677       && (STMT_VINFO_RELEVANT_P (stmt_info)
9678 	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9679     /* Prefer vectorizable_call over vectorizable_simd_clone_call so
9680        -mveclibabi= takes preference over library functions with
9681        the simd attribute.  */
9682     ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
9683 	  || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
9684 					   cost_vec)
9685 	  || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
9686 	  || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
9687 	  || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
9688 	  || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
9689 				cost_vec)
9690 	  || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
9691 	  || vectorizable_reduction (stmt_info, NULL, NULL, node,
9692 				     node_instance, cost_vec)
9693 	  || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
9694 	  || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
9695 	  || vectorizable_condition (stmt_info, NULL, NULL, false, node,
9696 				     cost_vec)
9697 	  || vectorizable_comparison (stmt_info, NULL, NULL, node,
9698 				      cost_vec));
9699   else
9700     {
9701       if (bb_vinfo)
9702 	ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
9703 	      || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
9704 					       cost_vec)
9705 	      || vectorizable_conversion (stmt_info, NULL, NULL, node,
9706 					  cost_vec)
9707 	      || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
9708 	      || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
9709 	      || vectorizable_assignment (stmt_info, NULL, NULL, node,
9710 					  cost_vec)
9711 	      || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
9712 				    cost_vec)
9713 	      || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
9714 	      || vectorizable_condition (stmt_info, NULL, NULL, false, node,
9715 					 cost_vec)
9716 	      || vectorizable_comparison (stmt_info, NULL, NULL, node,
9717 					  cost_vec));
9718     }
9719 
9720   if (!ok)
9721     return opt_result::failure_at (stmt_info->stmt,
9722 				   "not vectorized:"
9723 				   " relevant stmt not supported: %G",
9724 				   stmt_info->stmt);
9725 
9726   /* Stmts that are (also) "live" (i.e. that are used outside the loop)
9727       need extra handling, except for vectorizable reductions.  */
9728   if (!bb_vinfo
9729       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9730       && !can_vectorize_live_stmts (stmt_info, NULL, node, NULL, cost_vec))
9731     return opt_result::failure_at (stmt_info->stmt,
9732 				   "not vectorized:"
9733 				   " live stmt not supported: %G",
9734 				   stmt_info->stmt);
9735 
9736   return opt_result::success ();
9737 }
9738 
9739 
9740 /* Function vect_transform_stmt.
9741 
9742    Create a vectorized stmt to replace STMT_INFO, and insert it at BSI.  */
9743 
9744 bool
9745 vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9746 		     slp_tree slp_node, slp_instance slp_node_instance)
9747 {
9748   vec_info *vinfo = stmt_info->vinfo;
9749   bool is_store = false;
9750   stmt_vec_info vec_stmt = NULL;
9751   bool done;
9752 
9753   gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9754   stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
9755 
9756   bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9757 		   && nested_in_vect_loop_p
9758 		        (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9759 			 stmt_info));
9760 
9761   gimple *stmt = stmt_info->stmt;
9762   switch (STMT_VINFO_TYPE (stmt_info))
9763     {
9764     case type_demotion_vec_info_type:
9765     case type_promotion_vec_info_type:
9766     case type_conversion_vec_info_type:
9767       done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
9768 				      NULL);
9769       gcc_assert (done);
9770       break;
9771 
9772     case induc_vec_info_type:
9773       done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
9774 				     NULL);
9775       gcc_assert (done);
9776       break;
9777 
9778     case shift_vec_info_type:
9779       done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9780       gcc_assert (done);
9781       break;
9782 
9783     case op_vec_info_type:
9784       done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
9785 				     NULL);
9786       gcc_assert (done);
9787       break;
9788 
9789     case assignment_vec_info_type:
9790       done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
9791 				      NULL);
9792       gcc_assert (done);
9793       break;
9794 
9795     case load_vec_info_type:
9796       done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
9797                                 slp_node_instance, NULL);
9798       gcc_assert (done);
9799       break;
9800 
9801     case store_vec_info_type:
9802       done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9803       gcc_assert (done);
9804       if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9805 	{
9806 	  /* In case of interleaving, the whole chain is vectorized when the
9807 	     last store in the chain is reached.  Store stmts before the last
9808 	     one are skipped, and their vec_stmt_info shouldn't be freed
9809 	     meanwhile.  */
9810 	  stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9811 	  if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
9812 	    is_store = true;
9813 	}
9814       else
9815 	is_store = true;
9816       break;
9817 
9818     case condition_vec_info_type:
9819       done = vectorizable_condition (stmt_info, gsi, &vec_stmt, false,
9820 				     slp_node, NULL);
9821       gcc_assert (done);
9822       break;
9823 
9824     case comparison_vec_info_type:
9825       done = vectorizable_comparison (stmt_info, gsi, &vec_stmt,
9826 				      slp_node, NULL);
9827       gcc_assert (done);
9828       break;
9829 
9830     case call_vec_info_type:
9831       done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9832       stmt = gsi_stmt (*gsi);
9833       break;
9834 
9835     case call_simd_clone_vec_info_type:
9836       done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
9837 					   slp_node, NULL);
9838       stmt = gsi_stmt (*gsi);
9839       break;
9840 
9841     case reduc_vec_info_type:
9842       done = vectorizable_reduction (stmt_info, gsi, &vec_stmt, slp_node,
9843 				     slp_node_instance, NULL);
9844       gcc_assert (done);
9845       break;
9846 
9847     default:
9848       if (!STMT_VINFO_LIVE_P (stmt_info))
9849 	{
9850 	  if (dump_enabled_p ())
9851 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9852                              "stmt not supported.\n");
9853 	  gcc_unreachable ();
9854 	}
9855     }
9856 
9857   /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9858      This would break hybrid SLP vectorization.  */
9859   if (slp_node)
9860     gcc_assert (!vec_stmt
9861 		&& STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
9862 
9863   /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9864      is being vectorized, but outside the immediately enclosing loop.  */
9865   if (vec_stmt
9866       && nested_p
9867       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9868       && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9869           || STMT_VINFO_RELEVANT (stmt_info) ==
9870                                            vect_used_in_outer_by_reduction))
9871     {
9872       struct loop *innerloop = LOOP_VINFO_LOOP (
9873                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9874       imm_use_iterator imm_iter;
9875       use_operand_p use_p;
9876       tree scalar_dest;
9877 
9878       if (dump_enabled_p ())
9879         dump_printf_loc (MSG_NOTE, vect_location,
9880                          "Record the vdef for outer-loop vectorization.\n");
9881 
9882       /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9883         (to be used when vectorizing outer-loop stmts that use the DEF of
9884         STMT).  */
9885       if (gimple_code (stmt) == GIMPLE_PHI)
9886         scalar_dest = PHI_RESULT (stmt);
9887       else
9888         scalar_dest = gimple_get_lhs (stmt);
9889 
9890       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9891 	if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9892 	  {
9893 	    stmt_vec_info exit_phi_info
9894 	      = vinfo->lookup_stmt (USE_STMT (use_p));
9895 	    STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
9896 	  }
9897     }
9898 
9899   /* Handle stmts whose DEF is used outside the loop-nest that is
9900      being vectorized.  */
9901   if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9902     {
9903       done = can_vectorize_live_stmts (stmt_info, gsi, slp_node, &vec_stmt,
9904 				       NULL);
9905       gcc_assert (done);
9906     }
9907 
9908   if (vec_stmt)
9909     STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9910 
9911   return is_store;
9912 }
9913 
9914 
9915 /* Remove a group of stores (for SLP or interleaving), free their
9916    stmt_vec_info.  */
9917 
9918 void
9919 vect_remove_stores (stmt_vec_info first_stmt_info)
9920 {
9921   vec_info *vinfo = first_stmt_info->vinfo;
9922   stmt_vec_info next_stmt_info = first_stmt_info;
9923 
9924   while (next_stmt_info)
9925     {
9926       stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9927       next_stmt_info = vect_orig_stmt (next_stmt_info);
9928       /* Free the attached stmt_vec_info and remove the stmt.  */
9929       vinfo->remove_stmt (next_stmt_info);
9930       next_stmt_info = tmp;
9931     }
9932 }
9933 
9934 /* Function get_vectype_for_scalar_type_and_size.
9935 
9936    Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9937    by the target.  */
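/* As an informal illustration: on a target providing 16-byte vectors,
   a SCALAR_TYPE of 'int' (4 bytes) and a SIZE of 16 would yield a
   4-element integer vector type, while a SIZE of zero means "use the
   vector mode the target prefers" (see the handling of SIZE below).  */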
9938 
9939 tree
9940 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9941 {
9942   tree orig_scalar_type = scalar_type;
9943   scalar_mode inner_mode;
9944   machine_mode simd_mode;
9945   poly_uint64 nunits;
9946   tree vectype;
9947 
9948   if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9949       && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9950     return NULL_TREE;
9951 
9952   unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9953 
9954   /* For vector types of elements whose mode precision doesn't
9955      match their type's precision we use an element type of mode
9956      precision.  The vectorization routines will have to make sure
9957      they support the proper result truncation/extension.
9958      We also make sure to build vector types with INTEGER_TYPE
9959      component type only.  */
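  /* For instance, a boolean or a 3-bit bit-field type whose mode is
     QImode would be represented here by an 8-bit INTEGER_TYPE element
     (an illustration of the intent; the width used is the mode's
     bit size).  */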
9960   if (INTEGRAL_TYPE_P (scalar_type)
9961       && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9962 	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
9963     scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9964 						  TYPE_UNSIGNED (scalar_type));
9965 
9966   /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9967      When the component mode passes the above test simply use a type
9968      corresponding to that mode.  The theory is that any use that
9969      would cause problems with this will disable vectorization anyway.  */
9970   else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9971 	   && !INTEGRAL_TYPE_P (scalar_type))
9972     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9973 
9974   /* We can't build a vector type of elements with alignment bigger than
9975      their size.  */
9976   else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9977     scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9978 						  TYPE_UNSIGNED (scalar_type));
9979 
9980   /* If we fell back to using the mode, fail if there was
9981      no scalar type for it.  */
9982   if (scalar_type == NULL_TREE)
9983     return NULL_TREE;
9984 
9985   /* If no size was supplied, use the mode the target prefers.  Otherwise
9986      lookup a vector mode of the specified size.  */
9987   if (known_eq (size, 0U))
9988     simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9989   else if (!multiple_p (size, nbytes, &nunits)
9990 	   || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9991     return NULL_TREE;
9992   /* NOTE: nunits == 1 is allowed to support single element vector types.  */
9993   if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
9994     return NULL_TREE;
9995 
9996   vectype = build_vector_type (scalar_type, nunits);
9997 
9998   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9999       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
10000     return NULL_TREE;
10001 
10002   /* Re-attach the address-space qualifier if we canonicalized the scalar
10003      type.  */
10004   if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
10005     return build_qualified_type
10006 	     (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
10007 
10008   return vectype;
10009 }
10010 
10011 poly_uint64 current_vector_size;
10012 
10013 /* Function get_vectype_for_scalar_type.
10014 
10015    Returns the vector type corresponding to SCALAR_TYPE as supported
10016    by the target.  */
10017 
10018 tree
10019 get_vectype_for_scalar_type (tree scalar_type)
10020 {
10021   tree vectype;
10022   vectype = get_vectype_for_scalar_type_and_size (scalar_type,
10023 						  current_vector_size);
10024   if (vectype
10025       && known_eq (current_vector_size, 0U))
10026     current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
10027   return vectype;
10028 }
10029 
10030 /* Function get_mask_type_for_scalar_type.
10031 
10032    Returns the mask type corresponding to a result of comparison
10033    of vectors of the specified SCALAR_TYPE, as supported by the target.  */
10034 
10035 tree
10036 get_mask_type_for_scalar_type (tree scalar_type)
10037 {
10038   tree vectype = get_vectype_for_scalar_type (scalar_type);
10039 
10040   if (!vectype)
10041     return NULL;
10042 
10043   return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
10044 				  current_vector_size);
10045 }
10046 
10047 /* Function get_same_sized_vectype
10048 
10049    Returns a vector type corresponding to SCALAR_TYPE with the same size
10050    as VECTOR_TYPE, if supported by the target.  */
10051 
10052 tree
10053 get_same_sized_vectype (tree scalar_type, tree vector_type)
10054 {
10055   if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10056     return build_same_sized_truth_vector_type (vector_type);
10057 
10058   return get_vectype_for_scalar_type_and_size
10059 	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
10060 }
10061 
10062 /* Function vect_is_simple_use.
10063 
10064    Input:
10065    VINFO - the vect info of the loop or basic block that is being vectorized.
10066    OPERAND - operand in the loop or bb.
10067    Output:
10068    DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
10069      case OPERAND is an SSA_NAME that is defined in the vectorizable region
10070    DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
10071      the definition could be anywhere in the function
10072    DT - the type of definition
10073 
10074    Returns whether a stmt with OPERAND can be vectorized.
10075    For loops, supportable operands are constants, loop invariants, and operands
10076    that are defined by the current iteration of the loop.  Unsupportable
10077    operands are those that are defined by a previous iteration of the loop (as
10078    is the case in reduction/induction computations).
10079    For basic blocks, supportable operands are constants and bb invariants.
10080    For now, operands defined outside the basic block are not supported.  */
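/* As an illustrative sketch, a typical use from the vectorizable_*
   routines looks like:

     enum vect_def_type dt;
     stmt_vec_info def_info;
     if (!vect_is_simple_use (op, vinfo, &dt, &def_info))
       return false;

   with the remaining output arguments left at their defaults.  */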
10081 
10082 bool
10083 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10084 		    stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
10085 {
10086   if (def_stmt_info_out)
10087     *def_stmt_info_out = NULL;
10088   if (def_stmt_out)
10089     *def_stmt_out = NULL;
10090   *dt = vect_unknown_def_type;
10091 
10092   if (dump_enabled_p ())
10093     {
10094       dump_printf_loc (MSG_NOTE, vect_location,
10095                        "vect_is_simple_use: operand ");
10096       if (TREE_CODE (operand) == SSA_NAME
10097 	  && !SSA_NAME_IS_DEFAULT_DEF (operand))
10098 	dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
10099       else
10100 	dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
10101     }
10102 
10103   if (CONSTANT_CLASS_P (operand))
10104     *dt = vect_constant_def;
10105   else if (is_gimple_min_invariant (operand))
10106     *dt = vect_external_def;
10107   else if (TREE_CODE (operand) != SSA_NAME)
10108     *dt = vect_unknown_def_type;
10109   else if (SSA_NAME_IS_DEFAULT_DEF (operand))
10110     *dt = vect_external_def;
10111   else
10112     {
10113       gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
10114       stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
10115       if (!stmt_vinfo)
10116 	*dt = vect_external_def;
10117       else
10118 	{
10119 	  stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
10120 	  def_stmt = stmt_vinfo->stmt;
10121 	  switch (gimple_code (def_stmt))
10122 	    {
10123 	    case GIMPLE_PHI:
10124 	    case GIMPLE_ASSIGN:
10125 	    case GIMPLE_CALL:
10126 	      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
10127 	      break;
10128 	    default:
10129 	      *dt = vect_unknown_def_type;
10130 	      break;
10131 	    }
10132 	  if (def_stmt_info_out)
10133 	    *def_stmt_info_out = stmt_vinfo;
10134 	}
10135       if (def_stmt_out)
10136 	*def_stmt_out = def_stmt;
10137     }
10138 
10139   if (dump_enabled_p ())
10140     {
10141       dump_printf (MSG_NOTE, ", type of def: ");
10142       switch (*dt)
10143 	{
10144 	case vect_uninitialized_def:
10145 	  dump_printf (MSG_NOTE, "uninitialized\n");
10146 	  break;
10147 	case vect_constant_def:
10148 	  dump_printf (MSG_NOTE, "constant\n");
10149 	  break;
10150 	case vect_external_def:
10151 	  dump_printf (MSG_NOTE, "external\n");
10152 	  break;
10153 	case vect_internal_def:
10154 	  dump_printf (MSG_NOTE, "internal\n");
10155 	  break;
10156 	case vect_induction_def:
10157 	  dump_printf (MSG_NOTE, "induction\n");
10158 	  break;
10159 	case vect_reduction_def:
10160 	  dump_printf (MSG_NOTE, "reduction\n");
10161 	  break;
10162 	case vect_double_reduction_def:
10163 	  dump_printf (MSG_NOTE, "double reduction\n");
10164 	  break;
10165 	case vect_nested_cycle:
10166 	  dump_printf (MSG_NOTE, "nested cycle\n");
10167 	  break;
10168 	case vect_unknown_def_type:
10169 	  dump_printf (MSG_NOTE, "unknown\n");
10170 	  break;
10171 	}
10172     }
10173 
10174   if (*dt == vect_unknown_def_type)
10175     {
10176       if (dump_enabled_p ())
10177         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10178                          "Unsupported pattern.\n");
10179       return false;
10180     }
10181 
10182   return true;
10183 }
10184 
10185 /* Function vect_is_simple_use.
10186 
10187    Same as vect_is_simple_use but also determines the vector operand
10188    type of OPERAND and stores it to *VECTYPE.  If the definition of
10189    OPERAND is vect_uninitialized_def, vect_constant_def or
10190    vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10191    is responsible to compute the best suited vector type for the
10192    scalar operand.  */
10193 
10194 bool
10195 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10196 		    tree *vectype, stmt_vec_info *def_stmt_info_out,
10197 		    gimple **def_stmt_out)
10198 {
10199   stmt_vec_info def_stmt_info;
10200   gimple *def_stmt;
10201   if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
10202     return false;
10203 
10204   if (def_stmt_out)
10205     *def_stmt_out = def_stmt;
10206   if (def_stmt_info_out)
10207     *def_stmt_info_out = def_stmt_info;
10208 
10209   /* Now get a vector type if the def is internal, otherwise supply
10210      NULL_TREE and leave it up to the caller to figure out a proper
10211      type for the use stmt.  */
10212   if (*dt == vect_internal_def
10213       || *dt == vect_induction_def
10214       || *dt == vect_reduction_def
10215       || *dt == vect_double_reduction_def
10216       || *dt == vect_nested_cycle)
10217     {
10218       *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
10219       gcc_assert (*vectype != NULL_TREE);
10220       if (dump_enabled_p ())
10221 	dump_printf_loc (MSG_NOTE, vect_location,
10222 			 "vect_is_simple_use: vectype %T\n", *vectype);
10223     }
10224   else if (*dt == vect_uninitialized_def
10225 	   || *dt == vect_constant_def
10226 	   || *dt == vect_external_def)
10227     *vectype = NULL_TREE;
10228   else
10229     gcc_unreachable ();
10230 
10231   return true;
10232 }
10233 
10234 
10235 /* Function supportable_widening_operation
10236 
10237    Check whether an operation represented by the code CODE is a
10238    widening operation that is supported by the target platform in
10239    vector form (i.e., when operating on arguments of type VECTYPE_IN
10240    producing a result of type VECTYPE_OUT).
10241 
10242    Widening operations we currently support are NOP (CONVERT), FLOAT,
10243    FIX_TRUNC and WIDEN_MULT.  This function checks if these operations
10244    are supported by the target platform either directly (via vector
10245    tree-codes), or via target builtins.
10246 
10247    Output:
10248    - CODE1 and CODE2 are codes of vector operations to be used when
10249    vectorizing the operation, if available.
10250    - MULTI_STEP_CVT determines the number of required intermediate steps in
10251    case of multi-step conversion (like char->short->int - in that case
10252    MULTI_STEP_CVT will be 1).
10253    - INTERM_TYPES contains the intermediate type required to perform the
10254    widening operation (short in the above example).  */
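/* For instance, for a single-step char->short conversion (CASE_CONVERT
   below) on a little-endian target, CODE1 and CODE2 would be
   VEC_UNPACK_LO_EXPR and VEC_UNPACK_HI_EXPR with MULTI_STEP_CVT == 0
   and no intermediate types.  */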
10255 
10256 bool
10257 supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
10258 				tree vectype_out, tree vectype_in,
10259                                 enum tree_code *code1, enum tree_code *code2,
10260                                 int *multi_step_cvt,
10261                                 vec<tree> *interm_types)
10262 {
10263   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
10264   struct loop *vect_loop = NULL;
10265   machine_mode vec_mode;
10266   enum insn_code icode1, icode2;
10267   optab optab1, optab2;
10268   tree vectype = vectype_in;
10269   tree wide_vectype = vectype_out;
10270   enum tree_code c1, c2;
10271   int i;
10272   tree prev_type, intermediate_type;
10273   machine_mode intermediate_mode, prev_mode;
10274   optab optab3, optab4;
10275 
10276   *multi_step_cvt = 0;
10277   if (loop_info)
10278     vect_loop = LOOP_VINFO_LOOP (loop_info);
10279 
10280   switch (code)
10281     {
10282     case WIDEN_MULT_EXPR:
10283       /* The result of a vectorized widening operation usually requires
10284 	 two vectors (because the widened results do not fit into one vector).
10285 	 The generated vector results would normally be expected to be
10286 	 generated in the same order as in the original scalar computation,
10287 	 i.e. if 8 results are generated in each vector iteration, they are
10288 	 to be organized as follows:
10289 		vect1: [res1,res2,res3,res4],
10290 		vect2: [res5,res6,res7,res8].
10291 
10292 	 However, in the special case that the result of the widening
10293 	 operation is used in a reduction computation only, the order doesn't
10294 	 matter (because when vectorizing a reduction we change the order of
10295 	 the computation).  Some targets can take advantage of this and
10296 	 generate more efficient code.  For example, targets like Altivec,
10297 	 that support widen_mult using a sequence of {mult_even,mult_odd}
10298 	 generate the following vectors:
10299 		vect1: [res1,res3,res5,res7],
10300 		vect2: [res2,res4,res6,res8].
10301 
10302 	 When vectorizing outer-loops, we execute the inner-loop sequentially
10303 	 (each vectorized inner-loop iteration contributes to VF outer-loop
10304 	 iterations in parallel).  We therefore don't allow changing the
10305 	 order of the computation in the inner-loop during outer-loop
10306 	 vectorization.  */
10307       /* TODO: Another case in which order doesn't *really* matter is when we
10308 	 widen and then contract again, e.g. (short)((int)x * y >> 8).
10309 	 Normally, pack_trunc performs an even/odd permute, whereas the
10310 	 repack from an even/odd expansion would be an interleave, which
10311 	 would be significantly simpler for e.g. AVX2.  */
10312       /* In any case, in order to avoid duplicating the code below, recurse
10313 	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
10314 	 are properly set up for the caller.  If we fail, we'll continue with
10315 	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
10316       if (vect_loop
10317 	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10318 	  && !nested_in_vect_loop_p (vect_loop, stmt_info)
10319 	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10320 					     stmt_info, vectype_out,
10321 					     vectype_in, code1, code2,
10322 					     multi_step_cvt, interm_types))
10323         {
10324           /* Elements in a vector with the vect_used_by_reduction property cannot
10325              be reordered if the use chain with this property does not have the
10326              same operation.  One such example is s += a * b, where elements
10327              in a and b cannot be reordered.  Here we check if the vector defined
10328              by STMT is only directly used in the reduction statement.  */
10329 	  tree lhs = gimple_assign_lhs (stmt_info->stmt);
10330 	  stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
10331 	  if (use_stmt_info
10332 	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10333 	    return true;
10334         }
10335       c1 = VEC_WIDEN_MULT_LO_EXPR;
10336       c2 = VEC_WIDEN_MULT_HI_EXPR;
10337       break;
10338 
10339     case DOT_PROD_EXPR:
10340       c1 = DOT_PROD_EXPR;
10341       c2 = DOT_PROD_EXPR;
10342       break;
10343 
10344     case SAD_EXPR:
10345       c1 = SAD_EXPR;
10346       c2 = SAD_EXPR;
10347       break;
10348 
10349     case VEC_WIDEN_MULT_EVEN_EXPR:
10350       /* Support the recursion induced just above.  */
10351       c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10352       c2 = VEC_WIDEN_MULT_ODD_EXPR;
10353       break;
10354 
10355     case WIDEN_LSHIFT_EXPR:
10356       c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10357       c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
10358       break;
10359 
10360     CASE_CONVERT:
10361       c1 = VEC_UNPACK_LO_EXPR;
10362       c2 = VEC_UNPACK_HI_EXPR;
10363       break;
10364 
10365     case FLOAT_EXPR:
10366       c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10367       c2 = VEC_UNPACK_FLOAT_HI_EXPR;
10368       break;
10369 
10370     case FIX_TRUNC_EXPR:
10371       c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
10372       c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
10373       break;
10374 
10375     default:
10376       gcc_unreachable ();
10377     }
10378 
10379   if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
10380     std::swap (c1, c2);
10381 
10382   if (code == FIX_TRUNC_EXPR)
10383     {
10384       /* The signedness is determined from the output operand.  */
10385       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10386       optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
10387     }
10388   else if (CONVERT_EXPR_CODE_P (code)
10389 	   && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
10390 	   && VECTOR_BOOLEAN_TYPE_P (vectype)
10391 	   && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
10392 	   && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
10393     {
10394       /* If the input and result modes are the same, a different optab
10395 	 is needed where we pass in the number of units in vectype.  */
10396       optab1 = vec_unpacks_sbool_lo_optab;
10397       optab2 = vec_unpacks_sbool_hi_optab;
10398     }
10399   else
10400     {
10401       optab1 = optab_for_tree_code (c1, vectype, optab_default);
10402       optab2 = optab_for_tree_code (c2, vectype, optab_default);
10403     }
10404 
10405   if (!optab1 || !optab2)
10406     return false;
10407 
10408   vec_mode = TYPE_MODE (vectype);
10409   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10410        || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
10411     return false;
10412 
10413   *code1 = c1;
10414   *code2 = c2;
10415 
10416   if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10417       && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10418     {
10419       if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10420 	return true;
10421       /* For scalar masks we may have different boolean
10422 	 vector types having the same QImode.  Thus we
10423 	 add an additional check on the number of elements.  */
10424       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10425 		    TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
10426 	return true;
10427     }
10428 
10429   /* Check if it's a multi-step conversion that can be done using intermediate
10430      types.  */
10431 
10432   prev_type = vectype;
10433   prev_mode = vec_mode;
10434 
10435   if (!CONVERT_EXPR_CODE_P (code))
10436     return false;
10437 
10438   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10439      intermediate steps in the promotion sequence.  We try up to
10440      MAX_INTERM_CVT_STEPS steps to get to WIDE_VECTYPE, and fail if we do
10441      not.  */
10442   interm_types->create (MAX_INTERM_CVT_STEPS);
10443   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10444     {
10445       intermediate_mode = insn_data[icode1].operand[0].mode;
10446       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10447 	{
10448 	  intermediate_type = vect_halve_mask_nunits (prev_type);
10449 	  if (intermediate_mode != TYPE_MODE (intermediate_type))
10450 	    return false;
10451 	}
10452       else
10453 	intermediate_type
10454 	  = lang_hooks.types.type_for_mode (intermediate_mode,
10455 					    TYPE_UNSIGNED (prev_type));
10456 
10457       if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
10458 	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
10459 	  && intermediate_mode == prev_mode
10460 	  && SCALAR_INT_MODE_P (prev_mode))
10461 	{
10462 	  /* If the input and result modes are the same, a different optab
10463 	     is needed where we pass in the number of units in vectype.  */
10464 	  optab3 = vec_unpacks_sbool_lo_optab;
10465 	  optab4 = vec_unpacks_sbool_hi_optab;
10466 	}
10467       else
10468 	{
10469 	  optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10470 	  optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10471 	}
10472 
10473       if (!optab3 || !optab4
10474           || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10475 	  || insn_data[icode1].operand[0].mode != intermediate_mode
10476 	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10477 	  || insn_data[icode2].operand[0].mode != intermediate_mode
10478 	  || ((icode1 = optab_handler (optab3, intermediate_mode))
10479 	      == CODE_FOR_nothing)
10480 	  || ((icode2 = optab_handler (optab4, intermediate_mode))
10481 	      == CODE_FOR_nothing))
10482 	break;
10483 
10484       interm_types->quick_push (intermediate_type);
10485       (*multi_step_cvt)++;
10486 
10487       if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10488 	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10489 	{
10490 	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10491 	    return true;
10492 	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10493 			TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
10494 	    return true;
10495 	}
10496 
10497       prev_type = intermediate_type;
10498       prev_mode = intermediate_mode;
10499     }
10500 
10501   interm_types->release ();
10502   return false;
10503 }
10504 
10505 
10506 /* Function supportable_narrowing_operation
10507 
10508    Check whether an operation represented by the code CODE is a
10509    narrowing operation that is supported by the target platform in
10510    vector form (i.e., when operating on arguments of type VECTYPE_IN
10511    and producing a result of type VECTYPE_OUT).
10512 
10513    Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10514    and FLOAT.  This function checks if these operations are supported by
10515    the target platform directly via vector tree-codes.
10516 
10517    Output:
10518    - CODE1 is the code of a vector operation to be used when
10519    vectorizing the operation, if available.
10520    - MULTI_STEP_CVT determines the number of required intermediate steps in
10521    case of multi-step conversion (like int->short->char - in that case
10522    MULTI_STEP_CVT will be 1).
10523    - INTERM_TYPES contains the intermediate type required to perform the
10524    narrowing operation (short in the above example).   */
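
/* A hedged sketch of the int->short->char example above, assuming
   hypothetical 128-bit vector modes V4SI, V8HI and V16QI: each step packs
   two input vectors into one narrower vector,

       4 x V4SI --VEC_PACK_TRUNC--> 2 x V8HI --VEC_PACK_TRUNC--> 1 x V16QI

   so the caller would see CODE1 == VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT == 1
   and INTERM_TYPES == { V8HI }.  */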
10525 
10526 bool
10527 supportable_narrowing_operation (enum tree_code code,
10528 				 tree vectype_out, tree vectype_in,
10529 				 enum tree_code *code1, int *multi_step_cvt,
10530                                  vec<tree> *interm_types)
10531 {
10532   machine_mode vec_mode;
10533   enum insn_code icode1;
10534   optab optab1, interm_optab;
10535   tree vectype = vectype_in;
10536   tree narrow_vectype = vectype_out;
10537   enum tree_code c1;
10538   tree intermediate_type, prev_type;
10539   machine_mode intermediate_mode, prev_mode;
10540   int i;
10541   bool uns;
10542 
10543   *multi_step_cvt = 0;
10544   switch (code)
10545     {
10546     CASE_CONVERT:
10547       c1 = VEC_PACK_TRUNC_EXPR;
10548       if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
10549 	  && VECTOR_BOOLEAN_TYPE_P (vectype)
10550 	  && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
10551 	  && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
10552 	optab1 = vec_pack_sbool_trunc_optab;
10553       else
10554 	optab1 = optab_for_tree_code (c1, vectype, optab_default);
10555       break;
10556 
10557     case FIX_TRUNC_EXPR:
10558       c1 = VEC_PACK_FIX_TRUNC_EXPR;
10559       /* The signedness is determined from the output operand.  */
10560       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10561       break;
10562 
10563     case FLOAT_EXPR:
10564       c1 = VEC_PACK_FLOAT_EXPR;
10565       optab1 = optab_for_tree_code (c1, vectype, optab_default);
10566       break;
10567 
10568     default:
10569       gcc_unreachable ();
10570     }
10571 
10572   if (!optab1)
10573     return false;
10574 
10575   vec_mode = TYPE_MODE (vectype);
10576   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
10577     return false;
10578 
10579   *code1 = c1;
10580 
10581   if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10582     {
10583       if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10584 	return true;
10585       /* For scalar masks we may have different boolean
10586 	 vector types sharing the same QImode, so add an
10587 	 extra check on the number of elements.  */
10588       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10589 		    TYPE_VECTOR_SUBPARTS (narrow_vectype)))
10590 	return true;
10591     }
10592 
10593   if (code == FLOAT_EXPR)
10594     return false;
10595 
10596   /* Check if it's a multi-step conversion that can be done using intermediate
10597      types.  */
10598   prev_mode = vec_mode;
10599   prev_type = vectype;
10600   if (code == FIX_TRUNC_EXPR)
10601     uns = TYPE_UNSIGNED (vectype_out);
10602   else
10603     uns = TYPE_UNSIGNED (vectype);
10604 
10605   /* For a multi-step FIX_TRUNC_EXPR prefer a signed float-to-integer
10606      conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often
10607      more costly than signed.  */
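  /* A hedged illustration: for a double -> unsigned short conversion,
     assuming hypothetical V2DF/V4SI/V8HI modes, the first step can use the
     signed vec_pack_sfix_trunc optab to produce V4SI and a VEC_PACK_TRUNC
     step then narrows to V8HI; every value that fits the final unsigned
     type is representable in the wider signed intermediate, so in-range
     results are unchanged.  */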
10608   if (code == FIX_TRUNC_EXPR && uns)
10609     {
10610       enum insn_code icode2;
10611 
10612       intermediate_type
10613 	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10614       interm_optab
10615 	= optab_for_tree_code (c1, intermediate_type, optab_default);
10616       if (interm_optab != unknown_optab
10617 	  && (icode2 = optab_handler (interm_optab, vec_mode)) != CODE_FOR_nothing
10618 	  && insn_data[icode1].operand[0].mode
10619 	     == insn_data[icode2].operand[0].mode)
10620 	{
10621 	  uns = false;
10622 	  optab1 = interm_optab;
10623 	  icode1 = icode2;
10624 	}
10625     }
10626 
10627   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10628      intermediate steps in the narrowing sequence.  We try
10629      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
10630   interm_types->create (MAX_INTERM_CVT_STEPS);
10631   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10632     {
10633       intermediate_mode = insn_data[icode1].operand[0].mode;
10634       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10635 	{
10636 	  intermediate_type = vect_double_mask_nunits (prev_type);
10637 	  if (intermediate_mode != TYPE_MODE (intermediate_type))
10638 	    return false;
10639 	}
10640       else
10641 	intermediate_type
10642 	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10643       if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
10644 	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
10645 	  && intermediate_mode == prev_mode
10646 	  && SCALAR_INT_MODE_P (prev_mode))
10647 	interm_optab = vec_pack_sbool_trunc_optab;
10648       else
10649 	interm_optab
10650 	  = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10651 				 optab_default);
10652       if (!interm_optab
10653 	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10654 	  || insn_data[icode1].operand[0].mode != intermediate_mode
10655 	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10656 	      == CODE_FOR_nothing))
10657 	break;
10658 
10659       interm_types->quick_push (intermediate_type);
10660       (*multi_step_cvt)++;
10661 
10662       if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10663 	{
10664 	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10665 	    return true;
10666 	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10667 			TYPE_VECTOR_SUBPARTS (narrow_vectype)))
10668 	    return true;
10669 	}
10670 
10671       prev_mode = intermediate_mode;
10672       prev_type = intermediate_type;
10673       optab1 = interm_optab;
10674     }
10675 
10676   interm_types->release ();
10677   return false;
10678 }
10679 
10680 /* Generate and return a statement that sets vector mask MASK such that
10681    MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I.  */
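
/* A hedged sketch assuming a 4-element MASK type: vect_gen_while (mask, 6, 9)
   builds the single internal call

       mask = .WHILE_ULT (6, 9, { 0, ... });

   which sets mask to { -1, -1, -1, 0 }: lanes 0-2 satisfy I + 6 < 9 while
   lane 3 does not.  */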
10682 
10683 gcall *
10684 vect_gen_while (tree mask, tree start_index, tree end_index)
10685 {
10686   tree cmp_type = TREE_TYPE (start_index);
10687   tree mask_type = TREE_TYPE (mask);
10688   gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10689 						       cmp_type, mask_type,
10690 						       OPTIMIZE_FOR_SPEED));
10691   gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10692 					    start_index, end_index,
10693 					    build_zero_cst (mask_type));
10694   gimple_call_set_lhs (call, mask);
10695   return call;
10696 }
10697 
10698 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10699    J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */
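
/* Continuing the hedged sketch above, with START_INDEX 6, END_INDEX 9 and a
   4-element MASK_TYPE the emitted sequence is

       tmp = .WHILE_ULT (6, 9, { 0, ... });
       inv = ~tmp;

   yielding { 0, 0, 0, -1 }.  */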
10700 
10701 tree
10702 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10703 		    tree end_index)
10704 {
10705   tree tmp = make_ssa_name (mask_type);
10706   gcall *call = vect_gen_while (tmp, start_index, end_index);
10707   gimple_seq_add_stmt (seq, call);
10708   return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10709 }
10710 
10711 /* Try to compute the vector types required to vectorize STMT_INFO,
10712    returning true on success and false if vectorization isn't possible.
10713 
10714    On success:
10715 
10716    - Set *STMT_VECTYPE_OUT to:
10717      - NULL_TREE if the statement doesn't need to be vectorized;
10718      - boolean_type_node if the statement is a boolean operation whose
10719        vector type can only be determined once all the other vector types
10720        are known; and
10721      - the equivalent of STMT_VINFO_VECTYPE otherwise.
10722 
10723    - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10724      number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10725      statement does not help to determine the overall number of units.  */
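
/* A hedged example, assuming 128-bit vectors: for a widening statement such
   as

       int_dest = (int) short_src;

   *STMT_VECTYPE_OUT would be a vector(4) int type, while *NUNITS_VECTYPE_OUT
   would be derived from the smallest scalar type (short), i.e. a vector(8)
   short type, so this statement's contribution to the vectorization factor
   is based on the 8-unit type.  */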
10726 
10727 opt_result
10728 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
10729 				tree *stmt_vectype_out,
10730 				tree *nunits_vectype_out)
10731 {
10732   gimple *stmt = stmt_info->stmt;
10733 
10734   *stmt_vectype_out = NULL_TREE;
10735   *nunits_vectype_out = NULL_TREE;
10736 
10737   if (gimple_get_lhs (stmt) == NULL_TREE
10738       /* MASK_STORE has no lhs, but is ok.  */
10739       && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
10740     {
10741       if (is_a <gcall *> (stmt))
10742 	{
10743 	  /* Ignore calls with no lhs.  These must be calls to
10744 	     #pragma omp simd functions, and the vectorization factor
10745 	     they really need can't be determined until
10746 	     vectorizable_simd_clone_call.  */
10747 	  if (dump_enabled_p ())
10748 	    dump_printf_loc (MSG_NOTE, vect_location,
10749 			     "defer to SIMD clone analysis.\n");
10750 	  return opt_result::success ();
10751 	}
10752 
10753       return opt_result::failure_at (stmt,
10754 				     "not vectorized: irregular stmt.%G", stmt);
10755     }
10756 
10757   if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
10758     return opt_result::failure_at (stmt,
10759 				   "not vectorized: vector stmt in loop:%G",
10760 				   stmt);
10761 
10762   tree vectype;
10763   tree scalar_type = NULL_TREE;
10764   if (STMT_VINFO_VECTYPE (stmt_info))
10765     *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
10766   else
10767     {
10768       gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
10769       if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
10770 	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
10771       else
10772 	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
10773 
10774       /* Pure bool ops don't participate in number-of-units computation.
10775 	 For comparisons use the types being compared.  */
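      /* A hedged illustration: for "flag = a_i < b_i" with integer operands
	 SCALAR_TYPE becomes the type of a_i, so the comparison still feeds
	 the number-of-units computation; for "flag = f1 & f2" on booleans we
	 return early with *STMT_VECTYPE_OUT set to boolean_type_node and no
	 nunits contribution.  */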
10776       if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
10777 	  && is_gimple_assign (stmt)
10778 	  && gimple_assign_rhs_code (stmt) != COND_EXPR)
10779 	{
10780 	  *stmt_vectype_out = boolean_type_node;
10781 
10782 	  tree rhs1 = gimple_assign_rhs1 (stmt);
10783 	  if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10784 	      && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10785 	    scalar_type = TREE_TYPE (rhs1);
10786 	  else
10787 	    {
10788 	      if (dump_enabled_p ())
10789 		dump_printf_loc (MSG_NOTE, vect_location,
10790 				 "pure bool operation.\n");
10791 	      return opt_result::success ();
10792 	    }
10793 	}
10794 
10795       if (dump_enabled_p ())
10796 	dump_printf_loc (MSG_NOTE, vect_location,
10797 			 "get vectype for scalar type:  %T\n", scalar_type);
10798       vectype = get_vectype_for_scalar_type (scalar_type);
10799       if (!vectype)
10800 	return opt_result::failure_at (stmt,
10801 				       "not vectorized:"
10802 				       " unsupported data-type %T\n",
10803 				       scalar_type);
10804 
10805       if (!*stmt_vectype_out)
10806 	*stmt_vectype_out = vectype;
10807 
10808       if (dump_enabled_p ())
10809 	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
10810     }
10811 
10812   /* Don't try to compute scalar types if the stmt produces a boolean
10813      vector; use the existing vector type instead.  */
10814   tree nunits_vectype;
10815   if (VECTOR_BOOLEAN_TYPE_P (vectype))
10816     nunits_vectype = vectype;
10817   else
10818     {
10819       /* The number of units is set according to the smallest scalar
10820 	 type (or the largest vector size, but we only support one
10821 	 vector size per vectorization).  */
10822       if (*stmt_vectype_out != boolean_type_node)
10823 	{
10824 	  HOST_WIDE_INT dummy;
10825 	  scalar_type = vect_get_smallest_scalar_type (stmt_info,
10826 						       &dummy, &dummy);
10827 	}
10828       if (dump_enabled_p ())
10829 	dump_printf_loc (MSG_NOTE, vect_location,
10830 			 "get vectype for scalar type:  %T\n", scalar_type);
10831       nunits_vectype = get_vectype_for_scalar_type (scalar_type);
10832     }
10833   if (!nunits_vectype)
10834     return opt_result::failure_at (stmt,
10835 				   "not vectorized: unsupported data-type %T\n",
10836 				   scalar_type);
10837 
10838   if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
10839 		GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
10840     return opt_result::failure_at (stmt,
10841 				   "not vectorized: different sized vector "
10842 				   "types in statement, %T and %T\n",
10843 				   vectype, nunits_vectype);
10844 
10845   if (dump_enabled_p ())
10846     {
10847       dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
10848 		       nunits_vectype);
10849 
10850       dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
10851       dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
10852       dump_printf (MSG_NOTE, "\n");
10853     }
10854 
10855   *nunits_vectype_out = nunits_vectype;
10856   return opt_result::success ();
10857 }
10858 
10859 /* Try to determine the correct vector type for STMT_INFO, which is a
10860    statement that produces a scalar boolean result.  Return the vector
10861    type on success, otherwise return NULL_TREE.  */
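
/* A hedged sketch, assuming 128-bit vectors: for "flag = a_i > b_i" with int
   operands the mask type is the boolean vector type used for comparing two
   vector(4) int operands; for "flag = f1 & f2" the mask type is instead taken
   from the operands' own mask vector types, which must agree in size.  */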
10862 
10863 opt_tree
10864 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
10865 {
10866   gimple *stmt = stmt_info->stmt;
10867   tree mask_type = NULL;
10868   tree vectype, scalar_type;
10869 
10870   if (is_gimple_assign (stmt)
10871       && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10872       && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
10873     {
10874       scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
10875       mask_type = get_mask_type_for_scalar_type (scalar_type);
10876 
10877       if (!mask_type)
10878 	return opt_tree::failure_at (stmt,
10879 				     "not vectorized: unsupported mask\n");
10880     }
10881   else
10882     {
10883       tree rhs;
10884       ssa_op_iter iter;
10885       enum vect_def_type dt;
10886 
10887       FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
10888 	{
10889 	  if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
10890 	    return opt_tree::failure_at (stmt,
10891 					 "not vectorized: can't compute mask"
10892 					 " type for statement, %G", stmt);
10893 
10894 	  /* No vectype probably means an external definition.
10895 	     Allow it in case another operand makes it possible
10896 	     to determine the mask type.  */
10897 	  if (!vectype)
10898 	    continue;
10899 
10900 	  if (!mask_type)
10901 	    mask_type = vectype;
10902 	  else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
10903 			     TYPE_VECTOR_SUBPARTS (vectype)))
10904 	    return opt_tree::failure_at (stmt,
10905 					 "not vectorized: different sized mask"
10906 					 " types in statement, %T and %T\n",
10907 					 mask_type, vectype);
10908 	  else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
10909 		   != VECTOR_BOOLEAN_TYPE_P (vectype))
10910 	    return opt_tree::failure_at (stmt,
10911 					 "not vectorized: mixed mask and "
10912 					 "nonmask vector types in statement, "
10913 					 "%T and %T\n",
10914 					 mask_type, vectype);
10915 	}
10916 
10917       /* We may compare boolean values loaded as a vector of integers.
10918 	 Fix mask_type in that case.  */
10919       if (mask_type
10920 	  && !VECTOR_BOOLEAN_TYPE_P (mask_type)
10921 	  && gimple_code (stmt) == GIMPLE_ASSIGN
10922 	  && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
10923 	mask_type = build_same_sized_truth_vector_type (mask_type);
10924     }
10925 
10926   /* A missing mask_type should mean a loop-invariant predicate.
10927      This is probably a candidate for optimization in if-conversion.  */
10928   if (!mask_type)
10929     return opt_tree::failure_at (stmt,
10930 				 "not vectorized: can't compute mask type "
10931 				 "for statement: %G", stmt);
10932 
10933   return opt_tree::success (mask_type);
10934 }
10935