1 /* Statement Analysis and Transformation for Vectorization
2    Copyright (C) 2003-2018 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4    and Ira Rosen <irar@il.ibm.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h"		/* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
55 
56 /* For lang_hooks.types.type_for_mode.  */
57 #include "langhooks.h"
58 
59 /* Return the vectorized type for the given statement.  */
60 
61 tree
62 stmt_vectype (struct _stmt_vec_info *stmt_info)
63 {
64   return STMT_VINFO_VECTYPE (stmt_info);
65 }
66 
67 /* Return TRUE iff the given statement is in an inner loop relative to
68    the loop being vectorized.  */
69 bool
70 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
71 {
72   gimple *stmt = STMT_VINFO_STMT (stmt_info);
73   basic_block bb = gimple_bb (stmt);
74   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
75   struct loop* loop;
76 
77   if (!loop_vinfo)
78     return false;
79 
80   loop = LOOP_VINFO_LOOP (loop_vinfo);
81 
82   return (bb->loop_father == loop->inner);
83 }
84 
85 /* Record the cost of a statement, either by directly informing the
86    target model or by saving it in a vector for later processing.
87    Return a preliminary estimate of the statement's cost.  */
88 
89 unsigned
90 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
91 		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92 		  int misalign, enum vect_cost_model_location where)
93 {
94   if ((kind == vector_load || kind == unaligned_load)
95       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
96     kind = vector_gather_load;
97   if ((kind == vector_store || kind == unaligned_store)
98       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99     kind = vector_scatter_store;
100   if (body_cost_vec)
101     {
102       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
103       stmt_info_for_cost si = { count, kind,
104 			        stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
105 				misalign };
106       body_cost_vec->safe_push (si);
107       return (unsigned)
108 	(builtin_vectorization_cost (kind, vectype, misalign) * count);
109     }
110   else
111     return add_stmt_cost (stmt_info->vinfo->target_cost_data,
112 			  count, kind, stmt_info, misalign, where);
113 }
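
/* Illustrative sketch (not from the original sources; variable names are
   hypothetical): a typical caller either queues the cost,

       stmt_vector_for_cost cost_vec = vNULL;
       unsigned est = record_stmt_cost (&cost_vec, ncopies, vector_stmt,
					stmt_info, 0, vect_body);

   receiving only a rough builtin_vectorization_cost-based estimate back, or
   passes a NULL vector so the cost goes straight to the target via
   add_stmt_cost on stmt_info->vinfo->target_cost_data.  */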
114 
115 /* Return a variable of type ELEM_TYPE[NELEMS].  */
116 
117 static tree
118 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
119 {
120   return create_tmp_var (build_array_type_nelts (elem_type, nelems),
121 			 "vect_array");
122 }
123 
124 /* ARRAY is an array of vectors created by create_vector_array.
125    Return an SSA_NAME for the vector in index N.  The reference
126    is part of the vectorization of STMT and the vector is associated
127    with scalar destination SCALAR_DEST.  */
128 
129 static tree
130 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
131 		   tree array, unsigned HOST_WIDE_INT n)
132 {
133   tree vect_type, vect, vect_name, array_ref;
134   gimple *new_stmt;
135 
136   gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
137   vect_type = TREE_TYPE (TREE_TYPE (array));
138   vect = vect_create_destination_var (scalar_dest, vect_type);
139   array_ref = build4 (ARRAY_REF, vect_type, array,
140 		      build_int_cst (size_type_node, n),
141 		      NULL_TREE, NULL_TREE);
142 
143   new_stmt = gimple_build_assign (vect, array_ref);
144   vect_name = make_ssa_name (vect, new_stmt);
145   gimple_assign_set_lhs (new_stmt, vect_name);
146   vect_finish_stmt_generation (stmt, new_stmt, gsi);
147 
148   return vect_name;
149 }
150 
151 /* ARRAY is an array of vectors created by create_vector_array.
152    Emit code to store SSA_NAME VECT in index N of the array.
153    The store is part of the vectorization of STMT.  */
154 
155 static void
156 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
157 		    tree array, unsigned HOST_WIDE_INT n)
158 {
159   tree array_ref;
160   gimple *new_stmt;
161 
162   array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
163 		      build_int_cst (size_type_node, n),
164 		      NULL_TREE, NULL_TREE);
165 
166   new_stmt = gimple_build_assign (array_ref, vect);
167   vect_finish_stmt_generation (stmt, new_stmt, gsi);
168 }
169 
170 /* PTR is a pointer to an array of type TYPE.  Return a representation
171    of *PTR.  The memory reference replaces those in FIRST_DR
172    (and its group).  */
173 
174 static tree
175 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
176 {
177   tree mem_ref;
178 
179   mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
180   /* Arrays have the same alignment as their type.  */
181   set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
182   return mem_ref;
183 }
184 
185 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
186 
187 /* Function vect_mark_relevant.
188 
189    Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
190 
191 static void
192 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
193 		    enum vect_relevant relevant, bool live_p)
194 {
195   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
196   enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
197   bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
198   gimple *pattern_stmt;
199 
200   if (dump_enabled_p ())
201     {
202       dump_printf_loc (MSG_NOTE, vect_location,
203 		       "mark relevant %d, live %d: ", relevant, live_p);
204       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
205     }
206 
207   /* If this stmt is an original stmt in a pattern, we might need to mark its
208      related pattern stmt instead of the original stmt.  However, such stmts
209      may have their own uses that are not in any pattern; in such cases the
210      stmt itself should be marked.  */
211   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
212     {
213       /* This is the last stmt in a sequence that was detected as a
214 	 pattern that can potentially be vectorized.  Don't mark the stmt
215 	 as relevant/live because it's not going to be vectorized.
216 	 Instead mark the pattern-stmt that replaces it.  */
217 
218       pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
219 
220       if (dump_enabled_p ())
221 	dump_printf_loc (MSG_NOTE, vect_location,
222 			 "last stmt in pattern. don't mark"
223 			 " relevant/live.\n");
224       stmt_info = vinfo_for_stmt (pattern_stmt);
225       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
226       save_relevant = STMT_VINFO_RELEVANT (stmt_info);
227       save_live_p = STMT_VINFO_LIVE_P (stmt_info);
228       stmt = pattern_stmt;
229     }
230 
231   STMT_VINFO_LIVE_P (stmt_info) |= live_p;
232   if (relevant > STMT_VINFO_RELEVANT (stmt_info))
233     STMT_VINFO_RELEVANT (stmt_info) = relevant;
234 
235   if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
236       && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
237     {
238       if (dump_enabled_p ())
239         dump_printf_loc (MSG_NOTE, vect_location,
240                          "already marked relevant/live.\n");
241       return;
242     }
243 
244   worklist->safe_push (stmt);
245 }
246 
247 
248 /* Function is_simple_and_all_uses_invariant
249 
250    Return true if STMT is simple and all uses of it are invariant.  */
251 
252 bool
253 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
254 {
255   tree op;
256   gimple *def_stmt;
257   ssa_op_iter iter;
258 
259   if (!is_gimple_assign (stmt))
260     return false;
261 
262   FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
263     {
264       enum vect_def_type dt = vect_uninitialized_def;
265 
266       if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
267 	{
268 	  if (dump_enabled_p ())
269 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
270 			     "use not simple.\n");
271 	  return false;
272 	}
273 
274       if (dt != vect_external_def && dt != vect_constant_def)
275 	return false;
276     }
277   return true;
278 }
279 
280 /* Function vect_stmt_relevant_p.
281 
282    Return true if STMT, in the loop represented by LOOP_VINFO, is
283    "relevant for vectorization".
284 
285    A stmt is considered "relevant for vectorization" if:
286    - it has uses outside the loop.
287    - it has vdefs (it alters memory).
288    - it is a control stmt in the loop (except for the exit condition).
289 
290    CHECKME: what other side effects would the vectorizer allow?  */
291 
292 static bool
293 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
294 		      enum vect_relevant *relevant, bool *live_p)
295 {
296   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
297   ssa_op_iter op_iter;
298   imm_use_iterator imm_iter;
299   use_operand_p use_p;
300   def_operand_p def_p;
301 
302   *relevant = vect_unused_in_scope;
303   *live_p = false;
304 
305   /* cond stmt other than loop exit cond.  */
306   if (is_ctrl_stmt (stmt)
307       && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
308          != loop_exit_ctrl_vec_info_type)
309     *relevant = vect_used_in_scope;
310 
311   /* changing memory.  */
312   if (gimple_code (stmt) != GIMPLE_PHI)
313     if (gimple_vdef (stmt)
314 	&& !gimple_clobber_p (stmt))
315       {
316 	if (dump_enabled_p ())
317 	  dump_printf_loc (MSG_NOTE, vect_location,
318                            "vec_stmt_relevant_p: stmt has vdefs.\n");
319 	*relevant = vect_used_in_scope;
320       }
321 
322   /* uses outside the loop.  */
323   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
324     {
325       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
326 	{
327 	  basic_block bb = gimple_bb (USE_STMT (use_p));
328 	  if (!flow_bb_inside_loop_p (loop, bb))
329 	    {
330 	      if (dump_enabled_p ())
331 		dump_printf_loc (MSG_NOTE, vect_location,
332                                  "vec_stmt_relevant_p: used out of loop.\n");
333 
334 	      if (is_gimple_debug (USE_STMT (use_p)))
335 		continue;
336 
337 	      /* We expect all such uses to be in the loop exit phis
338 		 (because of loop-closed SSA form).  */
339 	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
340 	      gcc_assert (bb == single_exit (loop)->dest);
341 
342               *live_p = true;
343 	    }
344 	}
345     }
346 
347   if (*live_p && *relevant == vect_unused_in_scope
348       && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
349     {
350       if (dump_enabled_p ())
351 	dump_printf_loc (MSG_NOTE, vect_location,
352 			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
353       *relevant = vect_used_only_live;
354     }
355 
356   return (*live_p || *relevant);
357 }
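
/* Illustrative sketch (not from the original sources): in

       for (i = 0; i < n; i++)
	 {
	   a[i] = b[i] + x;     <-- has a vdef: relevant
	   t = b[i] * 2;        <-- t used after the loop: live
	 }
       ... = t;

   the store is marked vect_used_in_scope because it alters memory, while the
   definition of t sets *live_p and, because it is not invariant, is marked
   vect_used_only_live (its out-of-loop use goes through the exit PHI).  */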
358 
359 
360 /* Function exist_non_indexing_operands_for_use_p
361 
362    USE is one of the uses attached to STMT.  Check if USE is
363    used in STMT for anything other than indexing an array.  */
364 
365 static bool
366 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
367 {
368   tree operand;
369   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
370 
371   /* USE corresponds to some operand in STMT.  If there is no data
372      reference in STMT, then any operand that corresponds to USE
373      is not indexing an array.  */
374   if (!STMT_VINFO_DATA_REF (stmt_info))
375     return true;
376 
377   /* STMT has a data_ref.  FORNOW this means that it is of one of
378      the following forms:
379      -1- ARRAY_REF = var
380      -2- var = ARRAY_REF
381      (This should have been verified in analyze_data_refs).
382 
383      'var' in the second case corresponds to a def, not a use,
384      so USE cannot correspond to any operands that are not used
385      for array indexing.
386 
387      Therefore, all we need to check is if STMT falls into the
388      first case, and whether var corresponds to USE.  */
389 
390   if (!gimple_assign_copy_p (stmt))
391     {
392       if (is_gimple_call (stmt)
393 	  && gimple_call_internal_p (stmt))
394 	{
395 	  internal_fn ifn = gimple_call_internal_fn (stmt);
396 	  int mask_index = internal_fn_mask_index (ifn);
397 	  if (mask_index >= 0
398 	      && use == gimple_call_arg (stmt, mask_index))
399 	    return true;
400 	  int stored_value_index = internal_fn_stored_value_index (ifn);
401 	  if (stored_value_index >= 0
402 	      && use == gimple_call_arg (stmt, stored_value_index))
403 	    return true;
404 	  if (internal_gather_scatter_fn_p (ifn)
405 	      && use == gimple_call_arg (stmt, 1))
406 	    return true;
407 	}
408       return false;
409     }
410 
411   if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
412     return false;
413   operand = gimple_assign_rhs1 (stmt);
414   if (TREE_CODE (operand) != SSA_NAME)
415     return false;
416 
417   if (operand == use)
418     return true;
419 
420   return false;
421 }
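
/* Illustrative sketch (not from the original sources): for a copy statement
   such as a[i] = x_1, the use of x_1 is the stored value, so this predicate
   returns true for it, whereas the use of i only feeds the ARRAY_REF address
   computation and the predicate returns false.  */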
422 
423 
424 /*
425    Function process_use.
426 
427    Inputs:
428    - a USE in STMT in a loop represented by LOOP_VINFO
429    - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
430      that defined USE.  This is done by calling mark_relevant and passing it
431      the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
432    - FORCE is true if the exist_non_indexing_operands_for_use_p check shouldn't
433      be performed.
434 
435    Outputs:
436    Generally, LIVE_P and RELEVANT are used to define the liveness and
437    relevance info of the DEF_STMT of this USE:
438        STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
439        STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
440    Exceptions:
441    - case 1: If USE is used only for address computations (e.g. array indexing),
442    which does not need to be directly vectorized, then the liveness/relevance
443    of the respective DEF_STMT is left unchanged.
444    - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
445    skip DEF_STMT because it has already been processed.
446    - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
447    be modified accordingly.
448 
449    Return true if everything is as expected. Return false otherwise.  */
450 
451 static bool
452 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
453 	     enum vect_relevant relevant, vec<gimple *> *worklist,
454 	     bool force)
455 {
456   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
457   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
458   stmt_vec_info dstmt_vinfo;
459   basic_block bb, def_bb;
460   gimple *def_stmt;
461   enum vect_def_type dt;
462 
463   /* case 1: we are only interested in uses that need to be vectorized.  Uses
464      that are used for address computation are not considered relevant.  */
465   if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
466      return true;
467 
468   if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
469     {
470       if (dump_enabled_p ())
471         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
472                          "not vectorized: unsupported use in stmt.\n");
473       return false;
474     }
475 
476   if (!def_stmt || gimple_nop_p (def_stmt))
477     return true;
478 
479   def_bb = gimple_bb (def_stmt);
480   if (!flow_bb_inside_loop_p (loop, def_bb))
481     {
482       if (dump_enabled_p ())
483 	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
484       return true;
485     }
486 
487   /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
488      DEF_STMT must have already been processed, because this should be the
489      only way that STMT, which is a reduction-phi, was put in the worklist,
490      as there should be no other uses for DEF_STMT in the loop.  So we just
491      check that everything is as expected, and we are done.  */
492   dstmt_vinfo = vinfo_for_stmt (def_stmt);
493   bb = gimple_bb (stmt);
494   if (gimple_code (stmt) == GIMPLE_PHI
495       && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
496       && gimple_code (def_stmt) != GIMPLE_PHI
497       && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
498       && bb->loop_father == def_bb->loop_father)
499     {
500       if (dump_enabled_p ())
501 	dump_printf_loc (MSG_NOTE, vect_location,
502                          "reduc-stmt defining reduc-phi in the same nest.\n");
503       if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
504 	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
505       gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
506       gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
507 		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
508       return true;
509     }
510 
511   /* case 3a: outer-loop stmt defining an inner-loop stmt:
512 	outer-loop-header-bb:
513 		d = def_stmt
514 	inner-loop:
515 		stmt # use (d)
516 	outer-loop-tail-bb:
517 		...		  */
518   if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
519     {
520       if (dump_enabled_p ())
521 	dump_printf_loc (MSG_NOTE, vect_location,
522                          "outer-loop def-stmt defining inner-loop stmt.\n");
523 
524       switch (relevant)
525 	{
526 	case vect_unused_in_scope:
527 	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
528 		      vect_used_in_scope : vect_unused_in_scope;
529 	  break;
530 
531 	case vect_used_in_outer_by_reduction:
532           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
533 	  relevant = vect_used_by_reduction;
534 	  break;
535 
536 	case vect_used_in_outer:
537           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
538 	  relevant = vect_used_in_scope;
539 	  break;
540 
541 	case vect_used_in_scope:
542 	  break;
543 
544 	default:
545 	  gcc_unreachable ();
546 	}
547     }
548 
549   /* case 3b: inner-loop stmt defining an outer-loop stmt:
550 	outer-loop-header-bb:
551 		...
552 	inner-loop:
553 		d = def_stmt
554 	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
555 		stmt # use (d)		*/
556   else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
557     {
558       if (dump_enabled_p ())
559 	dump_printf_loc (MSG_NOTE, vect_location,
560                          "inner-loop def-stmt defining outer-loop stmt.\n");
561 
562       switch (relevant)
563         {
564         case vect_unused_in_scope:
565           relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
566             || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
567                       vect_used_in_outer_by_reduction : vect_unused_in_scope;
568           break;
569 
570         case vect_used_by_reduction:
571 	case vect_used_only_live:
572           relevant = vect_used_in_outer_by_reduction;
573           break;
574 
575         case vect_used_in_scope:
576           relevant = vect_used_in_outer;
577           break;
578 
579         default:
580           gcc_unreachable ();
581         }
582     }
583   /* We are also not interested in uses on loop PHI backedges that are
584      inductions.  Otherwise we'll needlessly vectorize the IV increment
585      and cause hybrid SLP for SLP inductions.  Unless the PHI is live
586      of course.  */
587   else if (gimple_code (stmt) == GIMPLE_PHI
588 	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
589 	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
590 	   && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
591 	       == use))
592     {
593       if (dump_enabled_p ())
594 	dump_printf_loc (MSG_NOTE, vect_location,
595                          "induction value on backedge.\n");
596       return true;
597     }
598 
599 
600   vect_mark_relevant (worklist, def_stmt, relevant, false);
601   return true;
602 }
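
/* Illustrative sketch (not from the original sources): for case 3a above, an
   outer-loop def consumed by a vect_used_in_scope inner-loop stmt keeps the
   vect_used_in_scope relevance; for case 3b, an inner-loop def consumed by an
   outer-loop stmt that is vect_used_by_reduction is promoted to
   vect_used_in_outer_by_reduction before being marked.  */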
603 
604 
605 /* Function vect_mark_stmts_to_be_vectorized.
606 
607    Not all stmts in the loop need to be vectorized. For example:
608 
609      for i...
610        for j...
611    1.    T0 = i + j
612    2.	 T1 = a[T0]
613 
614    3.    j = j + 1
615 
616    Stmts 1 and 3 do not need to be vectorized, because loop control and
617    addressing of vectorized data-refs are handled differently.
618 
619    This pass detects such stmts.  */
620 
621 bool
622 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
623 {
624   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
625   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
626   unsigned int nbbs = loop->num_nodes;
627   gimple_stmt_iterator si;
628   gimple *stmt;
629   unsigned int i;
630   stmt_vec_info stmt_vinfo;
631   basic_block bb;
632   gimple *phi;
633   bool live_p;
634   enum vect_relevant relevant;
635 
636   if (dump_enabled_p ())
637     dump_printf_loc (MSG_NOTE, vect_location,
638                      "=== vect_mark_stmts_to_be_vectorized ===\n");
639 
640   auto_vec<gimple *, 64> worklist;
641 
642   /* 1. Init worklist.  */
643   for (i = 0; i < nbbs; i++)
644     {
645       bb = bbs[i];
646       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
647 	{
648 	  phi = gsi_stmt (si);
649 	  if (dump_enabled_p ())
650 	    {
651 	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
652 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
653 	    }
654 
655 	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
656 	    vect_mark_relevant (&worklist, phi, relevant, live_p);
657 	}
658       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
659 	{
660 	  stmt = gsi_stmt (si);
661 	  if (dump_enabled_p ())
662 	    {
663 	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
664 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
665 	    }
666 
667 	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
668 	    vect_mark_relevant (&worklist, stmt, relevant, live_p);
669 	}
670     }
671 
672   /* 2. Process_worklist */
673   while (worklist.length () > 0)
674     {
675       use_operand_p use_p;
676       ssa_op_iter iter;
677 
678       stmt = worklist.pop ();
679       if (dump_enabled_p ())
680 	{
681           dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
682           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
683 	}
684 
685       /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
686 	 (DEF_STMT) as relevant/irrelevant according to the relevance property
687 	 of STMT.  */
688       stmt_vinfo = vinfo_for_stmt (stmt);
689       relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
690 
691       /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
692 	 propagated as is to the DEF_STMTs of its USEs.
693 
694 	 One exception is when STMT has been identified as defining a reduction
695 	 variable; in this case we set the relevance to vect_used_by_reduction.
696 	 This is because we distinguish between two kinds of relevant stmts -
697 	 those that are used by a reduction computation, and those that are
698 	 (also) used by a regular computation.  This allows us later on to
699 	 identify stmts that are used solely by a reduction, and therefore the
700 	 order of the results that they produce does not have to be kept.  */
701 
702       switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
703         {
704           case vect_reduction_def:
705 	    gcc_assert (relevant != vect_unused_in_scope);
706 	    if (relevant != vect_unused_in_scope
707 		&& relevant != vect_used_in_scope
708 		&& relevant != vect_used_by_reduction
709 		&& relevant != vect_used_only_live)
710 	      {
711 		if (dump_enabled_p ())
712 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
713 				   "unsupported use of reduction.\n");
714 		return false;
715 	      }
716 	    break;
717 
718           case vect_nested_cycle:
719 	    if (relevant != vect_unused_in_scope
720 		&& relevant != vect_used_in_outer_by_reduction
721 		&& relevant != vect_used_in_outer)
722               {
723                 if (dump_enabled_p ())
724                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
725                                    "unsupported use of nested cycle.\n");
726 
727                 return false;
728               }
729             break;
730 
731           case vect_double_reduction_def:
732 	    if (relevant != vect_unused_in_scope
733 		&& relevant != vect_used_by_reduction
734 		&& relevant != vect_used_only_live)
735               {
736                 if (dump_enabled_p ())
737                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
738                                    "unsupported use of double reduction.\n");
739 
740                 return false;
741               }
742             break;
743 
744           default:
745             break;
746         }
747 
748       if (is_pattern_stmt_p (stmt_vinfo))
749         {
750           /* Pattern statements are not inserted into the code, so
751              FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
752              have to scan the RHS or function arguments instead.  */
753           if (is_gimple_assign (stmt))
754             {
755 	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
756 	      tree op = gimple_assign_rhs1 (stmt);
757 
758 	      i = 1;
759 	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
760 		{
761 		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
762 				    relevant, &worklist, false)
763 		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
764 				       relevant, &worklist, false))
765 		    return false;
766 		  i = 2;
767 		}
768 	      for (; i < gimple_num_ops (stmt); i++)
769                 {
770 		  op = gimple_op (stmt, i);
771                   if (TREE_CODE (op) == SSA_NAME
772 		      && !process_use (stmt, op, loop_vinfo, relevant,
773 				       &worklist, false))
774                     return false;
775                  }
776             }
777           else if (is_gimple_call (stmt))
778             {
779               for (i = 0; i < gimple_call_num_args (stmt); i++)
780                 {
781                   tree arg = gimple_call_arg (stmt, i);
782 		  if (!process_use (stmt, arg, loop_vinfo, relevant,
783 				    &worklist, false))
784                     return false;
785                 }
786             }
787         }
788       else
789         FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
790           {
791             tree op = USE_FROM_PTR (use_p);
792 	    if (!process_use (stmt, op, loop_vinfo, relevant,
793 			      &worklist, false))
794               return false;
795           }
796 
797       if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
798 	{
799 	  gather_scatter_info gs_info;
800 	  if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
801 	    gcc_unreachable ();
802 	  if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
803 			    &worklist, true))
804 	    return false;
805 	}
806     } /* while worklist */
807 
808   return true;
809 }
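
/* Illustrative sketch (not from the original sources): on the example in the
   comment above, assume stmt 2 (T1 = a[T0]) ends up on the worklist because
   its result feeds relevant work.  When its use T0 is examined,
   exist_non_indexing_operands_for_use_p returns false (T0 only feeds the
   array address), so process_use skips it and stmt 1 is never marked.  */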
810 
811 
812 /* Function vect_model_simple_cost.
813 
814    Models cost for simple operations, i.e. those that only emit ncopies of a
815    single op.  Right now, this does not account for multiple insns that could
816    be generated for the single vector op.  We will handle that shortly.  */
817 
818 void
819 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
820 			enum vect_def_type *dt,
821 			int ndts,
822 			stmt_vector_for_cost *prologue_cost_vec,
823 			stmt_vector_for_cost *body_cost_vec)
824 {
825   int i;
826   int inside_cost = 0, prologue_cost = 0;
827 
828   /* The SLP costs were already calculated during SLP tree build.  */
829   gcc_assert (!PURE_SLP_STMT (stmt_info));
830 
831   /* Cost the "broadcast" of a scalar operand into a vector operand.
832      Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
833      cost model.  */
834   for (i = 0; i < ndts; i++)
835     if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
836       prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
837 					 stmt_info, 0, vect_prologue);
838 
839   /* Pass the inside-of-loop statements to the target-specific cost model.  */
840   inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
841 				  stmt_info, 0, vect_body);
842 
843   if (dump_enabled_p ())
844     dump_printf_loc (MSG_NOTE, vect_location,
845                      "vect_model_simple_cost: inside_cost = %d, "
846                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
847 }
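
/* Illustrative sketch (not from the original sources): with ncopies == 2 and
   one constant operand, the code above records one scalar_to_vec cost in the
   prologue (the broadcast of the constant) and two vector_stmt costs in the
   loop body; the actual values come from the target cost model.  */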
848 
849 
850 /* Model cost for type demotion and promotion operations.  PWR is normally
851    zero for single-step promotions and demotions.  It will be one if
852    two-step promotion/demotion is required, and so on.  Each additional
853    step doubles the number of instructions required.  */
854 
855 static void
856 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
857 				    enum vect_def_type *dt, int pwr)
858 {
859   int i, tmp;
860   int inside_cost = 0, prologue_cost = 0;
861   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
862   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
863   void *target_cost_data;
864 
865   /* The SLP costs were already calculated during SLP tree build.  */
866   gcc_assert (!PURE_SLP_STMT (stmt_info));
867 
868   if (loop_vinfo)
869     target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
870   else
871     target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
872 
873   for (i = 0; i < pwr + 1; i++)
874     {
875       tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
876 	(i + 1) : i;
877       inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
878 				    vec_promote_demote, stmt_info, 0,
879 				    vect_body);
880     }
881 
882   /* FORNOW: Assuming maximum 2 args per stmts.  */
883   /* FORNOW: Assuming maximum 2 args per stmt.  */
884     if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
885       prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
886 				      stmt_info, 0, vect_prologue);
887 
888   if (dump_enabled_p ())
889     dump_printf_loc (MSG_NOTE, vect_location,
890                      "vect_model_promotion_demotion_cost: inside_cost = %d, "
891                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
892 }
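
/* Illustrative sketch (not from the original sources): for a two-step
   promotion (PWR == 1) the loop above costs vect_pow2 (1) + vect_pow2 (2)
   = 2 + 4 = 6 vec_promote_demote operations, i.e. each extra step doubles
   the instruction count as described in the comment before the function.  */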
893 
894 /* Function vect_model_store_cost
895 
896    Models cost for stores.  In the case of grouped accesses, one access
897    has the overhead of the grouped access attributed to it.  */
898 
899 void
900 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
901 		       vect_memory_access_type memory_access_type,
902 		       vec_load_store_type vls_type, slp_tree slp_node,
903 		       stmt_vector_for_cost *prologue_cost_vec,
904 		       stmt_vector_for_cost *body_cost_vec)
905 {
906   unsigned int inside_cost = 0, prologue_cost = 0;
907   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
908   gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
909   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
910 
911   if (vls_type == VLS_STORE_INVARIANT)
912     prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
913 				       stmt_info, 0, vect_prologue);
914 
915   /* Grouped stores update all elements in the group at once,
916      so we want the DR for the first statement.  */
917   if (!slp_node && grouped_access_p)
918     {
919       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
920       dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
921     }
922 
923   /* True if we should include any once-per-group costs as well as
924      the cost of the statement itself.  For SLP we only get called
925      once per group anyhow.  */
926   bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
927 
928   /* We assume that the cost of a single store-lanes instruction is
929      equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
930      access is instead being provided by a permute-and-store operation,
931      include the cost of the permutes.  */
932   if (first_stmt_p
933       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
934     {
935       /* Uses high and low interleave or shuffle operations for each
936 	 needed permute.  */
937       int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
938       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
939       inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
940 				      stmt_info, 0, vect_body);
941 
942       if (dump_enabled_p ())
943         dump_printf_loc (MSG_NOTE, vect_location,
944                          "vect_model_store_cost: strided group_size = %d .\n",
945                          group_size);
946     }
947 
948   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
949   /* Costs of the stores.  */
950   if (memory_access_type == VMAT_ELEMENTWISE
951       || memory_access_type == VMAT_GATHER_SCATTER)
952     {
953       /* N scalar stores plus extracting the elements.  */
954       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
955       inside_cost += record_stmt_cost (body_cost_vec,
956 				       ncopies * assumed_nunits,
957 				       scalar_store, stmt_info, 0, vect_body);
958     }
959   else
960     vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
961 
962   if (memory_access_type == VMAT_ELEMENTWISE
963       || memory_access_type == VMAT_STRIDED_SLP)
964     {
965       /* N scalar stores plus extracting the elements.  */
966       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
967       inside_cost += record_stmt_cost (body_cost_vec,
968 				       ncopies * assumed_nunits,
969 				       vec_to_scalar, stmt_info, 0, vect_body);
970     }
971 
972   if (dump_enabled_p ())
973     dump_printf_loc (MSG_NOTE, vect_location,
974                      "vect_model_store_cost: inside_cost = %d, "
975                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
976 }
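
/* Illustrative sketch (not from the original sources): for a contiguous
   permuted store with group_size == 4 and ncopies == 1, the count computed
   above is 1 * ceil_log2 (4) * 4 = 8 vec_perm operations in addition to the
   cost of the stores themselves.  */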
977 
978 
979 /* Calculate cost of DR's memory access.  */
980 void
981 vect_get_store_cost (struct data_reference *dr, int ncopies,
982 		     unsigned int *inside_cost,
983 		     stmt_vector_for_cost *body_cost_vec)
984 {
985   int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
986   gimple *stmt = DR_STMT (dr);
987   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
988 
989   switch (alignment_support_scheme)
990     {
991     case dr_aligned:
992       {
993 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
994 					  vector_store, stmt_info, 0,
995 					  vect_body);
996 
997         if (dump_enabled_p ())
998           dump_printf_loc (MSG_NOTE, vect_location,
999                            "vect_model_store_cost: aligned.\n");
1000         break;
1001       }
1002 
1003     case dr_unaligned_supported:
1004       {
1005         /* Here, we assign an additional cost for the unaligned store.  */
1006 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1007 					  unaligned_store, stmt_info,
1008 					  DR_MISALIGNMENT (dr), vect_body);
1009         if (dump_enabled_p ())
1010           dump_printf_loc (MSG_NOTE, vect_location,
1011                            "vect_model_store_cost: unaligned supported by "
1012                            "hardware.\n");
1013         break;
1014       }
1015 
1016     case dr_unaligned_unsupported:
1017       {
1018         *inside_cost = VECT_MAX_COST;
1019 
1020         if (dump_enabled_p ())
1021           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1022                            "vect_model_store_cost: unsupported access.\n");
1023         break;
1024       }
1025 
1026     default:
1027       gcc_unreachable ();
1028     }
1029 }
1030 
1031 
1032 /* Function vect_model_load_cost
1033 
1034    Models cost for loads.  In the case of grouped accesses, one access has
1035    the overhead of the grouped access attributed to it.  Since unaligned
1036    accesses are supported for loads, we also account for the costs of the
1037    access scheme chosen.  */
1038 
1039 void
1040 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1041 		      vect_memory_access_type memory_access_type,
1042 		      slp_tree slp_node,
1043 		      stmt_vector_for_cost *prologue_cost_vec,
1044 		      stmt_vector_for_cost *body_cost_vec)
1045 {
1046   gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1047   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1048   unsigned int inside_cost = 0, prologue_cost = 0;
1049   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1050 
1051   /* Grouped loads read all elements in the group at once,
1052      so we want the DR for the first statement.  */
1053   if (!slp_node && grouped_access_p)
1054     {
1055       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1056       dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1057     }
1058 
1059   /* True if we should include any once-per-group costs as well as
1060      the cost of the statement itself.  For SLP we only get called
1061      once per group anyhow.  */
1062   bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1063 
1064   /* We assume that the cost of a single load-lanes instruction is
1065      equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
1066      access is instead being provided by a load-and-permute operation,
1067      include the cost of the permutes.  */
1068   if (first_stmt_p
1069       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1070     {
1071       /* Uses even and odd extract operations or shuffle operations
1072 	 for each needed permute.  */
1073       int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1074       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1075       inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1076 				      stmt_info, 0, vect_body);
1077 
1078       if (dump_enabled_p ())
1079         dump_printf_loc (MSG_NOTE, vect_location,
1080                          "vect_model_load_cost: strided group_size = %d .\n",
1081                          group_size);
1082     }
1083 
1084   /* The loads themselves.  */
1085   if (memory_access_type == VMAT_ELEMENTWISE
1086       || memory_access_type == VMAT_GATHER_SCATTER)
1087     {
1088       /* N scalar loads plus gathering them into a vector.  */
1089       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1090       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1091       inside_cost += record_stmt_cost (body_cost_vec,
1092 				       ncopies * assumed_nunits,
1093 				       scalar_load, stmt_info, 0, vect_body);
1094     }
1095   else
1096     vect_get_load_cost (dr, ncopies, first_stmt_p,
1097 			&inside_cost, &prologue_cost,
1098 			prologue_cost_vec, body_cost_vec, true);
1099   if (memory_access_type == VMAT_ELEMENTWISE
1100       || memory_access_type == VMAT_STRIDED_SLP)
1101     inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1102 				     stmt_info, 0, vect_body);
1103 
1104   if (dump_enabled_p ())
1105     dump_printf_loc (MSG_NOTE, vect_location,
1106                      "vect_model_load_cost: inside_cost = %d, "
1107                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
1108 }
1109 
1110 
1111 /* Calculate cost of DR's memory access.  */
1112 void
1113 vect_get_load_cost (struct data_reference *dr, int ncopies,
1114 		    bool add_realign_cost, unsigned int *inside_cost,
1115 		    unsigned int *prologue_cost,
1116 		    stmt_vector_for_cost *prologue_cost_vec,
1117 		    stmt_vector_for_cost *body_cost_vec,
1118 		    bool record_prologue_costs)
1119 {
1120   int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1121   gimple *stmt = DR_STMT (dr);
1122   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1123 
1124   switch (alignment_support_scheme)
1125     {
1126     case dr_aligned:
1127       {
1128 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1129 					  stmt_info, 0, vect_body);
1130 
1131         if (dump_enabled_p ())
1132           dump_printf_loc (MSG_NOTE, vect_location,
1133                            "vect_model_load_cost: aligned.\n");
1134 
1135         break;
1136       }
1137     case dr_unaligned_supported:
1138       {
1139         /* Here, we assign an additional cost for the unaligned load.  */
1140 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1141 					  unaligned_load, stmt_info,
1142 					  DR_MISALIGNMENT (dr), vect_body);
1143 
1144         if (dump_enabled_p ())
1145           dump_printf_loc (MSG_NOTE, vect_location,
1146                            "vect_model_load_cost: unaligned supported by "
1147                            "hardware.\n");
1148 
1149         break;
1150       }
1151     case dr_explicit_realign:
1152       {
1153 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1154 					  vector_load, stmt_info, 0, vect_body);
1155 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1156 					  vec_perm, stmt_info, 0, vect_body);
1157 
1158         /* FIXME: If the misalignment remains fixed across the iterations of
1159            the containing loop, the following cost should be added to the
1160            prologue costs.  */
1161         if (targetm.vectorize.builtin_mask_for_load)
1162 	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1163 					    stmt_info, 0, vect_body);
1164 
1165         if (dump_enabled_p ())
1166           dump_printf_loc (MSG_NOTE, vect_location,
1167                            "vect_model_load_cost: explicit realign\n");
1168 
1169         break;
1170       }
1171     case dr_explicit_realign_optimized:
1172       {
1173         if (dump_enabled_p ())
1174           dump_printf_loc (MSG_NOTE, vect_location,
1175                            "vect_model_load_cost: unaligned software "
1176                            "pipelined.\n");
1177 
1178         /* Unaligned software pipeline has a load of an address, an initial
1179            load, and possibly a mask operation to "prime" the loop.  However,
1180            if this is an access in a group of loads, which provide grouped
1181            access, then the above cost should only be considered for one
1182            access in the group.  Inside the loop, there is a load op
1183            and a realignment op.  */
1184 
1185         if (add_realign_cost && record_prologue_costs)
1186           {
1187 	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1188 						vector_stmt, stmt_info,
1189 						0, vect_prologue);
1190             if (targetm.vectorize.builtin_mask_for_load)
1191 	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1192 						  vector_stmt, stmt_info,
1193 						  0, vect_prologue);
1194           }
1195 
1196 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1197 					  stmt_info, 0, vect_body);
1198 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1199 					  stmt_info, 0, vect_body);
1200 
1201         if (dump_enabled_p ())
1202           dump_printf_loc (MSG_NOTE, vect_location,
1203                            "vect_model_load_cost: explicit realign optimized"
1204                            "\n");
1205 
1206         break;
1207       }
1208 
1209     case dr_unaligned_unsupported:
1210       {
1211         *inside_cost = VECT_MAX_COST;
1212 
1213         if (dump_enabled_p ())
1214           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1215                            "vect_model_load_cost: unsupported access.\n");
1216         break;
1217       }
1218 
1219     default:
1220       gcc_unreachable ();
1221     }
1222 }
1223 
1224 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1225    the loop preheader for the vectorized stmt STMT.  */
1226 
1227 static void
1228 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1229 {
1230   if (gsi)
1231     vect_finish_stmt_generation (stmt, new_stmt, gsi);
1232   else
1233     {
1234       stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1235       loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1236 
1237       if (loop_vinfo)
1238         {
1239           struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1240 	  basic_block new_bb;
1241 	  edge pe;
1242 
1243           if (nested_in_vect_loop_p (loop, stmt))
1244             loop = loop->inner;
1245 
1246 	  pe = loop_preheader_edge (loop);
1247           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1248           gcc_assert (!new_bb);
1249 	}
1250       else
1251        {
1252           bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1253           basic_block bb;
1254           gimple_stmt_iterator gsi_bb_start;
1255 
1256           gcc_assert (bb_vinfo);
1257           bb = BB_VINFO_BB (bb_vinfo);
1258           gsi_bb_start = gsi_after_labels (bb);
1259           gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1260        }
1261     }
1262 
1263   if (dump_enabled_p ())
1264     {
1265       dump_printf_loc (MSG_NOTE, vect_location,
1266                        "created new init_stmt: ");
1267       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1268     }
1269 }
1270 
1271 /* Function vect_init_vector.
1272 
1273    Insert a new stmt (INIT_STMT) that initializes a new variable of type
1274    TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
1275    a vector type, a vector with all elements equal to VAL is created first.
1276    Place the initialization at GSI if it is not NULL.  Otherwise, place the
1277    initialization at the loop preheader.
1278    Return the DEF of INIT_STMT.
1279    It will be used in the vectorization of STMT.  */
1280 
1281 tree
1282 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1283 {
1284   gimple *init_stmt;
1285   tree new_temp;
1286 
1287   /* We abuse this function to push something to an SSA name with initial 'val'.  */
1288   if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1289     {
1290       gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1291       if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1292 	{
1293 	  /* A scalar boolean value should be transformed into an
1294 	     all-zeros or all-ones value before building a vector.  */
1295 	  if (VECTOR_BOOLEAN_TYPE_P (type))
1296 	    {
1297 	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
1298 	      tree false_val = build_zero_cst (TREE_TYPE (type));
1299 
1300 	      if (CONSTANT_CLASS_P (val))
1301 		val = integer_zerop (val) ? false_val : true_val;
1302 	      else
1303 		{
1304 		  new_temp = make_ssa_name (TREE_TYPE (type));
1305 		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1306 						   val, true_val, false_val);
1307 		  vect_init_vector_1 (stmt, init_stmt, gsi);
1308 		  val = new_temp;
1309 		}
1310 	    }
1311 	  else if (CONSTANT_CLASS_P (val))
1312 	    val = fold_convert (TREE_TYPE (type), val);
1313 	  else
1314 	    {
1315 	      new_temp = make_ssa_name (TREE_TYPE (type));
1316 	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1317 		init_stmt = gimple_build_assign (new_temp,
1318 						 fold_build1 (VIEW_CONVERT_EXPR,
1319 							      TREE_TYPE (type),
1320 							      val));
1321 	      else
1322 		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1323 	      vect_init_vector_1 (stmt, init_stmt, gsi);
1324 	      val = new_temp;
1325 	    }
1326 	}
1327       val = build_vector_from_val (type, val);
1328     }
1329 
1330   new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1331   init_stmt = gimple_build_assign  (new_temp, val);
1332   vect_init_vector_1 (stmt, init_stmt, gsi);
1333   return new_temp;
1334 }
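
/* Illustrative sketch (not from the original sources; names are
   hypothetical): a typical call is

       tree vec_cst = vect_init_vector (stmt, build_int_cst (scalar_type, 1),
					vectype, NULL);

   which builds the splat { 1, 1, ... }, emits the 'cst_' init statement on
   the loop preheader edge (because GSI is NULL), and returns the new SSA
   name.  */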
1335 
1336 /* Function vect_get_vec_def_for_operand_1.
1337 
1338    For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1339    DT that will be used in the vectorized stmt.  */
1340 
1341 tree
1342 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1343 {
1344   tree vec_oprnd;
1345   gimple *vec_stmt;
1346   stmt_vec_info def_stmt_info = NULL;
1347 
1348   switch (dt)
1349     {
1350     /* operand is a constant or a loop invariant.  */
1351     case vect_constant_def:
1352     case vect_external_def:
1353       /* Code should use vect_get_vec_def_for_operand.  */
1354       gcc_unreachable ();
1355 
1356     /* operand is defined inside the loop.  */
1357     case vect_internal_def:
1358       {
1359         /* Get the def from the vectorized stmt.  */
1360         def_stmt_info = vinfo_for_stmt (def_stmt);
1361 
1362         vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1363         /* Get vectorized pattern statement.  */
1364         if (!vec_stmt
1365             && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1366             && !STMT_VINFO_RELEVANT (def_stmt_info))
1367           vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1368                        STMT_VINFO_RELATED_STMT (def_stmt_info)));
1369         gcc_assert (vec_stmt);
1370 	if (gimple_code (vec_stmt) == GIMPLE_PHI)
1371 	  vec_oprnd = PHI_RESULT (vec_stmt);
1372 	else if (is_gimple_call (vec_stmt))
1373 	  vec_oprnd = gimple_call_lhs (vec_stmt);
1374 	else
1375 	  vec_oprnd = gimple_assign_lhs (vec_stmt);
1376         return vec_oprnd;
1377       }
1378 
1379     /* operand is defined by a loop header phi.  */
1380     case vect_reduction_def:
1381     case vect_double_reduction_def:
1382     case vect_nested_cycle:
1383     case vect_induction_def:
1384       {
1385 	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1386 
1387         /* Get the def from the vectorized stmt.  */
1388         def_stmt_info = vinfo_for_stmt (def_stmt);
1389         vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1390 	if (gimple_code (vec_stmt) == GIMPLE_PHI)
1391 	  vec_oprnd = PHI_RESULT (vec_stmt);
1392 	else
1393 	  vec_oprnd = gimple_get_lhs (vec_stmt);
1394         return vec_oprnd;
1395       }
1396 
1397     default:
1398       gcc_unreachable ();
1399     }
1400 }
1401 
1402 
1403 /* Function vect_get_vec_def_for_operand.
1404 
1405    OP is an operand in STMT.  This function returns a (vector) def that will be
1406    used in the vectorized stmt for STMT.
1407 
1408    In the case that OP is an SSA_NAME which is defined in the loop, then
1409    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1410 
1411    In case OP is an invariant or constant, a new stmt that creates a vector def
1412    needs to be introduced.  VECTYPE may be used to specify a required type for
1413    the vector invariant.  */
1414 
1415 tree
1416 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1417 {
1418   gimple *def_stmt;
1419   enum vect_def_type dt;
1420   bool is_simple_use;
1421   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1422   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1423 
1424   if (dump_enabled_p ())
1425     {
1426       dump_printf_loc (MSG_NOTE, vect_location,
1427                        "vect_get_vec_def_for_operand: ");
1428       dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1429       dump_printf (MSG_NOTE, "\n");
1430     }
1431 
1432   is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1433   gcc_assert (is_simple_use);
1434   if (def_stmt && dump_enabled_p ())
1435     {
1436       dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
1437       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1438     }
1439 
1440   if (dt == vect_constant_def || dt == vect_external_def)
1441     {
1442       tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1443       tree vector_type;
1444 
1445       if (vectype)
1446 	vector_type = vectype;
1447       else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1448 	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1449 	vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1450       else
1451 	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1452 
1453       gcc_assert (vector_type);
1454       return vect_init_vector (stmt, op, vector_type, NULL);
1455     }
1456   else
1457     return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1458 }
1459 
1460 
1461 /* Function vect_get_vec_def_for_stmt_copy
1462 
1463    Return a vector-def for an operand.  This function is used when the
1464    vectorized stmt to be created (by the caller to this function) is a "copy"
1465    created in case the vectorized result cannot fit in one vector, and several
1466    copies of the vector-stmt are required.  In this case the vector-def is
1467    retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1468    of the stmt that defines VEC_OPRND.
1469    DT is the type of the vector def VEC_OPRND.
1470 
1471    Context:
1472         In case the vectorization factor (VF) is bigger than the number
1473    of elements that can fit in a vectype (nunits), we have to generate
1474    more than one vector stmt to vectorize the scalar stmt.  This situation
1475    arises when there are multiple data-types operated upon in the loop; the
1476    smallest data-type determines the VF, and as a result, when vectorizing
1477    stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1478    vector stmt (each computing a vector of 'nunits' results, and together
1479    computing 'VF' results in each iteration).  This function is called when
1480    vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1481    which VF=16 and nunits=4, so the number of copies required is 4):
1482 
1483    scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT
1484 
1485    S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
1486                         VS1.1:  vx.1 = memref1      VS1.2
1487                         VS1.2:  vx.2 = memref2      VS1.3
1488                         VS1.3:  vx.3 = memref3
1489 
1490    S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
1491                         VSnew.1:  vz1 = vx.1 + ...  VSnew.2
1492                         VSnew.2:  vz2 = vx.2 + ...  VSnew.3
1493                         VSnew.3:  vz3 = vx.3 + ...
1494 
1495    The vectorization of S1 is explained in vectorizable_load.
1496    The vectorization of S2:
1497         To create the first vector-stmt out of the 4 copies - VSnew.0 -
1498    the function 'vect_get_vec_def_for_operand' is called to
1499    get the relevant vector-def for each operand of S2.  For operand x it
1500    returns  the vector-def 'vx.0'.
1501 
1502         To create the remaining copies of the vector-stmt (VSnew.j), this
1503    function is called to get the relevant vector-def for each operand.  It is
1504    obtained from the respective VS1.j stmt, which is recorded in the
1505    STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1506 
1507         For example, to obtain the vector-def 'vx.1' in order to create the
1508    vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1509    Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1510    STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1511    and return its def ('vx.1').
1512    Overall, to create the above sequence this function will be called 3 times:
1513         vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1514         vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1515         vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
1516 
1517 tree
1518 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1519 {
1520   gimple *vec_stmt_for_operand;
1521   stmt_vec_info def_stmt_info;
1522 
1523   /* Do nothing; can reuse same def.  */
1524   if (dt == vect_external_def || dt == vect_constant_def)
1525     return vec_oprnd;
1526 
1527   vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1528   def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1529   gcc_assert (def_stmt_info);
1530   vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1531   gcc_assert (vec_stmt_for_operand);
1532   if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1533     vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1534   else
1535     vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1536   return vec_oprnd;
1537 }
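
/* Illustrative sketch only (variable names are hypothetical): a typical
   caller iterates over the NCOPIES copies of a vector stmt, obtaining the
   first def with vect_get_vec_def_for_operand and the rest with this
   function, as vect_build_gather_load_calls below does:

       tree vec_oprnd = NULL_TREE;
       for (unsigned j = 0; j < ncopies; ++j)
	 {
	   if (j == 0)
	     vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
	   else
	     vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
	   ... build the J'th copy of the vector stmt from VEC_OPRND ...
	 }  */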
1538 
1539 
1540 /* Get vectorized definitions for the operands to create a copy of an original
1541    stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */
1542 
1543 void
1544 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1545 				 vec<tree> *vec_oprnds0,
1546 				 vec<tree> *vec_oprnds1)
1547 {
1548   tree vec_oprnd = vec_oprnds0->pop ();
1549 
1550   vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1551   vec_oprnds0->quick_push (vec_oprnd);
1552 
1553   if (vec_oprnds1 && vec_oprnds1->length ())
1554     {
1555       vec_oprnd = vec_oprnds1->pop ();
1556       vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1557       vec_oprnds1->quick_push (vec_oprnd);
1558     }
1559 }
1560 
1561 
1562 /* Get vectorized definitions for OP0 and OP1.  */
1563 
1564 void
1565 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1566 		   vec<tree> *vec_oprnds0,
1567 		   vec<tree> *vec_oprnds1,
1568 		   slp_tree slp_node)
1569 {
1570   if (slp_node)
1571     {
1572       int nops = (op1 == NULL_TREE) ? 1 : 2;
1573       auto_vec<tree> ops (nops);
1574       auto_vec<vec<tree> > vec_defs (nops);
1575 
1576       ops.quick_push (op0);
1577       if (op1)
1578         ops.quick_push (op1);
1579 
1580       vect_get_slp_defs (ops, slp_node, &vec_defs);
1581 
1582       *vec_oprnds0 = vec_defs[0];
1583       if (op1)
1584 	*vec_oprnds1 = vec_defs[1];
1585     }
1586   else
1587     {
1588       tree vec_oprnd;
1589 
1590       vec_oprnds0->create (1);
1591       vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1592       vec_oprnds0->quick_push (vec_oprnd);
1593 
1594       if (op1)
1595 	{
1596 	  vec_oprnds1->create (1);
1597 	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1598 	  vec_oprnds1->quick_push (vec_oprnd);
1599 	}
1600     }
1601 }
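
/* Illustrative sketch only (hypothetical variable names): a caller
   vectorizing a two-operand statement typically shares one loop over the
   copies between the SLP and non-SLP paths:

       vec<tree> vec_oprnds0 = vNULL, vec_oprnds1 = vNULL;
       if (j == 0)
	 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			    slp_node);
       else
	 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);  */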
1602 
1603 /* Helper function called by vect_finish_replace_stmt and
1604    vect_finish_stmt_generation.  Set the location of the new
1605    statement and create a stmt_vec_info for it.  */
1606 
1607 static void
1608 vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
1609 {
1610   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1611   vec_info *vinfo = stmt_info->vinfo;
1612 
1613   set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1614 
1615   if (dump_enabled_p ())
1616     {
1617       dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1618       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1619     }
1620 
1621   gimple_set_location (vec_stmt, gimple_location (stmt));
1622 
1623   /* While EH edges will generally prevent vectorization, stmt might
1624      e.g. be in a must-not-throw region.  Ensure newly created stmts
1625      that could throw are part of the same region.  */
1626   int lp_nr = lookup_stmt_eh_lp (stmt);
1627   if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1628     add_stmt_to_eh_lp (vec_stmt, lp_nr);
1629 }
1630 
1631 /* Replace the scalar statement STMT with a new vector statement VEC_STMT,
1632    which sets the same scalar result as STMT did.  */
1633 
1634 void
1635 vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
1636 {
1637   gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
1638 
1639   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1640   gsi_replace (&gsi, vec_stmt, true);
1641 
1642   vect_finish_stmt_generation_1 (stmt, vec_stmt);
1643 }
1644 
1645 /* Function vect_finish_stmt_generation.
1646 
1647    Insert VEC_STMT, the vectorized form of STMT, before GSI.  */
1648 
1649 void
1650 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1651 			     gimple_stmt_iterator *gsi)
1652 {
1653   gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1654 
1655   if (!gsi_end_p (*gsi)
1656       && gimple_has_mem_ops (vec_stmt))
1657     {
1658       gimple *at_stmt = gsi_stmt (*gsi);
1659       tree vuse = gimple_vuse (at_stmt);
1660       if (vuse && TREE_CODE (vuse) == SSA_NAME)
1661 	{
1662 	  tree vdef = gimple_vdef (at_stmt);
1663 	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1664 	  /* If we have an SSA vuse and insert a store, update virtual
1665 	     SSA form to avoid triggering the renamer.  Do so only
1666 	     if we can easily see all uses - which is what almost always
1667 	     happens with the way vectorized stmts are inserted.  */
1668 	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1669 	      && ((is_gimple_assign (vec_stmt)
1670 		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1671 		  || (is_gimple_call (vec_stmt)
1672 		      && !(gimple_call_flags (vec_stmt)
1673 			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1674 	    {
1675 	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1676 	      gimple_set_vdef (vec_stmt, new_vdef);
1677 	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1678 	    }
1679 	}
1680     }
1681   gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1682   vect_finish_stmt_generation_1 (stmt, vec_stmt);
1683 }
1684 
1685 /* We want to vectorize a call to combined function CFN with function
1686    decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1687    as the types of all inputs.  Check whether this is possible using
1688    an internal function, returning its code if so or IFN_LAST if not.  */
1689 
1690 static internal_fn
1691 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1692 				tree vectype_out, tree vectype_in)
1693 {
1694   internal_fn ifn;
1695   if (internal_fn_p (cfn))
1696     ifn = as_internal_fn (cfn);
1697   else
1698     ifn = associated_internal_fn (fndecl);
1699   if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1700     {
1701       const direct_internal_fn_info &info = direct_internal_fn (ifn);
1702       if (info.vectorizable)
1703 	{
1704 	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1705 	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1706 	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1707 					      OPTIMIZE_FOR_SPEED))
1708 	    return ifn;
1709 	}
1710     }
1711   return IFN_LAST;
1712 }
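
/* For example (illustrative; actual support is target-dependent): a
   CFN_SQRT call with V4SF as both VECTYPE_OUT and VECTYPE_IN maps to
   IFN_SQRT, and vectorizable_internal_function returns IFN_SQRT only if
   direct_internal_fn_supported_p reports that the target implements the
   operation for that vector mode; otherwise it returns IFN_LAST.  */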
1713 
1714 
1715 static tree permute_vec_elements (tree, tree, tree, gimple *,
1716 				  gimple_stmt_iterator *);
1717 
1718 /* Check whether a load or store statement in the loop described by
1719    LOOP_VINFO is possible in a fully-masked loop.  This is testing
1720    whether the vectorizer pass has the appropriate support, as well as
1721    whether the target does.
1722 
1723    VLS_TYPE says whether the statement is a load or store and VECTYPE
1724    is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
1725    says how the load or store is going to be implemented and GROUP_SIZE
1726    is the number of load or store statements in the containing group.
1727    If the access is a gather load or scatter store, GS_INFO describes
1728    its arguments.
1729 
1730    Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1731    supported, otherwise record the required mask types.  */
1732 
1733 static void
1734 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1735 			  vec_load_store_type vls_type, int group_size,
1736 			  vect_memory_access_type memory_access_type,
1737 			  gather_scatter_info *gs_info)
1738 {
1739   /* Invariant loads need no special support.  */
1740   if (memory_access_type == VMAT_INVARIANT)
1741     return;
1742 
1743   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1744   machine_mode vecmode = TYPE_MODE (vectype);
1745   bool is_load = (vls_type == VLS_LOAD);
1746   if (memory_access_type == VMAT_LOAD_STORE_LANES)
1747     {
1748       if (is_load
1749 	  ? !vect_load_lanes_supported (vectype, group_size, true)
1750 	  : !vect_store_lanes_supported (vectype, group_size, true))
1751 	{
1752 	  if (dump_enabled_p ())
1753 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1754 			     "can't use a fully-masked loop because the"
1755 			     " target doesn't have an appropriate masked"
1756 			     " load/store-lanes instruction.\n");
1757 	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1758 	  return;
1759 	}
1760       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1761       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1762       return;
1763     }
1764 
1765   if (memory_access_type == VMAT_GATHER_SCATTER)
1766     {
1767       internal_fn ifn = (is_load
1768 			 ? IFN_MASK_GATHER_LOAD
1769 			 : IFN_MASK_SCATTER_STORE);
1770       tree offset_type = TREE_TYPE (gs_info->offset);
1771       if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1772 						   gs_info->memory_type,
1773 						   TYPE_SIGN (offset_type),
1774 						   gs_info->scale))
1775 	{
1776 	  if (dump_enabled_p ())
1777 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1778 			     "can't use a fully-masked loop because the"
1779 			     " target doesn't have an appropriate masked"
1780 			     " gather load or scatter store instruction.\n");
1781 	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1782 	  return;
1783 	}
1784       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1785       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1786       return;
1787     }
1788 
1789   if (memory_access_type != VMAT_CONTIGUOUS
1790       && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1791     {
1792       /* Element X of the data must come from iteration i * VF + X of the
1793 	 scalar loop.  We need more work to support other mappings.  */
1794       if (dump_enabled_p ())
1795 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1796 			 "can't use a fully-masked loop because an access"
1797 			 " isn't contiguous.\n");
1798       LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1799       return;
1800     }
1801 
1802   machine_mode mask_mode;
1803   if (!(targetm.vectorize.get_mask_mode
1804 	(GET_MODE_NUNITS (vecmode),
1805 	 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1806       || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1807     {
1808       if (dump_enabled_p ())
1809 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1810 			 "can't use a fully-masked loop because the target"
1811 			 " doesn't have the appropriate masked load or"
1812 			 " store.\n");
1813       LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1814       return;
1815     }
1816   /* We might load more scalars than we need for permuting SLP loads.
1817      We checked in get_group_load_store_type that the extra elements
1818      don't leak into a new vector.  */
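  /* For example (illustrative constants): GROUP_SIZE 2, VF 8 and NUNITS 4
     give 2 * 8 = 16 scalars per iteration and hence NVECTORS 4 below.  */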
1819   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1820   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1821   unsigned int nvectors;
1822   if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1823     vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1824   else
1825     gcc_unreachable ();
1826 }
1827 
1828 /* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
1829    form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1830    that needs to be applied to all loads and stores in a vectorized loop.
1831    Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1832 
1833    MASK_TYPE is the type of both masks.  If new statements are needed,
1834    insert them before GSI.  */
1835 
1836 static tree
1837 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1838 			 gimple_stmt_iterator *gsi)
1839 {
1840   gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1841   if (!loop_mask)
1842     return vec_mask;
1843 
1844   gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1845   tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1846   gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1847 					  vec_mask, loop_mask);
1848   gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1849   return and_res;
1850 }
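
/* For example, when LOOP_MASK is nonnull this emits (SSA names
   illustrative):

       vec_mask_and_6 = vec_mask_4 & loop_mask_5;

   and returns vec_mask_and_6 as the mask to use for the access.  */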
1851 
1852 /* Determine whether we can use a gather load or scatter store to vectorize
1853    strided load or store STMT by truncating the current offset to a smaller
1854    width.  We need to be able to construct an offset vector:
1855 
1856      { 0, X, X*2, X*3, ... }
1857 
1858    without loss of precision, where X is STMT's DR_STEP.
1859 
1860    Return true if this is possible, describing the gather load or scatter
1861    store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */
1862 
1863 static bool
1864 vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo,
1865 				     bool masked_p,
1866 				     gather_scatter_info *gs_info)
1867 {
1868   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1869   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1870   tree step = DR_STEP (dr);
1871   if (TREE_CODE (step) != INTEGER_CST)
1872     {
1873       /* ??? Perhaps we could use range information here?  */
1874       if (dump_enabled_p ())
1875 	dump_printf_loc (MSG_NOTE, vect_location,
1876 			 "cannot truncate variable step.\n");
1877       return false;
1878     }
1879 
1880   /* Get the number of bits in an element.  */
1881   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1882   scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1883   unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1884 
1885   /* Set COUNT to the upper limit on the number of elements - 1.
1886      Start with the maximum vectorization factor.  */
1887   unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1888 
1889   /* Try lowering COUNT to the number of scalar latch iterations.  */
1890   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1891   widest_int max_iters;
1892   if (max_loop_iterations (loop, &max_iters)
1893       && max_iters < count)
1894     count = max_iters.to_shwi ();
1895 
1896   /* Try scales of 1 and the element size.  */
1897   int scales[] = { 1, vect_get_scalar_dr_size (dr) };
1898   bool overflow_p = false;
1899   for (int i = 0; i < 2; ++i)
1900     {
1901       int scale = scales[i];
1902       widest_int factor;
1903       if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1904 	continue;
1905 
1906       /* See whether we can calculate COUNT * STEP / SCALE
1907 	 in ELEMENT_BITS bits.  */
1908       widest_int range = wi::mul (count, factor, SIGNED, &overflow_p);
1909       if (overflow_p)
1910 	continue;
1911       signop sign = range >= 0 ? UNSIGNED : SIGNED;
1912       if (wi::min_precision (range, sign) > element_bits)
1913 	{
1914 	  overflow_p = true;
1915 	  continue;
1916 	}
1917 
1918       /* See whether the target supports the operation.  */
1919       tree memory_type = TREE_TYPE (DR_REF (dr));
1920       if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
1921 				     memory_type, element_bits, sign, scale,
1922 				     &gs_info->ifn, &gs_info->element_type))
1923 	continue;
1924 
1925       tree offset_type = build_nonstandard_integer_type (element_bits,
1926 							 sign == UNSIGNED);
1927 
1928       gs_info->decl = NULL_TREE;
1929       /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1930 	 but we don't need to store that here.  */
1931       gs_info->base = NULL_TREE;
1932       gs_info->offset = fold_convert (offset_type, step);
1933       gs_info->offset_dt = vect_constant_def;
1934       gs_info->offset_vectype = NULL_TREE;
1935       gs_info->scale = scale;
1936       gs_info->memory_type = memory_type;
1937       return true;
1938     }
1939 
1940   if (overflow_p && dump_enabled_p ())
1941     dump_printf_loc (MSG_NOTE, vect_location,
1942 		     "truncating gather/scatter offset to %d bits"
1943 		     " might change its value.\n", element_bits);
1944 
1945   return false;
1946 }
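
/* Worked example (illustrative): with a DR_STEP of 4 bytes, 32-bit vector
   elements and a loop known to iterate at most 1000 times, a scale of 1
   gives offsets 0, 4, 8, ... of at most about 4000, which easily fits in
   32 bits, so a gather or scatter with a 32-bit offset type is offered if
   the target supports one.  */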
1947 
1948 /* Return true if we can use gather/scatter internal functions to
1949    vectorize STMT, which is a grouped or strided load or store.
1950    MASKED_P is true if load or store is conditional.  When returning
1951    true, fill in GS_INFO with the information required to perform the
1952    operation.  */
1953 
1954 static bool
1955 vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
1956 				    bool masked_p,
1957 				    gather_scatter_info *gs_info)
1958 {
1959   if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
1960       || gs_info->decl)
1961     return vect_truncate_gather_scatter_offset (stmt, loop_vinfo,
1962 						masked_p, gs_info);
1963 
1964   scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
1965   unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1966   tree offset_type = TREE_TYPE (gs_info->offset);
1967   unsigned int offset_bits = TYPE_PRECISION (offset_type);
1968 
1969   /* Enforced by vect_check_gather_scatter.  */
1970   gcc_assert (element_bits >= offset_bits);
1971 
1972   /* If the elements are wider than the offset, convert the offset to the
1973      same width, without changing its sign.  */
1974   if (element_bits > offset_bits)
1975     {
1976       bool unsigned_p = TYPE_UNSIGNED (offset_type);
1977       offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
1978       gs_info->offset = fold_convert (offset_type, gs_info->offset);
1979     }
1980 
1981   if (dump_enabled_p ())
1982     dump_printf_loc (MSG_NOTE, vect_location,
1983 		     "using gather/scatter for strided/grouped access,"
1984 		     " scale = %d\n", gs_info->scale);
1985 
1986   return true;
1987 }
1988 
1989 /* STMT is a non-strided load or store, meaning that it accesses
1990    elements with a known constant step.  Return -1 if that step
1991    is negative, 0 if it is zero, and 1 if it is greater than zero.  */
1992 
1993 static int
1994 compare_step_with_zero (gimple *stmt)
1995 {
1996   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1997   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1998   return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1999 			       size_zero_node);
2000 }
2001 
2002 /* If the target supports a permute mask that reverses the elements in
2003    a vector of type VECTYPE, return that mask, otherwise return null.  */
2004 
2005 static tree
2006 perm_mask_for_reverse (tree vectype)
2007 {
2008   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2009 
2010   /* The encoding has a single stepped pattern.  */
2011   vec_perm_builder sel (nunits, 1, 3);
2012   for (int i = 0; i < 3; ++i)
2013     sel.quick_push (nunits - 1 - i);
2014 
2015   vec_perm_indices indices (sel, 1, nunits);
2016   if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2017     return NULL_TREE;
2018   return vect_gen_perm_mask_checked (vectype, indices);
2019 }
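
/* For example, for a four-element vector the stepped encoding above
   expands to the full selector { 3, 2, 1, 0 }, i.e. element I of the
   result is element NUNITS - 1 - I of the input.  */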
2020 
2021 /* A subroutine of get_load_store_type, with a subset of the same
2022    arguments.  Handle the case where STMT is a load or store that
2023    accesses consecutive elements with a negative step.  */
2024 
2025 static vect_memory_access_type
2026 get_negative_load_store_type (gimple *stmt, tree vectype,
2027 			      vec_load_store_type vls_type,
2028 			      unsigned int ncopies)
2029 {
2030   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2031   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2032   dr_alignment_support alignment_support_scheme;
2033 
2034   if (ncopies > 1)
2035     {
2036       if (dump_enabled_p ())
2037 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2038 			 "multiple types with negative step.\n");
2039       return VMAT_ELEMENTWISE;
2040     }
2041 
2042   alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2043   if (alignment_support_scheme != dr_aligned
2044       && alignment_support_scheme != dr_unaligned_supported)
2045     {
2046       if (dump_enabled_p ())
2047 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2048 			 "negative step but alignment required.\n");
2049       return VMAT_ELEMENTWISE;
2050     }
2051 
2052   if (vls_type == VLS_STORE_INVARIANT)
2053     {
2054       if (dump_enabled_p ())
2055 	dump_printf_loc (MSG_NOTE, vect_location,
2056 			 "negative step with invariant source;"
2057 			 " no permute needed.\n");
2058       return VMAT_CONTIGUOUS_DOWN;
2059     }
2060 
2061   if (!perm_mask_for_reverse (vectype))
2062     {
2063       if (dump_enabled_p ())
2064 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2065 			 "negative step and reversing not supported.\n");
2066       return VMAT_ELEMENTWISE;
2067     }
2068 
2069   return VMAT_CONTIGUOUS_REVERSE;
2070 }
2071 
2072 /* STMT is either a masked or unconditional store.  Return the value
2073    being stored.  */
2074 
2075 tree
2076 vect_get_store_rhs (gimple *stmt)
2077 {
2078   if (gassign *assign = dyn_cast <gassign *> (stmt))
2079     {
2080       gcc_assert (gimple_assign_single_p (assign));
2081       return gimple_assign_rhs1 (assign);
2082     }
2083   if (gcall *call = dyn_cast <gcall *> (stmt))
2084     {
2085       internal_fn ifn = gimple_call_internal_fn (call);
2086       int index = internal_fn_stored_value_index (ifn);
2087       gcc_assert (index >= 0);
2088       return gimple_call_arg (stmt, index);
2089     }
2090   gcc_unreachable ();
2091 }
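
/* For example, an IFN_MASK_STORE call passes the stored value as its
   fourth argument, so internal_fn_stored_value_index returns 3 and the
   value is taken from there rather than from an assignment RHS.  */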
2092 
2093 /* A subroutine of get_load_store_type, with a subset of the same
2094    arguments.  Handle the case where STMT is part of a grouped load
2095    or store.
2096 
2097    For stores, the statements in the group are all consecutive
2098    and there is no gap at the end.  For loads, the statements in the
2099    group might not be consecutive; there can be gaps between statements
2100    as well as at the end.  */
2101 
2102 static bool
2103 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
2104 			   bool masked_p, vec_load_store_type vls_type,
2105 			   vect_memory_access_type *memory_access_type,
2106 			   gather_scatter_info *gs_info)
2107 {
2108   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2109   vec_info *vinfo = stmt_info->vinfo;
2110   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2111   struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2112   gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
2113   data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2114   unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
2115   bool single_element_p = (stmt == first_stmt
2116 			   && !GROUP_NEXT_ELEMENT (stmt_info));
2117   unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
2118   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2119 
2120   /* True if the vectorized statements would access beyond the last
2121      statement in the group.  */
2122   bool overrun_p = false;
2123 
2124   /* True if we can cope with such overrun by peeling for gaps, so that
2125      there is at least one final scalar iteration after the vector loop.  */
2126   bool can_overrun_p = (!masked_p
2127 			&& vls_type == VLS_LOAD
2128 			&& loop_vinfo
2129 			&& !loop->inner);
2130 
2131   /* There can only be a gap at the end of the group if the stride is
2132      known at compile time.  */
2133   gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
2134 
2135   /* Stores can't yet have gaps.  */
2136   gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2137 
2138   if (slp)
2139     {
2140       if (STMT_VINFO_STRIDED_P (stmt_info))
2141 	{
2142 	  /* Try to use consecutive accesses of GROUP_SIZE elements,
2143 	     separated by the stride, until we have a complete vector.
2144 	     Fall back to scalar accesses if that isn't possible.  */
2145 	  if (multiple_p (nunits, group_size))
2146 	    *memory_access_type = VMAT_STRIDED_SLP;
2147 	  else
2148 	    *memory_access_type = VMAT_ELEMENTWISE;
2149 	}
2150       else
2151 	{
2152 	  overrun_p = loop_vinfo && gap != 0;
2153 	  if (overrun_p && vls_type != VLS_LOAD)
2154 	    {
2155 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2156 			       "Grouped store with gaps requires"
2157 			       " non-consecutive accesses\n");
2158 	      return false;
2159 	    }
2160 	  /* An overrun is fine if the trailing elements are smaller
2161 	     than the alignment boundary B.  Every vector access will
2162 	     be a multiple of B and so we are guaranteed to access a
2163 	     non-gap element in the same B-sized block.  */
2164 	  if (overrun_p
2165 	      && gap < (vect_known_alignment_in_bytes (first_dr)
2166 			/ vect_get_scalar_dr_size (first_dr)))
2167 	    overrun_p = false;
2168 	  if (overrun_p && !can_overrun_p)
2169 	    {
2170 	      if (dump_enabled_p ())
2171 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2172 				 "Peeling for outer loop is not supported\n");
2173 	      return false;
2174 	    }
2175 	  int cmp = compare_step_with_zero (stmt);
2176 	  if (cmp < 0)
2177 	    *memory_access_type = get_negative_load_store_type
2178 	      (stmt, vectype, vls_type, 1);
2179 	  else
2180 	    {
2181 	      gcc_assert (!loop_vinfo || cmp > 0);
2182 	      *memory_access_type = VMAT_CONTIGUOUS;
2183 	    }
2184 	}
2185     }
2186   else
2187     {
2188       /* We can always handle this case using elementwise accesses,
2189 	 but see if something more efficient is available.  */
2190       *memory_access_type = VMAT_ELEMENTWISE;
2191 
2192       /* If there is a gap at the end of the group then these optimizations
2193 	 would access excess elements in the last iteration.  */
2194       bool would_overrun_p = (gap != 0);
2195       /* An overrun is fine if the trailing elements are smaller than the
2196 	 alignment boundary B.  Every vector access will be a multiple of B
2197 	 and so we are guaranteed to access a non-gap element in the
2198 	 same B-sized block.  */
2199       if (would_overrun_p
2200 	  && !masked_p
2201 	  && gap < (vect_known_alignment_in_bytes (first_dr)
2202 		    / vect_get_scalar_dr_size (first_dr)))
2203 	would_overrun_p = false;
2204 
2205       if (!STMT_VINFO_STRIDED_P (stmt_info)
2206 	  && (can_overrun_p || !would_overrun_p)
2207 	  && compare_step_with_zero (stmt) > 0)
2208 	{
2209 	  /* First cope with the degenerate case of a single-element
2210 	     vector.  */
2211 	  if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2212 	    *memory_access_type = VMAT_CONTIGUOUS;
2213 
2214 	  /* Otherwise try using LOAD/STORE_LANES.  */
2215 	  if (*memory_access_type == VMAT_ELEMENTWISE
2216 	      && (vls_type == VLS_LOAD
2217 		  ? vect_load_lanes_supported (vectype, group_size, masked_p)
2218 		  : vect_store_lanes_supported (vectype, group_size,
2219 						masked_p)))
2220 	    {
2221 	      *memory_access_type = VMAT_LOAD_STORE_LANES;
2222 	      overrun_p = would_overrun_p;
2223 	    }
2224 
2225 	  /* If that fails, try using permuting loads.  */
2226 	  if (*memory_access_type == VMAT_ELEMENTWISE
2227 	      && (vls_type == VLS_LOAD
2228 		  ? vect_grouped_load_supported (vectype, single_element_p,
2229 						 group_size)
2230 		  : vect_grouped_store_supported (vectype, group_size)))
2231 	    {
2232 	      *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2233 	      overrun_p = would_overrun_p;
2234 	    }
2235 	}
2236 
2237       /* As a last resort, try using a gather load or scatter store.
2238 
2239 	 ??? Although the code can handle all group sizes correctly,
2240 	 it probably isn't a win to use separate strided accesses based
2241 	 on nearby locations.  Or, even if it's a win over scalar code,
2242 	 it might not be a win over vectorizing at a lower VF, if that
2243 	 allows us to use contiguous accesses.  */
2244       if (*memory_access_type == VMAT_ELEMENTWISE
2245 	  && single_element_p
2246 	  && loop_vinfo
2247 	  && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2248 						 masked_p, gs_info))
2249 	*memory_access_type = VMAT_GATHER_SCATTER;
2250     }
2251 
2252   if (vls_type != VLS_LOAD && first_stmt == stmt)
2253     {
2254       /* STMT is the leader of the group. Check the operands of all the
2255 	 stmts of the group.  */
2256       gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
2257       while (next_stmt)
2258 	{
2259 	  tree op = vect_get_store_rhs (next_stmt);
2260 	  gimple *def_stmt;
2261 	  enum vect_def_type dt;
2262 	  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
2263 	    {
2264 	      if (dump_enabled_p ())
2265 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2266 				 "use not simple.\n");
2267 	      return false;
2268 	    }
2269 	  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2270 	}
2271     }
2272 
2273   if (overrun_p)
2274     {
2275       gcc_assert (can_overrun_p);
2276       if (dump_enabled_p ())
2277 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2278 			 "Data access with gaps requires scalar "
2279 			 "epilogue loop\n");
2280       LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2281     }
2282 
2283   return true;
2284 }
2285 
2286 /* Analyze load or store statement STMT of type VLS_TYPE.  Return true
2287    if there is a memory access type that the vectorized form can use,
2288    storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
2289    or scatters, fill in GS_INFO accordingly.
2290 
2291    SLP says whether we're performing SLP rather than loop vectorization.
2292    MASKED_P is true if the statement is conditional on a vectorized mask.
2293    VECTYPE is the vector type that the vectorized statements will use.
2294    NCOPIES is the number of vector statements that will be needed.  */
2295 
2296 static bool
2297 get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
2298 		     vec_load_store_type vls_type, unsigned int ncopies,
2299 		     vect_memory_access_type *memory_access_type,
2300 		     gather_scatter_info *gs_info)
2301 {
2302   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2303   vec_info *vinfo = stmt_info->vinfo;
2304   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2305   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2306   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2307     {
2308       *memory_access_type = VMAT_GATHER_SCATTER;
2309       gimple *def_stmt;
2310       if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
2311 	gcc_unreachable ();
2312       else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
2313 				    &gs_info->offset_dt,
2314 				    &gs_info->offset_vectype))
2315 	{
2316 	  if (dump_enabled_p ())
2317 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2318 			     "%s index use not simple.\n",
2319 			     vls_type == VLS_LOAD ? "gather" : "scatter");
2320 	  return false;
2321 	}
2322     }
2323   else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2324     {
2325       if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
2326 				      memory_access_type, gs_info))
2327 	return false;
2328     }
2329   else if (STMT_VINFO_STRIDED_P (stmt_info))
2330     {
2331       gcc_assert (!slp);
2332       if (loop_vinfo
2333 	  && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2334 						 masked_p, gs_info))
2335 	*memory_access_type = VMAT_GATHER_SCATTER;
2336       else
2337 	*memory_access_type = VMAT_ELEMENTWISE;
2338     }
2339   else
2340     {
2341       int cmp = compare_step_with_zero (stmt);
2342       if (cmp < 0)
2343 	*memory_access_type = get_negative_load_store_type
2344 	  (stmt, vectype, vls_type, ncopies);
2345       else if (cmp == 0)
2346 	{
2347 	  gcc_assert (vls_type == VLS_LOAD);
2348 	  *memory_access_type = VMAT_INVARIANT;
2349 	}
2350       else
2351 	*memory_access_type = VMAT_CONTIGUOUS;
2352     }
2353 
2354   if ((*memory_access_type == VMAT_ELEMENTWISE
2355        || *memory_access_type == VMAT_STRIDED_SLP)
2356       && !nunits.is_constant ())
2357     {
2358       if (dump_enabled_p ())
2359 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2360 			 "Not using elementwise accesses due to variable "
2361 			 "vectorization factor.\n");
2362       return false;
2363     }
2364 
2365   /* FIXME: At the moment the cost model seems to underestimate the
2366      cost of using elementwise accesses.  This check preserves the
2367      traditional behavior until that can be fixed.  */
2368   if (*memory_access_type == VMAT_ELEMENTWISE
2369       && !STMT_VINFO_STRIDED_P (stmt_info)
2370       && !(stmt == GROUP_FIRST_ELEMENT (stmt_info)
2371 	   && !GROUP_NEXT_ELEMENT (stmt_info)
2372 	   && !pow2p_hwi (GROUP_SIZE (stmt_info))))
2373     {
2374       if (dump_enabled_p ())
2375 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2376 			 "not falling back to elementwise accesses\n");
2377       return false;
2378     }
2379   return true;
2380 }
2381 
2382 /* Return true if boolean argument MASK is suitable for vectorizing
2383    conditional load or store STMT.  When returning true, store the type
2384    of the definition in *MASK_DT_OUT and the type of the vectorized mask
2385    in *MASK_VECTYPE_OUT.  */
2386 
2387 static bool
2388 vect_check_load_store_mask (gimple *stmt, tree mask,
2389 			    vect_def_type *mask_dt_out,
2390 			    tree *mask_vectype_out)
2391 {
2392   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2393     {
2394       if (dump_enabled_p ())
2395 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2396 			 "mask argument is not a boolean.\n");
2397       return false;
2398     }
2399 
2400   if (TREE_CODE (mask) != SSA_NAME)
2401     {
2402       if (dump_enabled_p ())
2403 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2404 			 "mask argument is not an SSA name.\n");
2405       return false;
2406     }
2407 
2408   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2409   gimple *def_stmt;
2410   enum vect_def_type mask_dt;
2411   tree mask_vectype;
2412   if (!vect_is_simple_use (mask, stmt_info->vinfo, &def_stmt, &mask_dt,
2413 			   &mask_vectype))
2414     {
2415       if (dump_enabled_p ())
2416 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2417 			 "mask use not simple.\n");
2418       return false;
2419     }
2420 
2421   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2422   if (!mask_vectype)
2423     mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2424 
2425   if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2426     {
2427       if (dump_enabled_p ())
2428 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2429 			 "could not find an appropriate vector mask type.\n");
2430       return false;
2431     }
2432 
2433   if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2434 		TYPE_VECTOR_SUBPARTS (vectype)))
2435     {
2436       if (dump_enabled_p ())
2437 	{
2438 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2439 			   "vector mask type ");
2440 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2441 	  dump_printf (MSG_MISSED_OPTIMIZATION,
2442 		       " does not match vector data type ");
2443 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2444 	  dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2445 	}
2446       return false;
2447     }
2448 
2449   *mask_dt_out = mask_dt;
2450   *mask_vectype_out = mask_vectype;
2451   return true;
2452 }
2453 
2454 /* Return true if stored value RHS is suitable for vectorizing store
2455    statement STMT.  When returning true, store the type of the
2456    definition in *RHS_DT_OUT, the type of the vectorized store value in
2457    *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */
2458 
2459 static bool
2460 vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out,
2461 		      tree *rhs_vectype_out, vec_load_store_type *vls_type_out)
2462 {
2463   /* In the case this is a store from a constant, make sure
2464      native_encode_expr can handle it.  */
2465   if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2466     {
2467       if (dump_enabled_p ())
2468 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2469 			 "cannot encode constant as a byte sequence.\n");
2470       return false;
2471     }
2472 
2473   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2474   gimple *def_stmt;
2475   enum vect_def_type rhs_dt;
2476   tree rhs_vectype;
2477   if (!vect_is_simple_use (rhs, stmt_info->vinfo, &def_stmt, &rhs_dt,
2478 			   &rhs_vectype))
2479     {
2480       if (dump_enabled_p ())
2481 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2482 			 "use not simple.\n");
2483       return false;
2484     }
2485 
2486   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2487   if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2488     {
2489       if (dump_enabled_p ())
2490 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2491 			 "incompatible vector types.\n");
2492       return false;
2493     }
2494 
2495   *rhs_dt_out = rhs_dt;
2496   *rhs_vectype_out = rhs_vectype;
2497   if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2498     *vls_type_out = VLS_STORE_INVARIANT;
2499   else
2500     *vls_type_out = VLS_STORE;
2501   return true;
2502 }
2503 
2504 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2505    Note that we support masks with floating-point type, in which case the
2506    floats are interpreted as a bitmask.  */
2507 
2508 static tree
2509 vect_build_all_ones_mask (gimple *stmt, tree masktype)
2510 {
2511   if (TREE_CODE (masktype) == INTEGER_TYPE)
2512     return build_int_cst (masktype, -1);
2513   else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2514     {
2515       tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2516       mask = build_vector_from_val (masktype, mask);
2517       return vect_init_vector (stmt, mask, masktype, NULL);
2518     }
2519   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2520     {
2521       REAL_VALUE_TYPE r;
2522       long tmp[6];
2523       for (int j = 0; j < 6; ++j)
2524 	tmp[j] = -1;
2525       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2526       tree mask = build_real (TREE_TYPE (masktype), r);
2527       mask = build_vector_from_val (masktype, mask);
2528       return vect_init_vector (stmt, mask, masktype, NULL);
2529     }
2530   gcc_unreachable ();
2531 }
2532 
2533 /* Build an all-zero merge value of type VECTYPE while vectorizing
2534    STMT as a gather load.  */
2535 
2536 static tree
2537 vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2538 {
2539   tree merge;
2540   if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2541     merge = build_int_cst (TREE_TYPE (vectype), 0);
2542   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2543     {
2544       REAL_VALUE_TYPE r;
2545       long tmp[6];
2546       for (int j = 0; j < 6; ++j)
2547 	tmp[j] = 0;
2548       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2549       merge = build_real (TREE_TYPE (vectype), r);
2550     }
2551   else
2552     gcc_unreachable ();
2553   merge = build_vector_from_val (vectype, merge);
2554   return vect_init_vector (stmt, merge, vectype, NULL);
2555 }
2556 
2557 /* Build a gather load call while vectorizing STMT.  Insert new instructions
2558    before GSI and add them to VEC_STMT.  GS_INFO describes the gather load
2559    operation.  If the load is conditional, MASK is the unvectorized
2560    condition and MASK_DT is its definition type, otherwise MASK is null.  */
2561 
2562 static void
2563 vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2564 			      gimple **vec_stmt, gather_scatter_info *gs_info,
2565 			      tree mask, vect_def_type mask_dt)
2566 {
2567   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2568   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2569   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2570   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2571   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2572   int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2573   edge pe = loop_preheader_edge (loop);
2574   enum { NARROW, NONE, WIDEN } modifier;
2575   poly_uint64 gather_off_nunits
2576     = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2577 
2578   tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2579   tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2580   tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2581   tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2582   tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2583   tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2584   tree scaletype = TREE_VALUE (arglist);
2585   gcc_checking_assert (types_compatible_p (srctype, rettype)
2586 		       && (!mask || types_compatible_p (srctype, masktype)));
2587 
2588   tree perm_mask = NULL_TREE;
2589   tree mask_perm_mask = NULL_TREE;
2590   if (known_eq (nunits, gather_off_nunits))
2591     modifier = NONE;
2592   else if (known_eq (nunits * 2, gather_off_nunits))
2593     {
2594       modifier = WIDEN;
2595 
2596       /* Currently widening gathers and scatters are only supported for
2597 	 fixed-length vectors.  */
2598       int count = gather_off_nunits.to_constant ();
2599       vec_perm_builder sel (count, count, 1);
2600       for (int i = 0; i < count; ++i)
2601 	sel.quick_push (i | (count / 2));
2602 
2603       vec_perm_indices indices (sel, 1, count);
2604       perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2605 					      indices);
2606     }
2607   else if (known_eq (nunits, gather_off_nunits * 2))
2608     {
2609       modifier = NARROW;
2610 
2611       /* Currently narrowing gathers and scatters are only supported for
2612 	 fixed-length vectors.  */
2613       int count = nunits.to_constant ();
2614       vec_perm_builder sel (count, count, 1);
2615       sel.quick_grow (count);
2616       for (int i = 0; i < count; ++i)
2617 	sel[i] = i < count / 2 ? i : i + count / 2;
2618       vec_perm_indices indices (sel, 2, count);
2619       perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2620 
2621       ncopies *= 2;
2622 
2623       if (mask)
2624 	{
2625 	  for (int i = 0; i < count; ++i)
2626 	    sel[i] = i | (count / 2);
2627 	  indices.new_vector (sel, 2, count);
2628 	  mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2629 	}
2630     }
2631   else
2632     gcc_unreachable ();
2633 
2634   tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2635 					       vectype);
2636 
2637   tree ptr = fold_convert (ptrtype, gs_info->base);
2638   if (!is_gimple_min_invariant (ptr))
2639     {
2640       gimple_seq seq;
2641       ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2642       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2643       gcc_assert (!new_bb);
2644     }
2645 
2646   tree scale = build_int_cst (scaletype, gs_info->scale);
2647 
2648   tree vec_oprnd0 = NULL_TREE;
2649   tree vec_mask = NULL_TREE;
2650   tree src_op = NULL_TREE;
2651   tree mask_op = NULL_TREE;
2652   tree prev_res = NULL_TREE;
2653   stmt_vec_info prev_stmt_info = NULL;
2654 
2655   if (!mask)
2656     {
2657       src_op = vect_build_zero_merge_argument (stmt, rettype);
2658       mask_op = vect_build_all_ones_mask (stmt, masktype);
2659     }
2660 
2661   for (int j = 0; j < ncopies; ++j)
2662     {
2663       tree op, var;
2664       gimple *new_stmt;
2665       if (modifier == WIDEN && (j & 1))
2666 	op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2667 				   perm_mask, stmt, gsi);
2668       else if (j == 0)
2669 	op = vec_oprnd0
2670 	  = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2671       else
2672 	op = vec_oprnd0
2673 	  = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2674 
2675       if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2676 	{
2677 	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2678 				TYPE_VECTOR_SUBPARTS (idxtype)));
2679 	  var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2680 	  op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2681 	  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2682 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2683 	  op = var;
2684 	}
2685 
2686       if (mask)
2687 	{
2688 	  if (mask_perm_mask && (j & 1))
2689 	    mask_op = permute_vec_elements (mask_op, mask_op,
2690 					    mask_perm_mask, stmt, gsi);
2691 	  else
2692 	    {
2693 	      if (j == 0)
2694 		vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2695 	      else
2696 		vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
2697 
2698 	      mask_op = vec_mask;
2699 	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2700 		{
2701 		  gcc_assert
2702 		    (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2703 			       TYPE_VECTOR_SUBPARTS (masktype)));
2704 		  var = vect_get_new_ssa_name (masktype, vect_simple_var);
2705 		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2706 		  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
2707 						  mask_op);
2708 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2709 		  mask_op = var;
2710 		}
2711 	    }
2712 	  src_op = mask_op;
2713 	}
2714 
2715       new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2716 				    mask_op, scale);
2717 
2718       if (!useless_type_conversion_p (vectype, rettype))
2719 	{
2720 	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2721 				TYPE_VECTOR_SUBPARTS (rettype)));
2722 	  op = vect_get_new_ssa_name (rettype, vect_simple_var);
2723 	  gimple_call_set_lhs (new_stmt, op);
2724 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2725 	  var = make_ssa_name (vec_dest);
2726 	  op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2727 	  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2728 	}
2729       else
2730 	{
2731 	  var = make_ssa_name (vec_dest, new_stmt);
2732 	  gimple_call_set_lhs (new_stmt, var);
2733 	}
2734 
2735       vect_finish_stmt_generation (stmt, new_stmt, gsi);
2736 
2737       if (modifier == NARROW)
2738 	{
2739 	  if ((j & 1) == 0)
2740 	    {
2741 	      prev_res = var;
2742 	      continue;
2743 	    }
2744 	  var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2745 	  new_stmt = SSA_NAME_DEF_STMT (var);
2746 	}
2747 
2748       if (prev_stmt_info == NULL)
2749 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2750       else
2751 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2752       prev_stmt_info = vinfo_for_stmt (new_stmt);
2753     }
2754 }
2755 
2756 /* Prepare the base and offset in GS_INFO for vectorization.
2757    Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2758    to the vectorized offset argument for the first copy of STMT.  STMT
2759    is the statement described by GS_INFO and LOOP is the containing loop.  */
2760 
2761 static void
2762 vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
2763 			     gather_scatter_info *gs_info,
2764 			     tree *dataref_ptr, tree *vec_offset)
2765 {
2766   gimple_seq stmts = NULL;
2767   *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2768   if (stmts != NULL)
2769     {
2770       basic_block new_bb;
2771       edge pe = loop_preheader_edge (loop);
2772       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2773       gcc_assert (!new_bb);
2774     }
2775   tree offset_type = TREE_TYPE (gs_info->offset);
2776   tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2777   *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
2778 					      offset_vectype);
2779 }
2780 
2781 /* Prepare to implement a grouped or strided load or store using
2782    the gather load or scatter store operation described by GS_INFO.
2783    STMT is the load or store statement.
2784 
2785    Set *DATAREF_BUMP to the amount that should be added to the base
2786    address after each copy of the vectorized statement.  Set *VEC_OFFSET
2787    to an invariant offset vector in which element I has the value
2788    I * DR_STEP / SCALE.  */
2789 
2790 static void
2791 vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
2792 				 gather_scatter_info *gs_info,
2793 				 tree *dataref_bump, tree *vec_offset)
2794 {
2795   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2796   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2797   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2798   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2799   gimple_seq stmts;
2800 
2801   tree bump = size_binop (MULT_EXPR,
2802 			  fold_convert (sizetype, DR_STEP (dr)),
2803 			  size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2804   *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2805   if (stmts)
2806     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2807 
2808   /* The offset given in GS_INFO can have pointer type, so use the element
2809      type of the vector instead.  */
2810   tree offset_type = TREE_TYPE (gs_info->offset);
2811   tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2812   offset_type = TREE_TYPE (offset_vectype);
2813 
2814   /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
2815   tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2816 			  ssize_int (gs_info->scale));
2817   step = fold_convert (offset_type, step);
2818   step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2819 
2820   /* Create {0, X, X*2, X*3, ...}.  */
2821   *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2822 			      build_zero_cst (offset_type), step);
2823   if (stmts)
2824     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2825 }
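
/* Worked example (illustrative): for DR_STEP 20 bytes, a four-element
   VECTYPE and SCALE 4, the code above sets DATAREF_BUMP to 80 (20 * 4)
   and, assuming a four-element offset vector, VEC_OFFSET to
   { 0, 5, 10, 15 } (X = 20 / 4 = 5).  */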
2826 
2827 /* Return the amount that should be added to a vector pointer to move
2828    to the next or previous copy of AGGR_TYPE.  DR is the data reference
2829    being vectorized and MEMORY_ACCESS_TYPE describes the type of
2830    vectorization.  */
2831 
2832 static tree
2833 vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
2834 			     vect_memory_access_type memory_access_type)
2835 {
2836   if (memory_access_type == VMAT_INVARIANT)
2837     return size_zero_node;
2838 
2839   tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2840   tree step = vect_dr_behavior (dr)->step;
2841   if (tree_int_cst_sgn (step) == -1)
2842     iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2843   return iv_step;
2844 }
2845 
2846 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}.  */
2847 
2848 static bool
2849 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2850 		    gimple **vec_stmt, slp_tree slp_node,
2851 		    tree vectype_in, enum vect_def_type *dt)
2852 {
2853   tree op, vectype;
2854   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2855   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2856   unsigned ncopies;
2857   unsigned HOST_WIDE_INT nunits, num_bytes;
2858 
2859   op = gimple_call_arg (stmt, 0);
2860   vectype = STMT_VINFO_VECTYPE (stmt_info);
2861 
2862   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2863     return false;
2864 
2865   /* Multiple types in SLP are handled by creating the appropriate number of
2866      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
2867      case of SLP.  */
2868   if (slp_node)
2869     ncopies = 1;
2870   else
2871     ncopies = vect_get_num_copies (loop_vinfo, vectype);
2872 
2873   gcc_assert (ncopies >= 1);
2874 
2875   tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2876   if (! char_vectype)
2877     return false;
2878 
2879   if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
2880     return false;
2881 
2882   unsigned word_bytes = num_bytes / nunits;
2883 
2884   /* The encoding uses one stepped pattern for each byte in the word.  */
2885   vec_perm_builder elts (num_bytes, word_bytes, 3);
2886   for (unsigned i = 0; i < 3; ++i)
2887     for (unsigned j = 0; j < word_bytes; ++j)
2888       elts.quick_push ((i + 1) * word_bytes - j - 1);
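  /* Illustrative example (not tied to a particular target): for
     BUILT_IN_BSWAP32 with a 4 x 32-bit vector, NUM_BYTES == 16 and
     WORD_BYTES == 4, so the stepped patterns built above extrapolate to
     the selector { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
     the transform further down then emits a VIEW_CONVERT_EXPR to
     CHAR_VECTYPE, a VEC_PERM_EXPR with that selector and a
     VIEW_CONVERT_EXPR back to VECTYPE.  */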
2889 
2890   vec_perm_indices indices (elts, 1, num_bytes);
2891   if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2892     return false;
2893 
2894   if (! vec_stmt)
2895     {
2896       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2897       if (dump_enabled_p ())
2898         dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2899                          "\n");
2900       if (! slp_node)
2901 	{
2902 	  add_stmt_cost (stmt_info->vinfo->target_cost_data,
2903 			 1, vector_stmt, stmt_info, 0, vect_prologue);
2904 	  add_stmt_cost (stmt_info->vinfo->target_cost_data,
2905 			 ncopies, vec_perm, stmt_info, 0, vect_body);
2906 	}
2907       return true;
2908     }
2909 
2910   tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2911 
2912   /* Transform.  */
2913   vec<tree> vec_oprnds = vNULL;
2914   gimple *new_stmt = NULL;
2915   stmt_vec_info prev_stmt_info = NULL;
2916   for (unsigned j = 0; j < ncopies; j++)
2917     {
2918       /* Handle uses.  */
2919       if (j == 0)
2920         vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2921       else
2922         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2923 
2924       /* Arguments are ready.  Create the new vector stmt.  */
2925       unsigned i;
2926       tree vop;
2927       FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2928        {
2929 	 tree tem = make_ssa_name (char_vectype);
2930 	 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2931 						      char_vectype, vop));
2932 	 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2933 	 tree tem2 = make_ssa_name (char_vectype);
2934 	 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2935 					 tem, tem, bswap_vconst);
2936 	 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2937 	 tem = make_ssa_name (vectype);
2938 	 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2939 						      vectype, tem2));
2940 	 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2941          if (slp_node)
2942            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2943        }
2944 
2945       if (slp_node)
2946         continue;
2947 
2948       if (j == 0)
2949         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2950       else
2951         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2952 
2953       prev_stmt_info = vinfo_for_stmt (new_stmt);
2954     }
2955 
2956   vec_oprnds.release ();
2957   return true;
2958 }
2959 
2960 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2961    integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2962    in a single step.  On success, store the binary pack code in
2963    *CONVERT_CODE.  */
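/* For example (target permitting): narrowing a vector of 4 ints to a
   vector of 8 shorts can be done with a single vector pack operation, so
   *CONVERT_CODE is set to that pack code; an int-to-char narrowing would
   need an intermediate step and is rejected here.  */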
2964 
2965 static bool
2966 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2967 			  tree_code *convert_code)
2968 {
2969   if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2970       || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2971     return false;
2972 
2973   tree_code code;
2974   int multi_step_cvt = 0;
2975   auto_vec <tree, 8> interm_types;
2976   if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2977 					&code, &multi_step_cvt,
2978 					&interm_types)
2979       || multi_step_cvt)
2980     return false;
2981 
2982   *convert_code = code;
2983   return true;
2984 }
2985 
2986 /* Function vectorizable_call.
2987 
2988    Check if GS performs a function call that can be vectorized.
2989    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2990    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2991    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2992 
2993 static bool
2994 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2995 		   slp_tree slp_node)
2996 {
2997   gcall *stmt;
2998   tree vec_dest;
2999   tree scalar_dest;
3000   tree op, type;
3001   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3002   stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
3003   tree vectype_out, vectype_in;
3004   poly_uint64 nunits_in;
3005   poly_uint64 nunits_out;
3006   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3007   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3008   vec_info *vinfo = stmt_info->vinfo;
3009   tree fndecl, new_temp, rhs_type;
3010   gimple *def_stmt;
3011   enum vect_def_type dt[3]
3012     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3013   int ndts = 3;
3014   gimple *new_stmt = NULL;
3015   int ncopies, j;
3016   vec<tree> vargs = vNULL;
3017   enum { NARROW, NONE, WIDEN } modifier;
3018   size_t i, nargs;
3019   tree lhs;
3020 
3021   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3022     return false;
3023 
3024   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3025       && ! vec_stmt)
3026     return false;
3027 
3028   /* Is GS a vectorizable call?   */
3029   stmt = dyn_cast <gcall *> (gs);
3030   if (!stmt)
3031     return false;
3032 
3033   if (gimple_call_internal_p (stmt)
3034       && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3035 	  || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3036     /* Handled by vectorizable_load and vectorizable_store.  */
3037     return false;
3038 
3039   if (gimple_call_lhs (stmt) == NULL_TREE
3040       || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3041     return false;
3042 
3043   gcc_checking_assert (!stmt_can_throw_internal (stmt));
3044 
3045   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3046 
3047   /* Process function arguments.  */
3048   rhs_type = NULL_TREE;
3049   vectype_in = NULL_TREE;
3050   nargs = gimple_call_num_args (stmt);
3051 
3052   /* Bail out if the function has more than three arguments; we do not have
3053      interesting builtin functions to vectorize with more than two arguments
3054      except for fma.  No arguments is also not good.  */
3055   if (nargs == 0 || nargs > 3)
3056     return false;
3057 
3058   /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic.  */
3059   if (gimple_call_internal_p (stmt)
3060       && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3061     {
3062       nargs = 0;
3063       rhs_type = unsigned_type_node;
3064     }
3065 
3066   for (i = 0; i < nargs; i++)
3067     {
3068       tree opvectype;
3069 
3070       op = gimple_call_arg (stmt, i);
3071 
3072       /* We can only handle calls with arguments of the same type.  */
3073       if (rhs_type
3074 	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3075 	{
3076 	  if (dump_enabled_p ())
3077 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3078                              "argument types differ.\n");
3079 	  return false;
3080 	}
3081       if (!rhs_type)
3082 	rhs_type = TREE_TYPE (op);
3083 
3084       if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
3085 	{
3086 	  if (dump_enabled_p ())
3087 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3088                              "use not simple.\n");
3089 	  return false;
3090 	}
3091 
3092       if (!vectype_in)
3093 	vectype_in = opvectype;
3094       else if (opvectype
3095 	       && opvectype != vectype_in)
3096 	{
3097 	  if (dump_enabled_p ())
3098 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3099                              "argument vector types differ.\n");
3100 	  return false;
3101 	}
3102     }
3103   /* If all arguments are external or constant defs, use a vector type with
3104      the same size as the output vector type.  */
3105   if (!vectype_in)
3106     vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3107   if (vec_stmt)
3108     gcc_assert (vectype_in);
3109   if (!vectype_in)
3110     {
3111       if (dump_enabled_p ())
3112         {
3113           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3114                            "no vectype for scalar type ");
3115           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3116           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3117         }
3118 
3119       return false;
3120     }
3121 
3122   /* FORNOW */
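  /* Illustrative example: a 4-element input vector type with an 8-element
     output vector type gives NARROW (the results of two calls are packed
     into one output vector); equal element counts give NONE; a 2-element
     output type from a 4-element input type gives WIDEN.  */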
3123   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3124   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3125   if (known_eq (nunits_in * 2, nunits_out))
3126     modifier = NARROW;
3127   else if (known_eq (nunits_out, nunits_in))
3128     modifier = NONE;
3129   else if (known_eq (nunits_out * 2, nunits_in))
3130     modifier = WIDEN;
3131   else
3132     return false;
3133 
3134   /* We only handle functions that do not read or clobber memory.  */
3135   if (gimple_vuse (stmt))
3136     {
3137       if (dump_enabled_p ())
3138 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3139 			 "function reads from or writes to memory.\n");
3140       return false;
3141     }
3142 
3143   /* For now, we only vectorize functions if a target specific builtin
3144      is available.  TODO -- in some cases, it might be profitable to
3145      insert the calls for pieces of the vector, in order to be able
3146      to vectorize other operations in the loop.  */
3147   fndecl = NULL_TREE;
3148   internal_fn ifn = IFN_LAST;
3149   combined_fn cfn = gimple_call_combined_fn (stmt);
3150   tree callee = gimple_call_fndecl (stmt);
3151 
3152   /* First try using an internal function.  */
3153   tree_code convert_code = ERROR_MARK;
3154   if (cfn != CFN_LAST
3155       && (modifier == NONE
3156 	  || (modifier == NARROW
3157 	      && simple_integer_narrowing (vectype_out, vectype_in,
3158 					   &convert_code))))
3159     ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3160 					  vectype_in);
3161 
3162   /* If that fails, try asking for a target-specific built-in function.  */
3163   if (ifn == IFN_LAST)
3164     {
3165       if (cfn != CFN_LAST)
3166 	fndecl = targetm.vectorize.builtin_vectorized_function
3167 	  (cfn, vectype_out, vectype_in);
3168       else if (callee)
3169 	fndecl = targetm.vectorize.builtin_md_vectorized_function
3170 	  (callee, vectype_out, vectype_in);
3171     }
3172 
3173   if (ifn == IFN_LAST && !fndecl)
3174     {
3175       if (cfn == CFN_GOMP_SIMD_LANE
3176 	  && !slp_node
3177 	  && loop_vinfo
3178 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3179 	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3180 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3181 	     == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3182 	{
3183 	  /* We can handle IFN_GOMP_SIMD_LANE by returning a
3184 	     { 0, 1, 2, ... vf - 1 } vector.  */
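	  /* For example, with a 4-element output vector, copy J of that
	     replacement is simply the constant vector
	     { J*4, J*4 + 1, J*4 + 2, J*4 + 3 } (illustrative element
	     count).  */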
3185 	  gcc_assert (nargs == 0);
3186 	}
3187       else if (modifier == NONE
3188 	       && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3189 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3190 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3191 	return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
3192 				   vectype_in, dt);
3193       else
3194 	{
3195 	  if (dump_enabled_p ())
3196 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3197 			     "function is not vectorizable.\n");
3198 	  return false;
3199 	}
3200     }
3201 
3202   if (slp_node)
3203     ncopies = 1;
3204   else if (modifier == NARROW && ifn == IFN_LAST)
3205     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3206   else
3207     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3208 
3209   /* Sanity check: make sure that at least one copy of the vectorized stmt
3210      needs to be generated.  */
3211   gcc_assert (ncopies >= 1);
3212 
3213   if (!vec_stmt) /* transformation not required.  */
3214     {
3215       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3216       if (dump_enabled_p ())
3217         dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
3218                          "\n");
3219       if (!slp_node)
3220 	{
3221 	  vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
3222 	  if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3223 	    add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
3224 			   vec_promote_demote, stmt_info, 0, vect_body);
3225 	}
3226 
3227       return true;
3228     }
3229 
3230   /* Transform.  */
3231 
3232   if (dump_enabled_p ())
3233     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3234 
3235   /* Handle def.  */
3236   scalar_dest = gimple_call_lhs (stmt);
3237   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3238 
3239   prev_stmt_info = NULL;
3240   if (modifier == NONE || ifn != IFN_LAST)
3241     {
3242       tree prev_res = NULL_TREE;
3243       for (j = 0; j < ncopies; ++j)
3244 	{
3245 	  /* Build argument list for the vectorized call.  */
3246 	  if (j == 0)
3247 	    vargs.create (nargs);
3248 	  else
3249 	    vargs.truncate (0);
3250 
3251 	  if (slp_node)
3252 	    {
3253 	      auto_vec<vec<tree> > vec_defs (nargs);
3254 	      vec<tree> vec_oprnds0;
3255 
3256 	      for (i = 0; i < nargs; i++)
3257 		vargs.quick_push (gimple_call_arg (stmt, i));
3258 	      vect_get_slp_defs (vargs, slp_node, &vec_defs);
3259 	      vec_oprnds0 = vec_defs[0];
3260 
3261 	      /* Arguments are ready.  Create the new vector stmt.  */
3262 	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3263 		{
3264 		  size_t k;
3265 		  for (k = 0; k < nargs; k++)
3266 		    {
3267 		      vec<tree> vec_oprndsk = vec_defs[k];
3268 		      vargs[k] = vec_oprndsk[i];
3269 		    }
3270 		  if (modifier == NARROW)
3271 		    {
3272 		      tree half_res = make_ssa_name (vectype_in);
3273 		      gcall *call
3274 			= gimple_build_call_internal_vec (ifn, vargs);
3275 		      gimple_call_set_lhs (call, half_res);
3276 		      gimple_call_set_nothrow (call, true);
3277 		      new_stmt = call;
3278 		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
3279 		      if ((i & 1) == 0)
3280 			{
3281 			  prev_res = half_res;
3282 			  continue;
3283 			}
3284 		      new_temp = make_ssa_name (vec_dest);
3285 		      new_stmt = gimple_build_assign (new_temp, convert_code,
3286 						      prev_res, half_res);
3287 		    }
3288 		  else
3289 		    {
3290 		      gcall *call;
3291 		      if (ifn != IFN_LAST)
3292 			call = gimple_build_call_internal_vec (ifn, vargs);
3293 		      else
3294 			call = gimple_build_call_vec (fndecl, vargs);
3295 		      new_temp = make_ssa_name (vec_dest, call);
3296 		      gimple_call_set_lhs (call, new_temp);
3297 		      gimple_call_set_nothrow (call, true);
3298 		      new_stmt = call;
3299 		    }
3300 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3301 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3302 		}
3303 
3304 	      for (i = 0; i < nargs; i++)
3305 		{
3306 		  vec<tree> vec_oprndsi = vec_defs[i];
3307 		  vec_oprndsi.release ();
3308 		}
3309 	      continue;
3310 	    }
3311 
3312 	  for (i = 0; i < nargs; i++)
3313 	    {
3314 	      op = gimple_call_arg (stmt, i);
3315 	      if (j == 0)
3316 		vec_oprnd0
3317 		  = vect_get_vec_def_for_operand (op, stmt);
3318 	      else
3319 		{
3320 		  vec_oprnd0 = gimple_call_arg (new_stmt, i);
3321 		  vec_oprnd0
3322                     = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3323 		}
3324 
3325 	      vargs.quick_push (vec_oprnd0);
3326 	    }
3327 
3328 	  if (gimple_call_internal_p (stmt)
3329 	      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3330 	    {
3331 	      tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3332 	      tree new_var
3333 		= vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3334 	      gimple *init_stmt = gimple_build_assign (new_var, cst);
3335 	      vect_init_vector_1 (stmt, init_stmt, NULL);
3336 	      new_temp = make_ssa_name (vec_dest);
3337 	      new_stmt = gimple_build_assign (new_temp, new_var);
3338 	    }
3339 	  else if (modifier == NARROW)
3340 	    {
3341 	      tree half_res = make_ssa_name (vectype_in);
3342 	      gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3343 	      gimple_call_set_lhs (call, half_res);
3344 	      gimple_call_set_nothrow (call, true);
3345 	      new_stmt = call;
3346 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
3347 	      if ((j & 1) == 0)
3348 		{
3349 		  prev_res = half_res;
3350 		  continue;
3351 		}
3352 	      new_temp = make_ssa_name (vec_dest);
3353 	      new_stmt = gimple_build_assign (new_temp, convert_code,
3354 					      prev_res, half_res);
3355 	    }
3356 	  else
3357 	    {
3358 	      gcall *call;
3359 	      if (ifn != IFN_LAST)
3360 		call = gimple_build_call_internal_vec (ifn, vargs);
3361 	      else
3362 		call = gimple_build_call_vec (fndecl, vargs);
3363 	      new_temp = make_ssa_name (vec_dest, new_stmt);
3364 	      gimple_call_set_lhs (call, new_temp);
3365 	      gimple_call_set_nothrow (call, true);
3366 	      new_stmt = call;
3367 	    }
3368 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3369 
3370 	  if (j == (modifier == NARROW ? 1 : 0))
3371 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3372 	  else
3373 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3374 
3375 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
3376 	}
3377     }
3378   else if (modifier == NARROW)
3379     {
3380       for (j = 0; j < ncopies; ++j)
3381 	{
3382 	  /* Build argument list for the vectorized call.  */
3383 	  if (j == 0)
3384 	    vargs.create (nargs * 2);
3385 	  else
3386 	    vargs.truncate (0);
3387 
3388 	  if (slp_node)
3389 	    {
3390 	      auto_vec<vec<tree> > vec_defs (nargs);
3391 	      vec<tree> vec_oprnds0;
3392 
3393 	      for (i = 0; i < nargs; i++)
3394 		vargs.quick_push (gimple_call_arg (stmt, i));
3395 	      vect_get_slp_defs (vargs, slp_node, &vec_defs);
3396 	      vec_oprnds0 = vec_defs[0];
3397 
3398 	      /* Arguments are ready.  Create the new vector stmt.  */
3399 	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3400 		{
3401 		  size_t k;
3402 		  vargs.truncate (0);
3403 		  for (k = 0; k < nargs; k++)
3404 		    {
3405 		      vec<tree> vec_oprndsk = vec_defs[k];
3406 		      vargs.quick_push (vec_oprndsk[i]);
3407 		      vargs.quick_push (vec_oprndsk[i + 1]);
3408 		    }
3409 		  gcall *call;
3410 		  if (ifn != IFN_LAST)
3411 		    call = gimple_build_call_internal_vec (ifn, vargs);
3412 		  else
3413 		    call = gimple_build_call_vec (fndecl, vargs);
3414 		  new_temp = make_ssa_name (vec_dest, call);
3415 		  gimple_call_set_lhs (call, new_temp);
3416 		  gimple_call_set_nothrow (call, true);
3417 		  new_stmt = call;
3418 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3419 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3420 		}
3421 
3422 	      for (i = 0; i < nargs; i++)
3423 		{
3424 		  vec<tree> vec_oprndsi = vec_defs[i];
3425 		  vec_oprndsi.release ();
3426 		}
3427 	      continue;
3428 	    }
3429 
3430 	  for (i = 0; i < nargs; i++)
3431 	    {
3432 	      op = gimple_call_arg (stmt, i);
3433 	      if (j == 0)
3434 		{
3435 		  vec_oprnd0
3436 		    = vect_get_vec_def_for_operand (op, stmt);
3437 		  vec_oprnd1
3438 		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3439 		}
3440 	      else
3441 		{
3442 		  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3443 		  vec_oprnd0
3444 		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3445 		  vec_oprnd1
3446 		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3447 		}
3448 
3449 	      vargs.quick_push (vec_oprnd0);
3450 	      vargs.quick_push (vec_oprnd1);
3451 	    }
3452 
3453 	  new_stmt = gimple_build_call_vec (fndecl, vargs);
3454 	  new_temp = make_ssa_name (vec_dest, new_stmt);
3455 	  gimple_call_set_lhs (new_stmt, new_temp);
3456 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3457 
3458 	  if (j == 0)
3459 	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3460 	  else
3461 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3462 
3463 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
3464 	}
3465 
3466       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3467     }
3468   else
3469     /* No current target implements this case.  */
3470     return false;
3471 
3472   vargs.release ();
3473 
3474   /* The call in STMT might prevent it from being removed in dce.
3475      However, we cannot remove it here because of the way the SSA name
3476      it defines is mapped to the new definition.  So just replace the
3477      rhs of the statement with something harmless.  */
3478 
3479   if (slp_node)
3480     return true;
3481 
3482   type = TREE_TYPE (scalar_dest);
3483   if (is_pattern_stmt_p (stmt_info))
3484     lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3485   else
3486     lhs = gimple_call_lhs (stmt);
3487 
3488   new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3489   set_vinfo_for_stmt (new_stmt, stmt_info);
3490   set_vinfo_for_stmt (stmt, NULL);
3491   STMT_VINFO_STMT (stmt_info) = new_stmt;
3492   gsi_replace (gsi, new_stmt, false);
3493 
3494   return true;
3495 }
3496 
3497 
3498 struct simd_call_arg_info
3499 {
3500   tree vectype;
3501   tree op;
3502   HOST_WIDE_INT linear_step;
3503   enum vect_def_type dt;
3504   unsigned int align;
3505   bool simd_lane_linear;
3506 };
3507 
3508 /* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
3509    is linear within the simd lane (but not within the whole loop), note
3510    it in *ARGINFO.  */
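/* Illustrative example (simplified GIMPLE, assumed names):

     _1 = GOMP_SIMD_LANE (simduid.0);
     _2 = _1 * 4;
     op_3 = &a + _2;

   Here OP_3 is linear within the simd lane with base &a and step 4, which
   is what gets recorded in *ARGINFO.  The walker below also looks through
   constant additions and integral conversions.  */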
3511 
3512 static void
3513 vect_simd_lane_linear (tree op, struct loop *loop,
3514 		       struct simd_call_arg_info *arginfo)
3515 {
3516   gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3517 
3518   if (!is_gimple_assign (def_stmt)
3519       || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3520       || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3521     return;
3522 
3523   tree base = gimple_assign_rhs1 (def_stmt);
3524   HOST_WIDE_INT linear_step = 0;
3525   tree v = gimple_assign_rhs2 (def_stmt);
3526   while (TREE_CODE (v) == SSA_NAME)
3527     {
3528       tree t;
3529       def_stmt = SSA_NAME_DEF_STMT (v);
3530       if (is_gimple_assign (def_stmt))
3531 	switch (gimple_assign_rhs_code (def_stmt))
3532 	  {
3533 	  case PLUS_EXPR:
3534 	    t = gimple_assign_rhs2 (def_stmt);
3535 	    if (linear_step || TREE_CODE (t) != INTEGER_CST)
3536 	      return;
3537 	    base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3538 	    v = gimple_assign_rhs1 (def_stmt);
3539 	    continue;
3540 	  case MULT_EXPR:
3541 	    t = gimple_assign_rhs2 (def_stmt);
3542 	    if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3543 	      return;
3544 	    linear_step = tree_to_shwi (t);
3545 	    v = gimple_assign_rhs1 (def_stmt);
3546 	    continue;
3547 	  CASE_CONVERT:
3548 	    t = gimple_assign_rhs1 (def_stmt);
3549 	    if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3550 		|| (TYPE_PRECISION (TREE_TYPE (v))
3551 		    < TYPE_PRECISION (TREE_TYPE (t))))
3552 	      return;
3553 	    if (!linear_step)
3554 	      linear_step = 1;
3555 	    v = t;
3556 	    continue;
3557 	  default:
3558 	    return;
3559 	  }
3560       else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3561 	       && loop->simduid
3562 	       && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3563 	       && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3564 		   == loop->simduid))
3565 	{
3566 	  if (!linear_step)
3567 	    linear_step = 1;
3568 	  arginfo->linear_step = linear_step;
3569 	  arginfo->op = base;
3570 	  arginfo->simd_lane_linear = true;
3571 	  return;
3572 	}
3573     }
3574 }
3575 
3576 /* Return the number of elements in vector type VECTYPE, which is associated
3577    with a SIMD clone.  At present these vectors always have a constant
3578    length.  */
3579 
3580 static unsigned HOST_WIDE_INT
3581 simd_clone_subparts (tree vectype)
3582 {
3583   return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3584 }
3585 
3586 /* Function vectorizable_simd_clone_call.
3587 
3588    Check if STMT performs a function call that can be vectorized
3589    by calling a simd clone of the function.
3590    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3591    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3592    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
3593 
3594 static bool
3595 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3596 			      gimple **vec_stmt, slp_tree slp_node)
3597 {
3598   tree vec_dest;
3599   tree scalar_dest;
3600   tree op, type;
3601   tree vec_oprnd0 = NULL_TREE;
3602   stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3603   tree vectype;
3604   unsigned int nunits;
3605   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3606   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3607   vec_info *vinfo = stmt_info->vinfo;
3608   struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3609   tree fndecl, new_temp;
3610   gimple *def_stmt;
3611   gimple *new_stmt = NULL;
3612   int ncopies, j;
3613   auto_vec<simd_call_arg_info> arginfo;
3614   vec<tree> vargs = vNULL;
3615   size_t i, nargs;
3616   tree lhs, rtype, ratype;
3617   vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3618 
3619   /* Is STMT a vectorizable call?   */
3620   if (!is_gimple_call (stmt))
3621     return false;
3622 
3623   fndecl = gimple_call_fndecl (stmt);
3624   if (fndecl == NULL_TREE)
3625     return false;
3626 
3627   struct cgraph_node *node = cgraph_node::get (fndecl);
3628   if (node == NULL || node->simd_clones == NULL)
3629     return false;
3630 
3631   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3632     return false;
3633 
3634   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3635       && ! vec_stmt)
3636     return false;
3637 
3638   if (gimple_call_lhs (stmt)
3639       && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3640     return false;
3641 
3642   gcc_checking_assert (!stmt_can_throw_internal (stmt));
3643 
3644   vectype = STMT_VINFO_VECTYPE (stmt_info);
3645 
3646   if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3647     return false;
3648 
3649   /* FORNOW */
3650   if (slp_node)
3651     return false;
3652 
3653   /* Process function arguments.  */
3654   nargs = gimple_call_num_args (stmt);
3655 
3656   /* Bail out if the function has zero arguments.  */
3657   if (nargs == 0)
3658     return false;
3659 
3660   arginfo.reserve (nargs, true);
3661 
3662   for (i = 0; i < nargs; i++)
3663     {
3664       simd_call_arg_info thisarginfo;
3665       affine_iv iv;
3666 
3667       thisarginfo.linear_step = 0;
3668       thisarginfo.align = 0;
3669       thisarginfo.op = NULL_TREE;
3670       thisarginfo.simd_lane_linear = false;
3671 
3672       op = gimple_call_arg (stmt, i);
3673       if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3674 			       &thisarginfo.vectype)
3675 	  || thisarginfo.dt == vect_uninitialized_def)
3676 	{
3677 	  if (dump_enabled_p ())
3678 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3679 			     "use not simple.\n");
3680 	  return false;
3681 	}
3682 
3683       if (thisarginfo.dt == vect_constant_def
3684 	  || thisarginfo.dt == vect_external_def)
3685 	gcc_assert (thisarginfo.vectype == NULL_TREE);
3686       else
3687 	gcc_assert (thisarginfo.vectype != NULL_TREE);
3688 
3689       /* For linear arguments, the analyze phase should have saved
3690 	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
3691       if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3692 	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3693 	{
3694 	  gcc_assert (vec_stmt);
3695 	  thisarginfo.linear_step
3696 	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3697 	  thisarginfo.op
3698 	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3699 	  thisarginfo.simd_lane_linear
3700 	    = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3701 	       == boolean_true_node);
3702 	  /* If loop has been peeled for alignment, we need to adjust it.  */
3703 	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3704 	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3705 	  if (n1 != n2 && !thisarginfo.simd_lane_linear)
3706 	    {
3707 	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3708 	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3709 	      tree opt = TREE_TYPE (thisarginfo.op);
3710 	      bias = fold_convert (TREE_TYPE (step), bias);
3711 	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3712 	      thisarginfo.op
3713 		= fold_build2 (POINTER_TYPE_P (opt)
3714 			       ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3715 			       thisarginfo.op, bias);
3716 	    }
3717 	}
3718       else if (!vec_stmt
3719 	       && thisarginfo.dt != vect_constant_def
3720 	       && thisarginfo.dt != vect_external_def
3721 	       && loop_vinfo
3722 	       && TREE_CODE (op) == SSA_NAME
3723 	       && simple_iv (loop, loop_containing_stmt (stmt), op,
3724 			     &iv, false)
3725 	       && tree_fits_shwi_p (iv.step))
3726 	{
3727 	  thisarginfo.linear_step = tree_to_shwi (iv.step);
3728 	  thisarginfo.op = iv.base;
3729 	}
3730       else if ((thisarginfo.dt == vect_constant_def
3731 		|| thisarginfo.dt == vect_external_def)
3732 	       && POINTER_TYPE_P (TREE_TYPE (op)))
3733 	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3734       /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3735 	 linear too.  */
3736       if (POINTER_TYPE_P (TREE_TYPE (op))
3737 	  && !thisarginfo.linear_step
3738 	  && !vec_stmt
3739 	  && thisarginfo.dt != vect_constant_def
3740 	  && thisarginfo.dt != vect_external_def
3741 	  && loop_vinfo
3742 	  && !slp_node
3743 	  && TREE_CODE (op) == SSA_NAME)
3744 	vect_simd_lane_linear (op, loop, &thisarginfo);
3745 
3746       arginfo.quick_push (thisarginfo);
3747     }
3748 
3749   unsigned HOST_WIDE_INT vf;
3750   if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3751     {
3752       if (dump_enabled_p ())
3753 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3754 			 "not considering SIMD clones; not yet supported"
3755 			 " for variable-width vectors.\n");
3756       return false;
3757     }
3758 
3759   unsigned int badness = 0;
3760   struct cgraph_node *bestn = NULL;
3761   if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3762     bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3763   else
3764     for (struct cgraph_node *n = node->simd_clones; n != NULL;
3765 	 n = n->simdclone->next_clone)
3766       {
3767 	unsigned int this_badness = 0;
3768 	if (n->simdclone->simdlen > vf
3769 	    || n->simdclone->nargs != nargs)
3770 	  continue;
3771 	if (n->simdclone->simdlen < vf)
3772 	  this_badness += (exact_log2 (vf)
3773 			   - exact_log2 (n->simdclone->simdlen)) * 1024;
3774 	if (n->simdclone->inbranch)
3775 	  this_badness += 2048;
3776 	int target_badness = targetm.simd_clone.usable (n);
3777 	if (target_badness < 0)
3778 	  continue;
3779 	this_badness += target_badness * 512;
3780 	/* FORNOW: Have to add code to add the mask argument.  */
3781 	if (n->simdclone->inbranch)
3782 	  continue;
3783 	for (i = 0; i < nargs; i++)
3784 	  {
3785 	    switch (n->simdclone->args[i].arg_type)
3786 	      {
3787 	      case SIMD_CLONE_ARG_TYPE_VECTOR:
3788 		if (!useless_type_conversion_p
3789 			(n->simdclone->args[i].orig_type,
3790 			 TREE_TYPE (gimple_call_arg (stmt, i))))
3791 		  i = -1;
3792 		else if (arginfo[i].dt == vect_constant_def
3793 			 || arginfo[i].dt == vect_external_def
3794 			 || arginfo[i].linear_step)
3795 		  this_badness += 64;
3796 		break;
3797 	      case SIMD_CLONE_ARG_TYPE_UNIFORM:
3798 		if (arginfo[i].dt != vect_constant_def
3799 		    && arginfo[i].dt != vect_external_def)
3800 		  i = -1;
3801 		break;
3802 	      case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3803 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3804 		if (arginfo[i].dt == vect_constant_def
3805 		    || arginfo[i].dt == vect_external_def
3806 		    || (arginfo[i].linear_step
3807 			!= n->simdclone->args[i].linear_step))
3808 		  i = -1;
3809 		break;
3810 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3811 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3812 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3813 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3814 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3815 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3816 		/* FORNOW */
3817 		i = -1;
3818 		break;
3819 	      case SIMD_CLONE_ARG_TYPE_MASK:
3820 		gcc_unreachable ();
3821 	      }
3822 	    if (i == (size_t) -1)
3823 	      break;
3824 	    if (n->simdclone->args[i].alignment > arginfo[i].align)
3825 	      {
3826 		i = -1;
3827 		break;
3828 	      }
3829 	    if (arginfo[i].align)
3830 	      this_badness += (exact_log2 (arginfo[i].align)
3831 			       - exact_log2 (n->simdclone->args[i].alignment));
3832 	  }
3833 	if (i == (size_t) -1)
3834 	  continue;
3835 	if (bestn == NULL || this_badness < badness)
3836 	  {
3837 	    bestn = n;
3838 	    badness = this_badness;
3839 	  }
3840       }
3841 
3842   if (bestn == NULL)
3843     return false;
3844 
3845   for (i = 0; i < nargs; i++)
3846     if ((arginfo[i].dt == vect_constant_def
3847 	 || arginfo[i].dt == vect_external_def)
3848 	&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3849       {
3850 	arginfo[i].vectype
3851 	  = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3852 								     i)));
3853 	if (arginfo[i].vectype == NULL
3854 	    || (simd_clone_subparts (arginfo[i].vectype)
3855 		> bestn->simdclone->simdlen))
3856 	  return false;
3857       }
3858 
3859   fndecl = bestn->decl;
3860   nunits = bestn->simdclone->simdlen;
3861   ncopies = vf / nunits;
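  /* For example (illustrative): with a vectorization factor of 8 and a
     chosen clone of simdlen 4, NCOPIES is 2, i.e. the clone is called
     twice per vector loop iteration.  */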
3862 
3863   /* If the function isn't const, only allow it in simd loops where the
3864      user has asserted that at least nunits consecutive iterations can be
3865      performed using SIMD instructions.  */
3866   if ((loop == NULL || (unsigned) loop->safelen < nunits)
3867       && gimple_vuse (stmt))
3868     return false;
3869 
3870   /* Sanity check: make sure that at least one copy of the vectorized stmt
3871      needs to be generated.  */
3872   gcc_assert (ncopies >= 1);
3873 
3874   if (!vec_stmt) /* transformation not required.  */
3875     {
3876       STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3877       for (i = 0; i < nargs; i++)
3878 	if ((bestn->simdclone->args[i].arg_type
3879 	     == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3880 	    || (bestn->simdclone->args[i].arg_type
3881 		== SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3882 	  {
3883 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3884 									+ 1);
3885 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3886 	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3887 		       ? size_type_node : TREE_TYPE (arginfo[i].op);
3888 	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
3889 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3890 	    tree sll = arginfo[i].simd_lane_linear
3891 		       ? boolean_true_node : boolean_false_node;
3892 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3893 	  }
3894       STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3895       if (dump_enabled_p ())
3896 	dump_printf_loc (MSG_NOTE, vect_location,
3897 			 "=== vectorizable_simd_clone_call ===\n");
3898 /*      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3899       return true;
3900     }
3901 
3902   /* Transform.  */
3903 
3904   if (dump_enabled_p ())
3905     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3906 
3907   /* Handle def.  */
3908   scalar_dest = gimple_call_lhs (stmt);
3909   vec_dest = NULL_TREE;
3910   rtype = NULL_TREE;
3911   ratype = NULL_TREE;
3912   if (scalar_dest)
3913     {
3914       vec_dest = vect_create_destination_var (scalar_dest, vectype);
3915       rtype = TREE_TYPE (TREE_TYPE (fndecl));
3916       if (TREE_CODE (rtype) == ARRAY_TYPE)
3917 	{
3918 	  ratype = rtype;
3919 	  rtype = TREE_TYPE (ratype);
3920 	}
3921     }
3922 
3923   prev_stmt_info = NULL;
3924   for (j = 0; j < ncopies; ++j)
3925     {
3926       /* Build argument list for the vectorized call.  */
3927       if (j == 0)
3928 	vargs.create (nargs);
3929       else
3930 	vargs.truncate (0);
3931 
3932       for (i = 0; i < nargs; i++)
3933 	{
3934 	  unsigned int k, l, m, o;
3935 	  tree atype;
3936 	  op = gimple_call_arg (stmt, i);
3937 	  switch (bestn->simdclone->args[i].arg_type)
3938 	    {
3939 	    case SIMD_CLONE_ARG_TYPE_VECTOR:
3940 	      atype = bestn->simdclone->args[i].vector_type;
3941 	      o = nunits / simd_clone_subparts (atype);
3942 	      for (m = j * o; m < (j + 1) * o; m++)
3943 		{
3944 		  if (simd_clone_subparts (atype)
3945 		      < simd_clone_subparts (arginfo[i].vectype))
3946 		    {
3947 		      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3948 		      k = (simd_clone_subparts (arginfo[i].vectype)
3949 			   / simd_clone_subparts (atype));
3950 		      gcc_assert ((k & (k - 1)) == 0);
3951 		      if (m == 0)
3952 			vec_oprnd0
3953 			  = vect_get_vec_def_for_operand (op, stmt);
3954 		      else
3955 			{
3956 			  vec_oprnd0 = arginfo[i].op;
3957 			  if ((m & (k - 1)) == 0)
3958 			    vec_oprnd0
3959 			      = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3960 								vec_oprnd0);
3961 			}
3962 		      arginfo[i].op = vec_oprnd0;
3963 		      vec_oprnd0
3964 			= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3965 				  bitsize_int (prec),
3966 				  bitsize_int ((m & (k - 1)) * prec));
3967 		      new_stmt
3968 			= gimple_build_assign (make_ssa_name (atype),
3969 					       vec_oprnd0);
3970 		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
3971 		      vargs.safe_push (gimple_assign_lhs (new_stmt));
3972 		    }
3973 		  else
3974 		    {
3975 		      k = (simd_clone_subparts (atype)
3976 			   / simd_clone_subparts (arginfo[i].vectype));
3977 		      gcc_assert ((k & (k - 1)) == 0);
3978 		      vec<constructor_elt, va_gc> *ctor_elts;
3979 		      if (k != 1)
3980 			vec_alloc (ctor_elts, k);
3981 		      else
3982 			ctor_elts = NULL;
3983 		      for (l = 0; l < k; l++)
3984 			{
3985 			  if (m == 0 && l == 0)
3986 			    vec_oprnd0
3987 			      = vect_get_vec_def_for_operand (op, stmt);
3988 			  else
3989 			    vec_oprnd0
3990 			      = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3991 								arginfo[i].op);
3992 			  arginfo[i].op = vec_oprnd0;
3993 			  if (k == 1)
3994 			    break;
3995 			  CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3996 						  vec_oprnd0);
3997 			}
3998 		      if (k == 1)
3999 			vargs.safe_push (vec_oprnd0);
4000 		      else
4001 			{
4002 			  vec_oprnd0 = build_constructor (atype, ctor_elts);
4003 			  new_stmt
4004 			    = gimple_build_assign (make_ssa_name (atype),
4005 						   vec_oprnd0);
4006 			  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4007 			  vargs.safe_push (gimple_assign_lhs (new_stmt));
4008 			}
4009 		    }
4010 		}
4011 	      break;
4012 	    case SIMD_CLONE_ARG_TYPE_UNIFORM:
4013 	      vargs.safe_push (op);
4014 	      break;
4015 	    case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4016 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4017 	      if (j == 0)
4018 		{
4019 		  gimple_seq stmts;
4020 		  arginfo[i].op
4021 		    = force_gimple_operand (arginfo[i].op, &stmts, true,
4022 					    NULL_TREE);
4023 		  if (stmts != NULL)
4024 		    {
4025 		      basic_block new_bb;
4026 		      edge pe = loop_preheader_edge (loop);
4027 		      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4028 		      gcc_assert (!new_bb);
4029 		    }
4030 		  if (arginfo[i].simd_lane_linear)
4031 		    {
4032 		      vargs.safe_push (arginfo[i].op);
4033 		      break;
4034 		    }
4035 		  tree phi_res = copy_ssa_name (op);
4036 		  gphi *new_phi = create_phi_node (phi_res, loop->header);
4037 		  set_vinfo_for_stmt (new_phi,
4038 				      new_stmt_vec_info (new_phi, loop_vinfo));
4039 		  add_phi_arg (new_phi, arginfo[i].op,
4040 			       loop_preheader_edge (loop), UNKNOWN_LOCATION);
4041 		  enum tree_code code
4042 		    = POINTER_TYPE_P (TREE_TYPE (op))
4043 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
4044 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
4045 			      ? sizetype : TREE_TYPE (op);
4046 		  widest_int cst
4047 		    = wi::mul (bestn->simdclone->args[i].linear_step,
4048 			       ncopies * nunits);
4049 		  tree tcst = wide_int_to_tree (type, cst);
4050 		  tree phi_arg = copy_ssa_name (op);
4051 		  new_stmt
4052 		    = gimple_build_assign (phi_arg, code, phi_res, tcst);
4053 		  gimple_stmt_iterator si = gsi_after_labels (loop->header);
4054 		  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4055 		  set_vinfo_for_stmt (new_stmt,
4056 				      new_stmt_vec_info (new_stmt, loop_vinfo));
4057 		  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4058 			       UNKNOWN_LOCATION);
4059 		  arginfo[i].op = phi_res;
4060 		  vargs.safe_push (phi_res);
4061 		}
4062 	      else
4063 		{
4064 		  enum tree_code code
4065 		    = POINTER_TYPE_P (TREE_TYPE (op))
4066 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
4067 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
4068 			      ? sizetype : TREE_TYPE (op);
4069 		  widest_int cst
4070 		    = wi::mul (bestn->simdclone->args[i].linear_step,
4071 			       j * nunits);
4072 		  tree tcst = wide_int_to_tree (type, cst);
4073 		  new_temp = make_ssa_name (TREE_TYPE (op));
4074 		  new_stmt = gimple_build_assign (new_temp, code,
4075 						  arginfo[i].op, tcst);
4076 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4077 		  vargs.safe_push (new_temp);
4078 		}
4079 	      break;
4080 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4081 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4082 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4083 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4084 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4085 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4086 	    default:
4087 	      gcc_unreachable ();
4088 	    }
4089 	}
4090 
4091       new_stmt = gimple_build_call_vec (fndecl, vargs);
4092       if (vec_dest)
4093 	{
4094 	  gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4095 	  if (ratype)
4096 	    new_temp = create_tmp_var (ratype);
4097 	  else if (simd_clone_subparts (vectype)
4098 		   == simd_clone_subparts (rtype))
4099 	    new_temp = make_ssa_name (vec_dest, new_stmt);
4100 	  else
4101 	    new_temp = make_ssa_name (rtype, new_stmt);
4102 	  gimple_call_set_lhs (new_stmt, new_temp);
4103 	}
4104       vect_finish_stmt_generation (stmt, new_stmt, gsi);
4105 
4106       if (vec_dest)
4107 	{
4108 	  if (simd_clone_subparts (vectype) < nunits)
4109 	    {
4110 	      unsigned int k, l;
4111 	      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4112 	      poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4113 	      k = nunits / simd_clone_subparts (vectype);
4114 	      gcc_assert ((k & (k - 1)) == 0);
4115 	      for (l = 0; l < k; l++)
4116 		{
4117 		  tree t;
4118 		  if (ratype)
4119 		    {
4120 		      t = build_fold_addr_expr (new_temp);
4121 		      t = build2 (MEM_REF, vectype, t,
4122 				  build_int_cst (TREE_TYPE (t), l * bytes));
4123 		    }
4124 		  else
4125 		    t = build3 (BIT_FIELD_REF, vectype, new_temp,
4126 				bitsize_int (prec), bitsize_int (l * prec));
4127 		  new_stmt
4128 		    = gimple_build_assign (make_ssa_name (vectype), t);
4129 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4130 		  if (j == 0 && l == 0)
4131 		    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4132 		  else
4133 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4134 
4135 		  prev_stmt_info = vinfo_for_stmt (new_stmt);
4136 		}
4137 
4138 	      if (ratype)
4139 		{
4140 		  tree clobber = build_constructor (ratype, NULL);
4141 		  TREE_THIS_VOLATILE (clobber) = 1;
4142 		  new_stmt = gimple_build_assign (new_temp, clobber);
4143 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4144 		}
4145 	      continue;
4146 	    }
4147 	  else if (simd_clone_subparts (vectype) > nunits)
4148 	    {
4149 	      unsigned int k = (simd_clone_subparts (vectype)
4150 				/ simd_clone_subparts (rtype));
4151 	      gcc_assert ((k & (k - 1)) == 0);
4152 	      if ((j & (k - 1)) == 0)
4153 		vec_alloc (ret_ctor_elts, k);
4154 	      if (ratype)
4155 		{
4156 		  unsigned int m, o = nunits / simd_clone_subparts (rtype);
4157 		  for (m = 0; m < o; m++)
4158 		    {
4159 		      tree tem = build4 (ARRAY_REF, rtype, new_temp,
4160 					 size_int (m), NULL_TREE, NULL_TREE);
4161 		      new_stmt
4162 			= gimple_build_assign (make_ssa_name (rtype), tem);
4163 		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
4164 		      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4165 					      gimple_assign_lhs (new_stmt));
4166 		    }
4167 		  tree clobber = build_constructor (ratype, NULL);
4168 		  TREE_THIS_VOLATILE (clobber) = 1;
4169 		  new_stmt = gimple_build_assign (new_temp, clobber);
4170 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4171 		}
4172 	      else
4173 		CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4174 	      if ((j & (k - 1)) != k - 1)
4175 		continue;
4176 	      vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4177 	      new_stmt
4178 		= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4179 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
4180 
4181 	      if ((unsigned) j == k - 1)
4182 		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4183 	      else
4184 		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4185 
4186 	      prev_stmt_info = vinfo_for_stmt (new_stmt);
4187 	      continue;
4188 	    }
4189 	  else if (ratype)
4190 	    {
4191 	      tree t = build_fold_addr_expr (new_temp);
4192 	      t = build2 (MEM_REF, vectype, t,
4193 			  build_int_cst (TREE_TYPE (t), 0));
4194 	      new_stmt
4195 		= gimple_build_assign (make_ssa_name (vec_dest), t);
4196 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
4197 	      tree clobber = build_constructor (ratype, NULL);
4198 	      TREE_THIS_VOLATILE (clobber) = 1;
4199 	      vect_finish_stmt_generation (stmt,
4200 					   gimple_build_assign (new_temp,
4201 								clobber), gsi);
4202 	    }
4203 	}
4204 
4205       if (j == 0)
4206 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4207       else
4208 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4209 
4210       prev_stmt_info = vinfo_for_stmt (new_stmt);
4211     }
4212 
4213   vargs.release ();
4214 
4215   /* The call in STMT might prevent it from being removed in dce.
4216      However, we cannot remove it here because of the way the SSA name
4217      it defines is mapped to the new definition.  So just replace the
4218      rhs of the statement with something harmless.  */
4219 
4220   if (slp_node)
4221     return true;
4222 
4223   if (scalar_dest)
4224     {
4225       type = TREE_TYPE (scalar_dest);
4226       if (is_pattern_stmt_p (stmt_info))
4227 	lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
4228       else
4229 	lhs = gimple_call_lhs (stmt);
4230       new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4231     }
4232   else
4233     new_stmt = gimple_build_nop ();
4234   set_vinfo_for_stmt (new_stmt, stmt_info);
4235   set_vinfo_for_stmt (stmt, NULL);
4236   STMT_VINFO_STMT (stmt_info) = new_stmt;
4237   gsi_replace (gsi, new_stmt, true);
4238   unlink_stmt_vdef (stmt);
4239 
4240   return true;
4241 }
4242 
4243 
4244 /* Function vect_gen_widened_results_half
4245 
4246    Create a vector stmt whose code, number of arguments, and result
4247    variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
4248    VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
4249    In the case that CODE is a CALL_EXPR, this means that a call to DECL
4250    needs to be created (DECL is a function-decl of a target-builtin).
4251    STMT is the original scalar stmt that we are vectorizing.  */
4252 
4253 static gimple *
4254 vect_gen_widened_results_half (enum tree_code code,
4255 			       tree decl,
4256                                tree vec_oprnd0, tree vec_oprnd1, int op_type,
4257 			       tree vec_dest, gimple_stmt_iterator *gsi,
4258 			       gimple *stmt)
4259 {
4260   gimple *new_stmt;
4261   tree new_temp;
4262 
4263   /* Generate half of the widened result:  */
4264   if (code == CALL_EXPR)
4265     {
4266       /* Target specific support  */
4267       if (op_type == binary_op)
4268 	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4269       else
4270 	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4271       new_temp = make_ssa_name (vec_dest, new_stmt);
4272       gimple_call_set_lhs (new_stmt, new_temp);
4273     }
4274   else
4275     {
4276       /* Generic support */
4277       gcc_assert (op_type == TREE_CODE_LENGTH (code));
4278       if (op_type != binary_op)
4279 	vec_oprnd1 = NULL;
4280       new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4281       new_temp = make_ssa_name (vec_dest, new_stmt);
4282       gimple_assign_set_lhs (new_stmt, new_temp);
4283     }
4284   vect_finish_stmt_generation (stmt, new_stmt, gsi);
4285 
4286   return new_stmt;
4287 }
4288 
4289 
4290 /* Get vectorized definitions for loop-based vectorization.  For the first
4291    operand we call vect_get_vec_def_for_operand() (with OPRND containing the
4292    scalar operand), and for the rest we get a copy with
4293    vect_get_vec_def_for_stmt_copy() using the previous vector definition
4294    (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4295    The vectors are collected into VEC_OPRNDS.  */
4296 
4297 static void
4298 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
4299 			  vec<tree> *vec_oprnds, int multi_step_cvt)
4300 {
4301   tree vec_oprnd;
4302 
4303   /* Get first vector operand.  */
4304   /* All the vector operands except the very first one (which is the scalar
4305      operand) are stmt copies.  */
4306   if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4307     vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4308   else
4309     vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
4310 
4311   vec_oprnds->quick_push (vec_oprnd);
4312 
4313   /* Get second vector operand.  */
4314   vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
4315   vec_oprnds->quick_push (vec_oprnd);
4316 
4317   *oprnd = vec_oprnd;
4318 
4319   /* For conversion in multiple steps, continue to get operands
4320      recursively.  */
4321   if (multi_step_cvt)
4322     vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds,  multi_step_cvt - 1);
4323 }
4324 
4325 
4326 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4327    For multi-step conversions store the resulting vectors and call the function
4328    recursively.  */
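/* For example (illustrative): when demoting int vectors to short, each
   pair of input vectors in VEC_OPRNDS is combined into one output vector
   with VEC_PACK_TRUNC_EXPR.  For a multi-step int-to-char demotion the
   first level packs int pairs into short vectors and the recursive call
   packs those into char vectors, halving the number of operands at each
   level.  */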
4329 
4330 static void
4331 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4332 				       int multi_step_cvt, gimple *stmt,
4333 				       vec<tree> vec_dsts,
4334 				       gimple_stmt_iterator *gsi,
4335 				       slp_tree slp_node, enum tree_code code,
4336 				       stmt_vec_info *prev_stmt_info)
4337 {
4338   unsigned int i;
4339   tree vop0, vop1, new_tmp, vec_dest;
4340   gimple *new_stmt;
4341   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4342 
4343   vec_dest = vec_dsts.pop ();
4344 
4345   for (i = 0; i < vec_oprnds->length (); i += 2)
4346     {
4347       /* Create demotion operation.  */
4348       vop0 = (*vec_oprnds)[i];
4349       vop1 = (*vec_oprnds)[i + 1];
4350       new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4351       new_tmp = make_ssa_name (vec_dest, new_stmt);
4352       gimple_assign_set_lhs (new_stmt, new_tmp);
4353       vect_finish_stmt_generation (stmt, new_stmt, gsi);
4354 
4355       if (multi_step_cvt)
4356 	/* Store the resulting vector for next recursive call.  */
4357 	(*vec_oprnds)[i/2] = new_tmp;
4358       else
4359 	{
4360 	  /* This is the last step of the conversion sequence.  Store the
4361 	     vectors in SLP_NODE or in the vector info of the scalar statement
4362 	     (or in the STMT_VINFO_RELATED_STMT chain).  */
4363 	  if (slp_node)
4364 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4365 	  else
4366 	    {
4367 	      if (!*prev_stmt_info)
4368 		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4369 	      else
4370 		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4371 
4372 	      *prev_stmt_info = vinfo_for_stmt (new_stmt);
4373 	    }
4374 	}
4375     }
4376 
4377   /* For multi-step demotion operations we first generate demotion operations
4378      from the source type to the intermediate types, and then combine the
4379      results (stored in VEC_OPRNDS) with a demotion operation to the
4380      destination type.  */
4381   if (multi_step_cvt)
4382     {
4383       /* At each level of recursion we have half of the operands we had at the
4384 	 previous level.  */
4385       vec_oprnds->truncate ((i+1)/2);
4386       vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4387 					     stmt, vec_dsts, gsi, slp_node,
4388 					     VEC_PACK_TRUNC_EXPR,
4389 					     prev_stmt_info);
4390     }
4391 
4392   vec_dsts.quick_push (vec_dest);
4393 }
4394 
4395 
4396 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4397    and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
4398    the resulting vectors and call the function recursively.  */
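/* For example (illustrative): when widening a multiplication of short
   vectors to int, each (VOP0, VOP1) pair yields two result vectors, one
   built with a low-part and one with a high-part widening code (e.g.
   VEC_WIDEN_MULT_LO_EXPR and VEC_WIDEN_MULT_HI_EXPR), or two calls to a
   target builtin when CODE1/CODE2 are CALL_EXPR.  */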
4399 
4400 static void
4401 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4402 					vec<tree> *vec_oprnds1,
4403 					gimple *stmt, tree vec_dest,
4404 					gimple_stmt_iterator *gsi,
4405 					enum tree_code code1,
4406 					enum tree_code code2, tree decl1,
4407 					tree decl2, int op_type)
4408 {
4409   int i;
4410   tree vop0, vop1, new_tmp1, new_tmp2;
4411   gimple *new_stmt1, *new_stmt2;
4412   vec<tree> vec_tmp = vNULL;
4413 
4414   vec_tmp.create (vec_oprnds0->length () * 2);
4415   FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4416     {
4417       if (op_type == binary_op)
4418 	vop1 = (*vec_oprnds1)[i];
4419       else
4420 	vop1 = NULL_TREE;
4421 
4422       /* Generate the two halves of promotion operation.  */
4423       new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4424 						 op_type, vec_dest, gsi, stmt);
4425       new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4426 						 op_type, vec_dest, gsi, stmt);
4427       if (is_gimple_call (new_stmt1))
4428 	{
4429 	  new_tmp1 = gimple_call_lhs (new_stmt1);
4430 	  new_tmp2 = gimple_call_lhs (new_stmt2);
4431 	}
4432       else
4433 	{
4434 	  new_tmp1 = gimple_assign_lhs (new_stmt1);
4435 	  new_tmp2 = gimple_assign_lhs (new_stmt2);
4436 	}
4437 
4438       /* Store the results for the next step.  */
4439       vec_tmp.quick_push (new_tmp1);
4440       vec_tmp.quick_push (new_tmp2);
4441     }
4442 
4443   vec_oprnds0->release ();
4444   *vec_oprnds0 = vec_tmp;
4445 }
4446 
4447 
4448 /* Check if STMT performs a conversion operation that can be vectorized.
4449    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4450    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4451    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
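
/* For example (with illustrative types and modes), for the scalar stmt

     short_dest = (short) int_src;

   and 128-bit vectors, the conversion is classified as NARROW below and a
   VEC_PACK_TRUNC_EXPR is emitted that combines two V4SI defs into one
   V8HI vector per generated copy.  */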
4452 
4453 static bool
4454 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4455 			 gimple **vec_stmt, slp_tree slp_node)
4456 {
4457   tree vec_dest;
4458   tree scalar_dest;
4459   tree op0, op1 = NULL_TREE;
4460   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4461   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4462   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4463   enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4464   enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4465   tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4466   tree new_temp;
4467   gimple *def_stmt;
4468   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4469   int ndts = 2;
4470   gimple *new_stmt = NULL;
4471   stmt_vec_info prev_stmt_info;
4472   poly_uint64 nunits_in;
4473   poly_uint64 nunits_out;
4474   tree vectype_out, vectype_in;
4475   int ncopies, i, j;
4476   tree lhs_type, rhs_type;
4477   enum { NARROW, NONE, WIDEN } modifier;
4478   vec<tree> vec_oprnds0 = vNULL;
4479   vec<tree> vec_oprnds1 = vNULL;
4480   tree vop0;
4481   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4482   vec_info *vinfo = stmt_info->vinfo;
4483   int multi_step_cvt = 0;
4484   vec<tree> interm_types = vNULL;
4485   tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4486   int op_type;
4487   unsigned short fltsz;
4488 
4489   /* Is STMT a vectorizable conversion?   */
4490 
4491   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4492     return false;
4493 
4494   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4495       && ! vec_stmt)
4496     return false;
4497 
4498   if (!is_gimple_assign (stmt))
4499     return false;
4500 
4501   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4502     return false;
4503 
4504   code = gimple_assign_rhs_code (stmt);
4505   if (!CONVERT_EXPR_CODE_P (code)
4506       && code != FIX_TRUNC_EXPR
4507       && code != FLOAT_EXPR
4508       && code != WIDEN_MULT_EXPR
4509       && code != WIDEN_LSHIFT_EXPR)
4510     return false;
4511 
4512   op_type = TREE_CODE_LENGTH (code);
4513 
4514   /* Check types of lhs and rhs.  */
4515   scalar_dest = gimple_assign_lhs (stmt);
4516   lhs_type = TREE_TYPE (scalar_dest);
4517   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4518 
4519   op0 = gimple_assign_rhs1 (stmt);
4520   rhs_type = TREE_TYPE (op0);
4521 
4522   if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4523       && !((INTEGRAL_TYPE_P (lhs_type)
4524 	    && INTEGRAL_TYPE_P (rhs_type))
4525 	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
4526 	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
4527     return false;
4528 
4529   if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4530       && ((INTEGRAL_TYPE_P (lhs_type)
4531 	   && !type_has_mode_precision_p (lhs_type))
4532 	  || (INTEGRAL_TYPE_P (rhs_type)
4533 	      && !type_has_mode_precision_p (rhs_type))))
4534     {
4535       if (dump_enabled_p ())
4536 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4537                          "type conversion to/from bit-precision unsupported."
4538                          "\n");
4539       return false;
4540     }
4541 
4542   /* Check the operands of the operation.  */
4543   if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4544     {
4545       if (dump_enabled_p ())
4546 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4547                          "use not simple.\n");
4548       return false;
4549     }
4550   if (op_type == binary_op)
4551     {
4552       bool ok;
4553 
4554       op1 = gimple_assign_rhs2 (stmt);
4555       gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4556       /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4557 	 OP1.  */
4558       if (CONSTANT_CLASS_P (op0))
4559 	ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4560       else
4561 	ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4562 
4563       if (!ok)
4564 	{
4565           if (dump_enabled_p ())
4566             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4567                              "use not simple.\n");
4568 	  return false;
4569 	}
4570     }
4571 
4572   /* If op0 is an external or constant def, use a vector type of
4573      the same size as the output vector type.  */
4574   if (!vectype_in)
4575     vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4576   if (vec_stmt)
4577     gcc_assert (vectype_in);
4578   if (!vectype_in)
4579     {
4580       if (dump_enabled_p ())
4581 	{
4582 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4583                            "no vectype for scalar type ");
4584 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4585           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4586 	}
4587 
4588       return false;
4589     }
4590 
4591   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4592       && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4593     {
4594       if (dump_enabled_p ())
4595 	{
4596 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4597                            "can't convert between boolean and non-"
4598 			   "boolean vectors");
4599 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4600           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4601 	}
4602 
4603       return false;
4604     }
4605 
4606   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4607   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4608   if (known_eq (nunits_out, nunits_in))
4609     modifier = NONE;
4610   else if (multiple_p (nunits_out, nunits_in))
4611     modifier = NARROW;
4612   else
4613     {
4614       gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4615       modifier = WIDEN;
4616     }
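
  /* For instance, with 128-bit vectors (illustrative modes): V8HI -> V4SI
     has nunits_in = 8 and nunits_out = 4, so the modifier is WIDEN;
     V4SI -> V8HI gives NARROW; V4SI -> V4SF gives NONE.  */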
4617 
4618   /* Multiple types in SLP are handled by creating the appropriate number of
4619      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
4620      case of SLP.  */
4621   if (slp_node)
4622     ncopies = 1;
4623   else if (modifier == NARROW)
4624     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4625   else
4626     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4627 
4628   /* Sanity check: make sure that at least one copy of the vectorized stmt
4629      needs to be generated.  */
4630   gcc_assert (ncopies >= 1);
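
  /* For instance, with a vectorization factor of 16 and four elements per
     vector, vect_get_num_copies returns 16 / 4 = 4, i.e. four vector stmts
     are generated per scalar stmt (the numbers are illustrative).  */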
4631 
4632   bool found_mode = false;
4633   scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4634   scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4635   opt_scalar_mode rhs_mode_iter;
4636 
4637   /* Supportable by target?  */
4638   switch (modifier)
4639     {
4640     case NONE:
4641       if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4642 	return false;
4643       if (supportable_convert_operation (code, vectype_out, vectype_in,
4644 					 &decl1, &code1))
4645 	break;
4646       /* FALLTHRU */
4647     unsupported:
4648       if (dump_enabled_p ())
4649 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4650                          "conversion not supported by target.\n");
4651       return false;
4652 
4653     case WIDEN:
4654       if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4655 					  &code1, &code2, &multi_step_cvt,
4656 					  &interm_types))
4657 	{
4658 	  /* A binary widening operation can only be supported directly by the
4659 	     architecture.  */
4660 	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
4661 	  break;
4662 	}
4663 
4664       if (code != FLOAT_EXPR
4665 	  || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4666 	goto unsupported;
4667 
4668       fltsz = GET_MODE_SIZE (lhs_mode);
4669       FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4670 	{
4671 	  rhs_mode = rhs_mode_iter.require ();
4672 	  if (GET_MODE_SIZE (rhs_mode) > fltsz)
4673 	    break;
4674 
4675 	  cvt_type
4676 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4677 	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4678 	  if (cvt_type == NULL_TREE)
4679 	    goto unsupported;
4680 
4681 	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
4682 	    {
4683 	      if (!supportable_convert_operation (code, vectype_out,
4684 						  cvt_type, &decl1, &codecvt1))
4685 		goto unsupported;
4686 	    }
4687 	  else if (!supportable_widening_operation (code, stmt, vectype_out,
4688 						    cvt_type, &codecvt1,
4689 						    &codecvt2, &multi_step_cvt,
4690 						    &interm_types))
4691 	    continue;
4692 	  else
4693 	    gcc_assert (multi_step_cvt == 0);
4694 
4695 	  if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4696 					      vectype_in, &code1, &code2,
4697 					      &multi_step_cvt, &interm_types))
4698 	    {
4699 	      found_mode = true;
4700 	      break;
4701 	    }
4702 	}
4703 
4704       if (!found_mode)
4705 	goto unsupported;
4706 
4707       if (GET_MODE_SIZE (rhs_mode) == fltsz)
4708 	codecvt2 = ERROR_MARK;
4709       else
4710 	{
4711 	  multi_step_cvt++;
4712 	  interm_types.safe_push (cvt_type);
4713 	  cvt_type = NULL_TREE;
4714 	}
4715       break;
4716 
4717     case NARROW:
4718       gcc_assert (op_type == unary_op);
4719       if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4720 					   &code1, &multi_step_cvt,
4721 					   &interm_types))
4722 	break;
4723 
4724       if (code != FIX_TRUNC_EXPR
4725 	  || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4726 	goto unsupported;
4727 
4728       cvt_type
4729 	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4730       cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4731       if (cvt_type == NULL_TREE)
4732 	goto unsupported;
4733       if (!supportable_convert_operation (code, cvt_type, vectype_in,
4734 					  &decl1, &codecvt1))
4735 	goto unsupported;
4736       if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4737 					   &code1, &multi_step_cvt,
4738 					   &interm_types))
4739 	break;
4740       goto unsupported;
4741 
4742     default:
4743       gcc_unreachable ();
4744     }
4745 
4746   if (!vec_stmt)		/* transformation not required.  */
4747     {
4748       if (dump_enabled_p ())
4749 	dump_printf_loc (MSG_NOTE, vect_location,
4750                          "=== vectorizable_conversion ===\n");
4751       if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4752         {
4753 	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4754 	  if (!slp_node)
4755 	    vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4756 	}
4757       else if (modifier == NARROW)
4758 	{
4759 	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4760 	  if (!slp_node)
4761 	    vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4762 	}
4763       else
4764 	{
4765 	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4766 	  if (!slp_node)
4767 	    vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4768 	}
4769       interm_types.release ();
4770       return true;
4771     }
4772 
4773   /* Transform.  */
4774   if (dump_enabled_p ())
4775     dump_printf_loc (MSG_NOTE, vect_location,
4776                      "transform conversion. ncopies = %d.\n", ncopies);
4777 
4778   if (op_type == binary_op)
4779     {
4780       if (CONSTANT_CLASS_P (op0))
4781 	op0 = fold_convert (TREE_TYPE (op1), op0);
4782       else if (CONSTANT_CLASS_P (op1))
4783 	op1 = fold_convert (TREE_TYPE (op0), op1);
4784     }
4785 
4786   /* In case of multi-step conversion, we first generate conversion operations
4787      to the intermediate types, and then from those types to the final one.
4788      We create vector destinations for the intermediate types (TYPES) received
4789      from supportable_*_operation, and store them in the correct order
4790      for future use in vect_create_vectorized_*_stmts ().  */
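  /* For instance (illustrative), for an int -> char demotion with one
     intermediate V8HI step, the destination for the final V16QI type is
     pushed first and the intermediate V8HI destination last, so the
     intermediate destination is consumed first when the demotion stmts are
     generated from the source type towards the destination type.  */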
4791   auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4792   vec_dest = vect_create_destination_var (scalar_dest,
4793 					  (cvt_type && modifier == WIDEN)
4794 					  ? cvt_type : vectype_out);
4795   vec_dsts.quick_push (vec_dest);
4796 
4797   if (multi_step_cvt)
4798     {
4799       for (i = interm_types.length () - 1;
4800 	   interm_types.iterate (i, &intermediate_type); i--)
4801 	{
4802 	  vec_dest = vect_create_destination_var (scalar_dest,
4803 						  intermediate_type);
4804 	  vec_dsts.quick_push (vec_dest);
4805 	}
4806     }
4807 
4808   if (cvt_type)
4809     vec_dest = vect_create_destination_var (scalar_dest,
4810 					    modifier == WIDEN
4811 					    ? vectype_out : cvt_type);
4812 
4813   if (!slp_node)
4814     {
4815       if (modifier == WIDEN)
4816 	{
4817 	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4818 	  if (op_type == binary_op)
4819 	    vec_oprnds1.create (1);
4820 	}
4821       else if (modifier == NARROW)
4822 	vec_oprnds0.create (
4823 		   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4824     }
4825   else if (code == WIDEN_LSHIFT_EXPR)
4826     vec_oprnds1.create (slp_node->vec_stmts_size);
4827 
4828   last_oprnd = op0;
4829   prev_stmt_info = NULL;
4830   switch (modifier)
4831     {
4832     case NONE:
4833       for (j = 0; j < ncopies; j++)
4834 	{
4835 	  if (j == 0)
4836 	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4837 	  else
4838 	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4839 
4840 	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4841 	    {
4842 	      /* Arguments are ready.  Create the new vector stmt.  */
4843 	      if (code1 == CALL_EXPR)
4844 		{
4845 		  new_stmt = gimple_build_call (decl1, 1, vop0);
4846 		  new_temp = make_ssa_name (vec_dest, new_stmt);
4847 		  gimple_call_set_lhs (new_stmt, new_temp);
4848 		}
4849 	      else
4850 		{
4851 		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4852 		  new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4853 		  new_temp = make_ssa_name (vec_dest, new_stmt);
4854 		  gimple_assign_set_lhs (new_stmt, new_temp);
4855 		}
4856 
4857 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
4858 	      if (slp_node)
4859 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4860 	      else
4861 		{
4862 		  if (!prev_stmt_info)
4863 		    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4864 		  else
4865 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4866 		  prev_stmt_info = vinfo_for_stmt (new_stmt);
4867 		}
4868 	    }
4869 	}
4870       break;
4871 
4872     case WIDEN:
4873       /* In case the vectorization factor (VF) is bigger than the number
4874 	 of elements that we can fit in a vectype (nunits), we have to
4875 	 generate more than one vector stmt, i.e. we need to "unroll"
4876 	 the vector stmt by a factor of VF/nunits.  */
4877       for (j = 0; j < ncopies; j++)
4878 	{
4879 	  /* Handle uses.  */
4880 	  if (j == 0)
4881 	    {
4882 	      if (slp_node)
4883 		{
4884 		  if (code == WIDEN_LSHIFT_EXPR)
4885 		    {
4886 		      unsigned int k;
4887 
4888 		      vec_oprnd1 = op1;
4889 		      /* Store vec_oprnd1 for every vector stmt to be created
4890 			 for SLP_NODE.  We check during the analysis that all
4891 			 the shift arguments are the same.  */
4892 		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4893 			vec_oprnds1.quick_push (vec_oprnd1);
4894 
4895 		      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4896 					 slp_node);
4897 		    }
4898 		  else
4899 		    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4900 				       &vec_oprnds1, slp_node);
4901 		}
4902 	      else
4903 		{
4904 		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4905 		  vec_oprnds0.quick_push (vec_oprnd0);
4906 		  if (op_type == binary_op)
4907 		    {
4908 		      if (code == WIDEN_LSHIFT_EXPR)
4909 			vec_oprnd1 = op1;
4910 		      else
4911 			vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4912 		      vec_oprnds1.quick_push (vec_oprnd1);
4913 		    }
4914 		}
4915 	    }
4916 	  else
4917 	    {
4918 	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4919 	      vec_oprnds0.truncate (0);
4920 	      vec_oprnds0.quick_push (vec_oprnd0);
4921 	      if (op_type == binary_op)
4922 		{
4923 		  if (code == WIDEN_LSHIFT_EXPR)
4924 		    vec_oprnd1 = op1;
4925 		  else
4926 		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4927 								 vec_oprnd1);
4928 		  vec_oprnds1.truncate (0);
4929 		  vec_oprnds1.quick_push (vec_oprnd1);
4930 		}
4931 	    }
4932 
4933 	  /* Arguments are ready.  Create the new vector stmts.  */
4934 	  for (i = multi_step_cvt; i >= 0; i--)
4935 	    {
4936 	      tree this_dest = vec_dsts[i];
4937 	      enum tree_code c1 = code1, c2 = code2;
4938 	      if (i == 0 && codecvt2 != ERROR_MARK)
4939 		{
4940 		  c1 = codecvt1;
4941 		  c2 = codecvt2;
4942 		}
4943 	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4944 						      &vec_oprnds1,
4945 						      stmt, this_dest, gsi,
4946 						      c1, c2, decl1, decl2,
4947 						      op_type);
4948 	    }
4949 
4950 	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4951 	    {
4952 	      if (cvt_type)
4953 		{
4954 		  if (codecvt1 == CALL_EXPR)
4955 		    {
4956 		      new_stmt = gimple_build_call (decl1, 1, vop0);
4957 		      new_temp = make_ssa_name (vec_dest, new_stmt);
4958 		      gimple_call_set_lhs (new_stmt, new_temp);
4959 		    }
4960 		  else
4961 		    {
4962 		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4963 		      new_temp = make_ssa_name (vec_dest);
4964 		      new_stmt = gimple_build_assign (new_temp, codecvt1,
4965 						      vop0);
4966 		    }
4967 
4968 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4969 		}
4970 	      else
4971 		new_stmt = SSA_NAME_DEF_STMT (vop0);
4972 
4973 	      if (slp_node)
4974 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4975 	      else
4976 		{
4977 		  if (!prev_stmt_info)
4978 		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4979 		  else
4980 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4981 		  prev_stmt_info = vinfo_for_stmt (new_stmt);
4982 		}
4983 	    }
4984 	}
4985 
4986       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4987       break;
4988 
4989     case NARROW:
4990       /* In case the vectorization factor (VF) is bigger than the number
4991 	 of elements that we can fit in a vectype (nunits), we have to
4992 	 generate more than one vector stmt, i.e. we need to "unroll"
4993 	 the vector stmt by a factor of VF/nunits.  */
4994       for (j = 0; j < ncopies; j++)
4995 	{
4996 	  /* Handle uses.  */
4997 	  if (slp_node)
4998 	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4999 			       slp_node);
5000 	  else
5001 	    {
5002 	      vec_oprnds0.truncate (0);
5003 	      vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
5004 					vect_pow2 (multi_step_cvt) - 1);
5005 	    }
5006 
5007 	  /* Arguments are ready.  Create the new vector stmts.  */
5008 	  if (cvt_type)
5009 	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5010 	      {
5011 		if (codecvt1 == CALL_EXPR)
5012 		  {
5013 		    new_stmt = gimple_build_call (decl1, 1, vop0);
5014 		    new_temp = make_ssa_name (vec_dest, new_stmt);
5015 		    gimple_call_set_lhs (new_stmt, new_temp);
5016 		  }
5017 		else
5018 		  {
5019 		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5020 		    new_temp = make_ssa_name (vec_dest);
5021 		    new_stmt = gimple_build_assign (new_temp, codecvt1,
5022 						    vop0);
5023 		  }
5024 
5025 		vect_finish_stmt_generation (stmt, new_stmt, gsi);
5026 		vec_oprnds0[i] = new_temp;
5027 	      }
5028 
5029 	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5030 						 stmt, vec_dsts, gsi,
5031 						 slp_node, code1,
5032 						 &prev_stmt_info);
5033 	}
5034 
5035       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5036       break;
5037     }
5038 
5039   vec_oprnds0.release ();
5040   vec_oprnds1.release ();
5041   interm_types.release ();
5042 
5043   return true;
5044 }
5045 
5046 
5047 /* Function vectorizable_assignment.
5048 
5049    Check if STMT performs an assignment (copy) that can be vectorized.
5050    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5051    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5052    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5053 
5054 static bool
5055 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
5056 			 gimple **vec_stmt, slp_tree slp_node)
5057 {
5058   tree vec_dest;
5059   tree scalar_dest;
5060   tree op;
5061   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5062   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5063   tree new_temp;
5064   gimple *def_stmt;
5065   enum vect_def_type dt[1] = {vect_unknown_def_type};
5066   int ndts = 1;
5067   int ncopies;
5068   int i, j;
5069   vec<tree> vec_oprnds = vNULL;
5070   tree vop;
5071   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5072   vec_info *vinfo = stmt_info->vinfo;
5073   gimple *new_stmt = NULL;
5074   stmt_vec_info prev_stmt_info = NULL;
5075   enum tree_code code;
5076   tree vectype_in;
5077 
5078   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5079     return false;
5080 
5081   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5082       && ! vec_stmt)
5083     return false;
5084 
5085   /* Is vectorizable assignment?  */
5086   if (!is_gimple_assign (stmt))
5087     return false;
5088 
5089   scalar_dest = gimple_assign_lhs (stmt);
5090   if (TREE_CODE (scalar_dest) != SSA_NAME)
5091     return false;
5092 
5093   code = gimple_assign_rhs_code (stmt);
5094   if (gimple_assign_single_p (stmt)
5095       || code == PAREN_EXPR
5096       || CONVERT_EXPR_CODE_P (code))
5097     op = gimple_assign_rhs1 (stmt);
5098   else
5099     return false;
5100 
5101   if (code == VIEW_CONVERT_EXPR)
5102     op = TREE_OPERAND (op, 0);
5103 
5104   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5105   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5106 
5107   /* Multiple types in SLP are handled by creating the appropriate number of
5108      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5109      case of SLP.  */
5110   if (slp_node)
5111     ncopies = 1;
5112   else
5113     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5114 
5115   gcc_assert (ncopies >= 1);
5116 
5117   if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
5118     {
5119       if (dump_enabled_p ())
5120         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5121                          "use not simple.\n");
5122       return false;
5123     }
5124 
5125   /* We can handle NOP_EXPR conversions that do not change the number
5126      of elements or the vector size.  */
5127   if ((CONVERT_EXPR_CODE_P (code)
5128        || code == VIEW_CONVERT_EXPR)
5129       && (!vectype_in
5130 	  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5131 	  || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5132 		       GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5133     return false;
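
  /* E.g. a VIEW_CONVERT_EXPR between V4SI and V4SF, or a NOP_EXPR between
     int and unsigned int vectors, keeps both the number of elements and
     the vector size and is handled here as a plain copy (the modes are
     illustrative).  */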
5134 
5135   /* We do not handle bit-precision changes.  */
5136   if ((CONVERT_EXPR_CODE_P (code)
5137        || code == VIEW_CONVERT_EXPR)
5138       && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5139       && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5140 	  || !type_has_mode_precision_p (TREE_TYPE (op)))
5141       /* But a conversion that does not change the bit-pattern is ok.  */
5142       && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5143 	    > TYPE_PRECISION (TREE_TYPE (op)))
5144 	   && TYPE_UNSIGNED (TREE_TYPE (op)))
5145       /* Conversion between boolean types of different sizes is
5146 	 a simple assignment in case their vectypes are the same
5147 	 boolean vectors.  */
5148       && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5149 	  || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5150     {
5151       if (dump_enabled_p ())
5152         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5153                          "type conversion to/from bit-precision "
5154                          "unsupported.\n");
5155       return false;
5156     }
5157 
5158   if (!vec_stmt) /* transformation not required.  */
5159     {
5160       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5161       if (dump_enabled_p ())
5162         dump_printf_loc (MSG_NOTE, vect_location,
5163                          "=== vectorizable_assignment ===\n");
5164       if (!slp_node)
5165 	vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5166       return true;
5167     }
5168 
5169   /* Transform.  */
5170   if (dump_enabled_p ())
5171     dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5172 
5173   /* Handle def.  */
5174   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5175 
5176   /* Handle use.  */
5177   for (j = 0; j < ncopies; j++)
5178     {
5179       /* Handle uses.  */
5180       if (j == 0)
5181         vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
5182       else
5183         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
5184 
5185       /* Arguments are ready.  Create the new vector stmt.  */
5186       FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5187        {
5188 	 if (CONVERT_EXPR_CODE_P (code)
5189 	     || code == VIEW_CONVERT_EXPR)
5190 	   vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5191          new_stmt = gimple_build_assign (vec_dest, vop);
5192          new_temp = make_ssa_name (vec_dest, new_stmt);
5193          gimple_assign_set_lhs (new_stmt, new_temp);
5194          vect_finish_stmt_generation (stmt, new_stmt, gsi);
5195          if (slp_node)
5196            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5197        }
5198 
5199       if (slp_node)
5200         continue;
5201 
5202       if (j == 0)
5203         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5204       else
5205         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5206 
5207       prev_stmt_info = vinfo_for_stmt (new_stmt);
5208     }
5209 
5210   vec_oprnds.release ();
5211   return true;
5212 }
5213 
5214 
5215 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5216    either as shift by a scalar or by a vector.  */
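
/* For example, a pattern recognizer wanting to emit "x >> shift" on a
   scalar integer type ITYPE can check up front whether such a shift is
   vectorizable at all (sketch):

     if (vect_supportable_shift (RSHIFT_EXPR, itype))
       ...emit the shift in the pattern...  */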
5217 
5218 bool
5219 vect_supportable_shift (enum tree_code code, tree scalar_type)
5220 {
5221 
5222   machine_mode vec_mode;
5223   optab optab;
5224   int icode;
5225   tree vectype;
5226 
5227   vectype = get_vectype_for_scalar_type (scalar_type);
5228   if (!vectype)
5229     return false;
5230 
5231   optab = optab_for_tree_code (code, vectype, optab_scalar);
5232   if (!optab
5233       || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5234     {
5235       optab = optab_for_tree_code (code, vectype, optab_vector);
5236       if (!optab
5237           || (optab_handler (optab, TYPE_MODE (vectype))
5238                       == CODE_FOR_nothing))
5239         return false;
5240     }
5241 
5242   vec_mode = TYPE_MODE (vectype);
5243   icode = (int) optab_handler (optab, vec_mode);
5244   if (icode == CODE_FOR_nothing)
5245     return false;
5246 
5247   return true;
5248 }
5249 
5250 
5251 /* Function vectorizable_shift.
5252 
5253    Check if STMT performs a shift operation that can be vectorized.
5254    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5255    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5256    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5257 
5258 static bool
5259 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
5260                     gimple **vec_stmt, slp_tree slp_node)
5261 {
5262   tree vec_dest;
5263   tree scalar_dest;
5264   tree op0, op1 = NULL;
5265   tree vec_oprnd1 = NULL_TREE;
5266   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5267   tree vectype;
5268   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5269   enum tree_code code;
5270   machine_mode vec_mode;
5271   tree new_temp;
5272   optab optab;
5273   int icode;
5274   machine_mode optab_op2_mode;
5275   gimple *def_stmt;
5276   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5277   int ndts = 2;
5278   gimple *new_stmt = NULL;
5279   stmt_vec_info prev_stmt_info;
5280   poly_uint64 nunits_in;
5281   poly_uint64 nunits_out;
5282   tree vectype_out;
5283   tree op1_vectype;
5284   int ncopies;
5285   int j, i;
5286   vec<tree> vec_oprnds0 = vNULL;
5287   vec<tree> vec_oprnds1 = vNULL;
5288   tree vop0, vop1;
5289   unsigned int k;
5290   bool scalar_shift_arg = true;
5291   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5292   vec_info *vinfo = stmt_info->vinfo;
5293 
5294   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5295     return false;
5296 
5297   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5298       && ! vec_stmt)
5299     return false;
5300 
5301   /* Is STMT a vectorizable binary/unary operation?   */
5302   if (!is_gimple_assign (stmt))
5303     return false;
5304 
5305   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5306     return false;
5307 
5308   code = gimple_assign_rhs_code (stmt);
5309 
5310   if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5311       || code == RROTATE_EXPR))
5312     return false;
5313 
5314   scalar_dest = gimple_assign_lhs (stmt);
5315   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5316   if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5317     {
5318       if (dump_enabled_p ())
5319         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5320                          "bit-precision shifts not supported.\n");
5321       return false;
5322     }
5323 
5324   op0 = gimple_assign_rhs1 (stmt);
5325   if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5326     {
5327       if (dump_enabled_p ())
5328         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5329                          "use not simple.\n");
5330       return false;
5331     }
5332   /* If op0 is an external or constant def, use a vector type with
5333      the same size as the output vector type.  */
5334   if (!vectype)
5335     vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5336   if (vec_stmt)
5337     gcc_assert (vectype);
5338   if (!vectype)
5339     {
5340       if (dump_enabled_p ())
5341         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5342                          "no vectype for scalar type\n");
5343       return false;
5344     }
5345 
5346   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5347   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5348   if (maybe_ne (nunits_out, nunits_in))
5349     return false;
5350 
5351   op1 = gimple_assign_rhs2 (stmt);
5352   if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
5353     {
5354       if (dump_enabled_p ())
5355         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5356                          "use not simple.\n");
5357       return false;
5358     }
5359 
5360   /* Multiple types in SLP are handled by creating the appropriate number of
5361      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5362      case of SLP.  */
5363   if (slp_node)
5364     ncopies = 1;
5365   else
5366     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5367 
5368   gcc_assert (ncopies >= 1);
5369 
5370   /* Determine whether the shift amount is a vector, or scalar.  If the
5371      shift/rotate amount is a vector, use the vector/vector shift optabs.  */
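
  /* E.g. "x[i] << 3", or "x[i] << n" with loop-invariant N, can use the
     vector-by-scalar optab, while "x[i] << y[i]" needs the
     vector-by-vector optab (X, Y and N are illustrative names).  */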
5372 
5373   if ((dt[1] == vect_internal_def
5374        || dt[1] == vect_induction_def)
5375       && !slp_node)
5376     scalar_shift_arg = false;
5377   else if (dt[1] == vect_constant_def
5378 	   || dt[1] == vect_external_def
5379 	   || dt[1] == vect_internal_def)
5380     {
5381       /* In SLP we need to check whether the shift count is the same;
5382 	 in loops, if it is a constant or invariant, it is always
5383 	 a scalar shift.  */
5384       if (slp_node)
5385 	{
5386 	  vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5387 	  gimple *slpstmt;
5388 
5389 	  FOR_EACH_VEC_ELT (stmts, k, slpstmt)
5390 	    if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5391 	      scalar_shift_arg = false;
5392 
5393 	  /* For internal SLP defs we have to make sure we see scalar stmts
5394 	     for all vector elements.
5395 	     ???  For different vectors we could resort to a different
5396 	     scalar shift operand but code-generation below simply always
5397 	     takes the first.  */
5398 	  if (dt[1] == vect_internal_def
5399 	      && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), stmts.length ()))
5400 	    scalar_shift_arg = false;
5401 	}
5402 
5403       /* If the shift amount is computed by a pattern stmt we cannot
5404          use the scalar amount directly, so give up and use a vector
5405 	 shift.  */
5406       if (dt[1] == vect_internal_def)
5407 	{
5408 	  gimple *def = SSA_NAME_DEF_STMT (op1);
5409 	  if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5410 	    scalar_shift_arg = false;
5411 	}
5412     }
5413   else
5414     {
5415       if (dump_enabled_p ())
5416         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5417                          "operand mode requires invariant argument.\n");
5418       return false;
5419     }
5420 
5421   /* Vector shifted by vector.  */
5422   if (!scalar_shift_arg)
5423     {
5424       optab = optab_for_tree_code (code, vectype, optab_vector);
5425       if (dump_enabled_p ())
5426         dump_printf_loc (MSG_NOTE, vect_location,
5427                          "vector/vector shift/rotate found.\n");
5428 
5429       if (!op1_vectype)
5430 	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5431       if (op1_vectype == NULL_TREE
5432 	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5433 	{
5434 	  if (dump_enabled_p ())
5435 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5436                              "unusable type for last operand in"
5437                              " vector/vector shift/rotate.\n");
5438 	  return false;
5439 	}
5440     }
5441   /* See if the machine has a vector shifted by scalar insn and if not
5442      then see if it has a vector shifted by vector insn.  */
5443   else
5444     {
5445       optab = optab_for_tree_code (code, vectype, optab_scalar);
5446       if (optab
5447           && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5448         {
5449           if (dump_enabled_p ())
5450             dump_printf_loc (MSG_NOTE, vect_location,
5451                              "vector/scalar shift/rotate found.\n");
5452         }
5453       else
5454         {
5455           optab = optab_for_tree_code (code, vectype, optab_vector);
5456           if (optab
5457                && (optab_handler (optab, TYPE_MODE (vectype))
5458                       != CODE_FOR_nothing))
5459             {
5460 	      scalar_shift_arg = false;
5461 
5462               if (dump_enabled_p ())
5463                 dump_printf_loc (MSG_NOTE, vect_location,
5464                                  "vector/vector shift/rotate found.\n");
5465 
5466               /* Unlike the other binary operators, shifts/rotates have
5467                  an int rhs instead of one of the same type as the lhs,
5468                  so make sure the scalar is the right type if we are
5469 		 dealing with vectors of long long/long/short/char.  */
5470               if (dt[1] == vect_constant_def)
5471                 op1 = fold_convert (TREE_TYPE (vectype), op1);
5472 	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5473 						   TREE_TYPE (op1)))
5474 		{
5475 		  if (slp_node
5476 		      && TYPE_MODE (TREE_TYPE (vectype))
5477 			 != TYPE_MODE (TREE_TYPE (op1)))
5478 		    {
5479                       if (dump_enabled_p ())
5480                         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5481                                          "unusable type for last operand in"
5482                                          " vector/vector shift/rotate.\n");
5483 		      return false;
5484 		    }
5485 		  if (vec_stmt && !slp_node)
5486 		    {
5487 		      op1 = fold_convert (TREE_TYPE (vectype), op1);
5488 		      op1 = vect_init_vector (stmt, op1,
5489 					      TREE_TYPE (vectype), NULL);
5490 		    }
5491 		}
5492             }
5493         }
5494     }
5495 
5496   /* Supportable by target?  */
5497   if (!optab)
5498     {
5499       if (dump_enabled_p ())
5500         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5501                          "no optab.\n");
5502       return false;
5503     }
5504   vec_mode = TYPE_MODE (vectype);
5505   icode = (int) optab_handler (optab, vec_mode);
5506   if (icode == CODE_FOR_nothing)
5507     {
5508       if (dump_enabled_p ())
5509         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5510                          "op not supported by target.\n");
5511       /* Check only during analysis.  */
5512       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5513 	  || (!vec_stmt
5514 	      && !vect_worthwhile_without_simd_p (vinfo, code)))
5515         return false;
5516       if (dump_enabled_p ())
5517         dump_printf_loc (MSG_NOTE, vect_location,
5518                          "proceeding using word mode.\n");
5519     }
5520 
5521   /* Worthwhile without SIMD support?  Check only during analysis.  */
5522   if (!vec_stmt
5523       && !VECTOR_MODE_P (TYPE_MODE (vectype))
5524       && !vect_worthwhile_without_simd_p (vinfo, code))
5525     {
5526       if (dump_enabled_p ())
5527         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5528                          "not worthwhile without SIMD support.\n");
5529       return false;
5530     }
5531 
5532   if (!vec_stmt) /* transformation not required.  */
5533     {
5534       STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5535       if (dump_enabled_p ())
5536         dump_printf_loc (MSG_NOTE, vect_location,
5537                          "=== vectorizable_shift ===\n");
5538       if (!slp_node)
5539 	vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5540       return true;
5541     }
5542 
5543   /* Transform.  */
5544 
5545   if (dump_enabled_p ())
5546     dump_printf_loc (MSG_NOTE, vect_location,
5547                      "transform binary/unary operation.\n");
5548 
5549   /* Handle def.  */
5550   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5551 
5552   prev_stmt_info = NULL;
5553   for (j = 0; j < ncopies; j++)
5554     {
5555       /* Handle uses.  */
5556       if (j == 0)
5557         {
5558           if (scalar_shift_arg)
5559             {
5560               /* Vector shl and shr insn patterns can be defined with scalar
5561                  operand 2 (shift operand).  In this case, use constant or loop
5562                  invariant op1 directly, without extending it to vector mode
5563                  first.  */
5564               optab_op2_mode = insn_data[icode].operand[2].mode;
5565               if (!VECTOR_MODE_P (optab_op2_mode))
5566                 {
5567                   if (dump_enabled_p ())
5568                     dump_printf_loc (MSG_NOTE, vect_location,
5569                                      "operand 1 using scalar mode.\n");
5570                   vec_oprnd1 = op1;
5571                   vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5572                   vec_oprnds1.quick_push (vec_oprnd1);
5573                   if (slp_node)
5574                     {
5575                       /* Store vec_oprnd1 for every vector stmt to be created
5576                          for SLP_NODE.  We check during the analysis that all
5577                          the shift arguments are the same.
5578                          TODO: Allow different constants for different vector
5579                          stmts generated for an SLP instance.  */
5580                       for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5581                         vec_oprnds1.quick_push (vec_oprnd1);
5582                     }
5583                 }
5584             }
5585 
5586           /* vec_oprnd1 is available if operand 1 should be of a scalar type
5587              (a special case for certain kinds of vector shifts); otherwise,
5588              operand 1 should be of a vector type (the usual case).  */
5589           if (vec_oprnd1)
5590             vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5591                                slp_node);
5592           else
5593             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5594                                slp_node);
5595         }
5596       else
5597         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5598 
5599       /* Arguments are ready.  Create the new vector stmt.  */
5600       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5601         {
5602           vop1 = vec_oprnds1[i];
5603 	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5604           new_temp = make_ssa_name (vec_dest, new_stmt);
5605           gimple_assign_set_lhs (new_stmt, new_temp);
5606           vect_finish_stmt_generation (stmt, new_stmt, gsi);
5607           if (slp_node)
5608             SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5609         }
5610 
5611       if (slp_node)
5612         continue;
5613 
5614       if (j == 0)
5615         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5616       else
5617         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5618       prev_stmt_info = vinfo_for_stmt (new_stmt);
5619     }
5620 
5621   vec_oprnds0.release ();
5622   vec_oprnds1.release ();
5623 
5624   return true;
5625 }
5626 
5627 
5628 /* Function vectorizable_operation.
5629 
5630    Check if STMT performs a binary, unary or ternary operation that can
5631    be vectorized.
5632    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5633    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5634    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5635 
5636 static bool
5637 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5638 			gimple **vec_stmt, slp_tree slp_node)
5639 {
5640   tree vec_dest;
5641   tree scalar_dest;
5642   tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5643   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5644   tree vectype;
5645   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5646   enum tree_code code, orig_code;
5647   machine_mode vec_mode;
5648   tree new_temp;
5649   int op_type;
5650   optab optab;
5651   bool target_support_p;
5652   gimple *def_stmt;
5653   enum vect_def_type dt[3]
5654     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5655   int ndts = 3;
5656   gimple *new_stmt = NULL;
5657   stmt_vec_info prev_stmt_info;
5658   poly_uint64 nunits_in;
5659   poly_uint64 nunits_out;
5660   tree vectype_out;
5661   int ncopies;
5662   int j, i;
5663   vec<tree> vec_oprnds0 = vNULL;
5664   vec<tree> vec_oprnds1 = vNULL;
5665   vec<tree> vec_oprnds2 = vNULL;
5666   tree vop0, vop1, vop2;
5667   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5668   vec_info *vinfo = stmt_info->vinfo;
5669 
5670   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5671     return false;
5672 
5673   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5674       && ! vec_stmt)
5675     return false;
5676 
5677   /* Is STMT a vectorizable binary/unary operation?   */
5678   if (!is_gimple_assign (stmt))
5679     return false;
5680 
5681   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5682     return false;
5683 
5684   orig_code = code = gimple_assign_rhs_code (stmt);
5685 
5686   /* For pointer addition and subtraction, we should use the normal
5687      plus and minus for the vector operation.  */
5688   if (code == POINTER_PLUS_EXPR)
5689     code = PLUS_EXPR;
5690   if (code == POINTER_DIFF_EXPR)
5691     code = MINUS_EXPR;
5692 
5693   /* Support only unary or binary operations.  */
5694   op_type = TREE_CODE_LENGTH (code);
5695   if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5696     {
5697       if (dump_enabled_p ())
5698         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5699                          "num. args = %d (not unary/binary/ternary op).\n",
5700                          op_type);
5701       return false;
5702     }
5703 
5704   scalar_dest = gimple_assign_lhs (stmt);
5705   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5706 
5707   /* Most operations cannot handle bit-precision types without extra
5708      truncations.  */
5709   if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5710       && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5711       /* Exception are bitwise binary operations.  */
5712       && code != BIT_IOR_EXPR
5713       && code != BIT_XOR_EXPR
5714       && code != BIT_AND_EXPR)
5715     {
5716       if (dump_enabled_p ())
5717         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5718                          "bit-precision arithmetic not supported.\n");
5719       return false;
5720     }
5721 
5722   op0 = gimple_assign_rhs1 (stmt);
5723   if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5724     {
5725       if (dump_enabled_p ())
5726         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5727                          "use not simple.\n");
5728       return false;
5729     }
5730   /* If op0 is an external or constant def, use a vector type with
5731      the same size as the output vector type.  */
5732   if (!vectype)
5733     {
5734       /* For a boolean type we cannot determine the vectype from an
5735 	 invariant value (we don't know whether it is a vector
5736 	 of booleans or a vector of integers).  We use the output
5737 	 vectype because operations on booleans don't change the
5738 	 type.  */
5739       if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5740 	{
5741 	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5742 	    {
5743 	      if (dump_enabled_p ())
5744 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5745 				 "not supported operation on bool value.\n");
5746 	      return false;
5747 	    }
5748 	  vectype = vectype_out;
5749 	}
5750       else
5751 	vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5752     }
5753   if (vec_stmt)
5754     gcc_assert (vectype);
5755   if (!vectype)
5756     {
5757       if (dump_enabled_p ())
5758         {
5759           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5760                            "no vectype for scalar type ");
5761           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5762                              TREE_TYPE (op0));
5763           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5764         }
5765 
5766       return false;
5767     }
5768 
5769   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5770   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5771   if (maybe_ne (nunits_out, nunits_in))
5772     return false;
5773 
5774   if (op_type == binary_op || op_type == ternary_op)
5775     {
5776       op1 = gimple_assign_rhs2 (stmt);
5777       if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5778 	{
5779 	  if (dump_enabled_p ())
5780 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5781                              "use not simple.\n");
5782 	  return false;
5783 	}
5784     }
5785   if (op_type == ternary_op)
5786     {
5787       op2 = gimple_assign_rhs3 (stmt);
5788       if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5789 	{
5790 	  if (dump_enabled_p ())
5791 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5792                              "use not simple.\n");
5793 	  return false;
5794 	}
5795     }
5796 
5797   /* Multiple types in SLP are handled by creating the appropriate number of
5798      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5799      case of SLP.  */
5800   if (slp_node)
5801     ncopies = 1;
5802   else
5803     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5804 
5805   gcc_assert (ncopies >= 1);
5806 
5807   /* Shifts are handled in vectorizable_shift ().  */
5808   if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5809       || code == RROTATE_EXPR)
5810    return false;
5811 
5812   /* Supportable by target?  */
5813 
5814   vec_mode = TYPE_MODE (vectype);
5815   if (code == MULT_HIGHPART_EXPR)
5816     target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5817   else
5818     {
5819       optab = optab_for_tree_code (code, vectype, optab_default);
5820       if (!optab)
5821 	{
5822           if (dump_enabled_p ())
5823             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5824                              "no optab.\n");
5825 	  return false;
5826 	}
5827       target_support_p = (optab_handler (optab, vec_mode)
5828 			  != CODE_FOR_nothing);
5829     }
5830 
5831   if (!target_support_p)
5832     {
5833       if (dump_enabled_p ())
5834 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5835                          "op not supported by target.\n");
5836       /* Check only during analysis.  */
5837       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5838 	  || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5839         return false;
5840       if (dump_enabled_p ())
5841 	dump_printf_loc (MSG_NOTE, vect_location,
5842                          "proceeding using word mode.\n");
5843     }
5844 
5845   /* Worthwhile without SIMD support?  Check only during analysis.  */
5846   if (!VECTOR_MODE_P (vec_mode)
5847       && !vec_stmt
5848       && !vect_worthwhile_without_simd_p (vinfo, code))
5849     {
5850       if (dump_enabled_p ())
5851         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5852                          "not worthwhile without SIMD support.\n");
5853       return false;
5854     }
5855 
5856   if (!vec_stmt) /* transformation not required.  */
5857     {
5858       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5859       if (dump_enabled_p ())
5860         dump_printf_loc (MSG_NOTE, vect_location,
5861                          "=== vectorizable_operation ===\n");
5862       if (!slp_node)
5863 	vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5864       return true;
5865     }
5866 
5867   /* Transform.  */
5868 
5869   if (dump_enabled_p ())
5870     dump_printf_loc (MSG_NOTE, vect_location,
5871                      "transform binary/unary operation.\n");
5872 
5873   /* Handle def.  */
5874   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5875 
5876   /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5877      vectors with unsigned elements, but the result is signed.  So, we
5878      need to compute the MINUS_EXPR into a vectype temporary and
5879      VIEW_CONVERT_EXPR it into the final vectype_out result.  */
5880   tree vec_cvt_dest = NULL_TREE;
5881   if (orig_code == POINTER_DIFF_EXPR)
5882     vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5883 
5884   /* In case the vectorization factor (VF) is bigger than the number
5885      of elements that we can fit in a vectype (nunits), we have to generate
5886      more than one vector stmt, i.e. we need to "unroll" the
5887      vector stmt by a factor of VF/nunits.  In doing so, we record a pointer
5888      from one copy of the vector stmt to the next, in the field
5889      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
5890      stages to find the correct vector defs to be used when vectorizing
5891      stmts that use the defs of the current stmt.  The example below
5892      illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5893      we need to create 4 vectorized stmts):
5894 
5895      before vectorization:
5896                                 RELATED_STMT    VEC_STMT
5897         S1:     x = memref      -               -
5898         S2:     z = x + 1       -               -
5899 
5900      step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5901              there):
5902                                 RELATED_STMT    VEC_STMT
5903         VS1_0:  vx0 = memref0   VS1_1           -
5904         VS1_1:  vx1 = memref1   VS1_2           -
5905         VS1_2:  vx2 = memref2   VS1_3           -
5906         VS1_3:  vx3 = memref3   -               -
5907         S1:     x = load        -               VS1_0
5908         S2:     z = x + 1       -               -
5909 
5910      step2: vectorize stmt S2 (done here):
5911         To vectorize stmt S2 we first need to find the relevant vector
5912         def for the first operand 'x'.  This is, as usual, obtained from
5913         the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5914         that defines 'x' (S1).  This way we find the stmt VS1_0, and the
5915         relevant vector def 'vx0'.  Having found 'vx0' we can generate
5916         the vector stmt VS2_0, and as usual, record it in the
5917         STMT_VINFO_VEC_STMT of stmt S2.
5918         When creating the second copy (VS2_1), we obtain the relevant vector
5919         def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5920         stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
5921         vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
5922         pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5923         Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
5924         chain of stmts and pointers:
5925                                 RELATED_STMT    VEC_STMT
5926         VS1_0:  vx0 = memref0   VS1_1           -
5927         VS1_1:  vx1 = memref1   VS1_2           -
5928         VS1_2:  vx2 = memref2   VS1_3           -
5929         VS1_3:  vx3 = memref3   -               -
5930         S1:     x = load        -               VS1_0
5931         VS2_0:  vz0 = vx0 + v1  VS2_1           -
5932         VS2_1:  vz1 = vx1 + v1  VS2_2           -
5933         VS2_2:  vz2 = vx2 + v1  VS2_3           -
5934         VS2_3:  vz3 = vx3 + v1  -               -
5935         S2:     z = x + 1       -               VS2_0  */
5936 
5937   prev_stmt_info = NULL;
5938   for (j = 0; j < ncopies; j++)
5939     {
5940       /* Handle uses.  */
5941       if (j == 0)
5942 	{
5943 	  if (op_type == binary_op)
5944 	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5945 			       slp_node);
5946 	  else if (op_type == ternary_op)
5947 	    {
5948 	      if (slp_node)
5949 		{
5950 		  auto_vec<tree> ops(3);
5951 		  ops.quick_push (op0);
5952 		  ops.quick_push (op1);
5953 		  ops.quick_push (op2);
5954 		  auto_vec<vec<tree> > vec_defs(3);
5955 		  vect_get_slp_defs (ops, slp_node, &vec_defs);
5956 		  vec_oprnds0 = vec_defs[0];
5957 		  vec_oprnds1 = vec_defs[1];
5958 		  vec_oprnds2 = vec_defs[2];
5959 		}
5960 	      else
5961 		{
5962 		  vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5963 				     NULL);
5964 		  vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5965 				     NULL);
5966 		}
5967 	    }
5968 	  else
5969 	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5970 			       slp_node);
5971 	}
5972       else
5973 	{
5974 	  vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5975 	  if (op_type == ternary_op)
5976 	    {
5977 	      tree vec_oprnd = vec_oprnds2.pop ();
5978 	      vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5979 							           vec_oprnd));
5980 	    }
5981 	}
5982 
5983       /* Arguments are ready.  Create the new vector stmt.  */
5984       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5985         {
5986 	  vop1 = ((op_type == binary_op || op_type == ternary_op)
5987 		  ? vec_oprnds1[i] : NULL_TREE);
5988 	  vop2 = ((op_type == ternary_op)
5989 		  ? vec_oprnds2[i] : NULL_TREE);
5990 	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5991 	  new_temp = make_ssa_name (vec_dest, new_stmt);
5992 	  gimple_assign_set_lhs (new_stmt, new_temp);
5993 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
5994 	  if (vec_cvt_dest)
5995 	    {
5996 	      new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
5997 	      new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
5998 					      new_temp);
5999 	      new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6000 	      gimple_assign_set_lhs (new_stmt, new_temp);
6001 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
6002 	    }
6003           if (slp_node)
6004 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6005         }
6006 
6007       if (slp_node)
6008         continue;
6009 
6010       if (j == 0)
6011 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6012       else
6013 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6014       prev_stmt_info = vinfo_for_stmt (new_stmt);
6015     }
6016 
6017   vec_oprnds0.release ();
6018   vec_oprnds1.release ();
6019   vec_oprnds2.release ();
6020 
6021   return true;
6022 }
6023 
6024 /* A helper function to ensure data reference DR's base alignment.  */
6025 
6026 static void
6027 ensure_base_align (struct data_reference *dr)
6028 {
6029   if (!dr->aux)
6030     return;
6031 
6032   if (DR_VECT_AUX (dr)->base_misaligned)
6033     {
6034       tree base_decl = DR_VECT_AUX (dr)->base_decl;
6035 
6036       unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
6037 
6038       if (decl_in_symtab_p (base_decl))
6039 	symtab_node::get (base_decl)->increase_alignment (align_base_to);
6040       else
6041 	{
6042 	  SET_DECL_ALIGN (base_decl, align_base_to);
6043           DECL_USER_ALIGN (base_decl) = 1;
6044 	}
6045       DR_VECT_AUX (dr)->base_misaligned = false;
6046     }
6047 }
6048 
6049 
6050 /* Function get_group_alias_ptr_type.
6051 
6052    Return the alias type for the group starting at FIRST_STMT.  */
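6052 
6052    For example (illustrative note): if one reference in the group is
6052    accessed through an int * and another through a float *, their alias
6052    sets differ and the function falls back to ptr_type_node, which can
6052    alias anything; otherwise the alias pointer type of the first
6052    reference is used for the whole group.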
6053 
6054 static tree
6055 get_group_alias_ptr_type (gimple *first_stmt)
6056 {
6057   struct data_reference *first_dr, *next_dr;
6058   gimple *next_stmt;
6059 
6060   first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6061   next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
6062   while (next_stmt)
6063     {
6064       next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
6065       if (get_alias_set (DR_REF (first_dr))
6066 	  != get_alias_set (DR_REF (next_dr)))
6067 	{
6068 	  if (dump_enabled_p ())
6069 	    dump_printf_loc (MSG_NOTE, vect_location,
6070 			     "conflicting alias set types.\n");
6071 	  return ptr_type_node;
6072 	}
6073       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6074     }
6075   return reference_alias_ptr_type (DR_REF (first_dr));
6076 }
6077 
6078 
6079 /* Function vectorizable_store.
6080 
6081    Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
6082    can be vectorized.
6083    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6084    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6085    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
6086 
6087 static bool
6088 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6089                     slp_tree slp_node)
6090 {
6091   tree data_ref;
6092   tree op;
6093   tree vec_oprnd = NULL_TREE;
6094   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6095   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6096   tree elem_type;
6097   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6098   struct loop *loop = NULL;
6099   machine_mode vec_mode;
6100   tree dummy;
6101   enum dr_alignment_support alignment_support_scheme;
6102   gimple *def_stmt;
6103   enum vect_def_type rhs_dt = vect_unknown_def_type;
6104   enum vect_def_type mask_dt = vect_unknown_def_type;
6105   stmt_vec_info prev_stmt_info = NULL;
6106   tree dataref_ptr = NULL_TREE;
6107   tree dataref_offset = NULL_TREE;
6108   gimple *ptr_incr = NULL;
6109   int ncopies;
6110   int j;
6111   gimple *next_stmt, *first_stmt;
6112   bool grouped_store;
6113   unsigned int group_size, i;
6114   vec<tree> oprnds = vNULL;
6115   vec<tree> result_chain = vNULL;
6116   bool inv_p;
6117   tree offset = NULL_TREE;
6118   vec<tree> vec_oprnds = vNULL;
6119   bool slp = (slp_node != NULL);
6120   unsigned int vec_num;
6121   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6122   vec_info *vinfo = stmt_info->vinfo;
6123   tree aggr_type;
6124   gather_scatter_info gs_info;
6125   gimple *new_stmt;
6126   poly_uint64 vf;
6127   vec_load_store_type vls_type;
6128   tree ref_type;
6129 
6130   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6131     return false;
6132 
6133   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6134       && ! vec_stmt)
6135     return false;
6136 
6137   /* Is vectorizable store? */
6138 
6139   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6140   if (is_gimple_assign (stmt))
6141     {
6142       tree scalar_dest = gimple_assign_lhs (stmt);
6143       if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6144 	  && is_pattern_stmt_p (stmt_info))
6145 	scalar_dest = TREE_OPERAND (scalar_dest, 0);
6146       if (TREE_CODE (scalar_dest) != ARRAY_REF
6147 	  && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6148 	  && TREE_CODE (scalar_dest) != INDIRECT_REF
6149 	  && TREE_CODE (scalar_dest) != COMPONENT_REF
6150 	  && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6151 	  && TREE_CODE (scalar_dest) != REALPART_EXPR
6152 	  && TREE_CODE (scalar_dest) != MEM_REF)
6153 	return false;
6154     }
6155   else
6156     {
6157       gcall *call = dyn_cast <gcall *> (stmt);
6158       if (!call || !gimple_call_internal_p (call))
6159 	return false;
6160 
6161       internal_fn ifn = gimple_call_internal_fn (call);
6162       if (!internal_store_fn_p (ifn))
6163 	return false;
6164 
6165       if (slp_node != NULL)
6166 	{
6167 	  if (dump_enabled_p ())
6168 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6169 			     "SLP of masked stores not supported.\n");
6170 	  return false;
6171 	}
6172 
6173       int mask_index = internal_fn_mask_index (ifn);
6174       if (mask_index >= 0)
6175 	{
6176 	  mask = gimple_call_arg (call, mask_index);
6177 	  if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
6178 					   &mask_vectype))
6179 	    return false;
6180 	}
6181     }
6182 
6183   op = vect_get_store_rhs (stmt);
6184 
6185   /* Cannot have hybrid store SLP -- that would mean storing to the
6186      same location twice.  */
6187   gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6188 
6189   tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
6190   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6191 
6192   if (loop_vinfo)
6193     {
6194       loop = LOOP_VINFO_LOOP (loop_vinfo);
6195       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6196     }
6197   else
6198     vf = 1;
6199 
6200   /* Multiple types in SLP are handled by creating the appropriate number of
6201      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
6202      case of SLP.  */
6203   if (slp)
6204     ncopies = 1;
6205   else
6206     ncopies = vect_get_num_copies (loop_vinfo, vectype);
6207 
6208   gcc_assert (ncopies >= 1);
6209 
6210   /* FORNOW.  This restriction should be relaxed.  */
6211   if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
6212     {
6213       if (dump_enabled_p ())
6214 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6215 			 "multiple types in nested loop.\n");
6216       return false;
6217     }
6218 
6219   if (!vect_check_store_rhs (stmt, op, &rhs_dt, &rhs_vectype, &vls_type))
6220     return false;
6221 
6222   elem_type = TREE_TYPE (vectype);
6223   vec_mode = TYPE_MODE (vectype);
6224 
6225   if (!STMT_VINFO_DATA_REF (stmt_info))
6226     return false;
6227 
6228   vect_memory_access_type memory_access_type;
6229   if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
6230 			    &memory_access_type, &gs_info))
6231     return false;
6232 
6233   if (mask)
6234     {
6235       if (memory_access_type == VMAT_CONTIGUOUS)
6236 	{
6237 	  if (!VECTOR_MODE_P (vec_mode)
6238 	      || !can_vec_mask_load_store_p (vec_mode,
6239 					     TYPE_MODE (mask_vectype), false))
6240 	    return false;
6241 	}
6242       else if (memory_access_type != VMAT_LOAD_STORE_LANES
6243 	       && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
6244 	{
6245 	  if (dump_enabled_p ())
6246 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6247 			     "unsupported access type for masked store.\n");
6248 	  return false;
6249 	}
6250     }
6251   else
6252     {
6253       /* FORNOW.  In some cases we can vectorize even if the data type is
6254 	 not supported (e.g. array initialization with 0).  */
6255       if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6256 	return false;
6257     }
6258 
6259   grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
6260 		   && memory_access_type != VMAT_GATHER_SCATTER
6261 		   && (slp || memory_access_type != VMAT_CONTIGUOUS));
6262   if (grouped_store)
6263     {
6264       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6265       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6266       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6267     }
6268   else
6269     {
6270       first_stmt = stmt;
6271       first_dr = dr;
6272       group_size = vec_num = 1;
6273     }
6274 
6275   if (!vec_stmt) /* transformation not required.  */
6276     {
6277       STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6278 
6279       if (loop_vinfo
6280 	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6281 	check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
6282 				  memory_access_type, &gs_info);
6283 
6284       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
6285       /* The SLP costs are calculated during SLP analysis.  */
6286       if (!slp_node)
6287 	vect_model_store_cost (stmt_info, ncopies, memory_access_type,
6288 			       vls_type, NULL, NULL, NULL);
6289       return true;
6290     }
6291   gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6292 
6293   /* Transform.  */
6294 
6295   ensure_base_align (dr);
6296 
6297   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
6298     {
6299       tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
6300       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6301       tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6302       tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6303       edge pe = loop_preheader_edge (loop);
6304       gimple_seq seq;
6305       basic_block new_bb;
6306       enum { NARROW, NONE, WIDEN } modifier;
6307       poly_uint64 scatter_off_nunits
6308 	= TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6309 
6310       if (known_eq (nunits, scatter_off_nunits))
6311 	modifier = NONE;
6312       else if (known_eq (nunits * 2, scatter_off_nunits))
6313 	{
6314 	  modifier = WIDEN;
6315 
6316 	  /* Currently gathers and scatters are only supported for
6317 	     fixed-length vectors.  */
6318 	  unsigned int count = scatter_off_nunits.to_constant ();
6319 	  vec_perm_builder sel (count, count, 1);
6320 	  for (i = 0; i < (unsigned int) count; ++i)
6321 	    sel.quick_push (i | (count / 2));
6322 
6323 	  vec_perm_indices indices (sel, 1, count);
6324 	  perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6325 						  indices);
6326 	  gcc_assert (perm_mask != NULL_TREE);
6327 	}
6328       else if (known_eq (nunits, scatter_off_nunits * 2))
6329 	{
6330 	  modifier = NARROW;
6331 
6332 	  /* Currently gathers and scatters are only supported for
6333 	     fixed-length vectors.  */
6334 	  unsigned int count = nunits.to_constant ();
6335 	  vec_perm_builder sel (count, count, 1);
6336 	  for (i = 0; i < (unsigned int) count; ++i)
6337 	    sel.quick_push (i | (count / 2));
6338 
6339 	  vec_perm_indices indices (sel, 2, count);
6340 	  perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6341 	  gcc_assert (perm_mask != NULL_TREE);
6342 	  ncopies *= 2;
6343 	}
6344       else
6345 	gcc_unreachable ();
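      /* Illustrative example (no additional code path): with nunits == 4
	 and scatter_off_nunits == 8 (WIDEN), the mask built above is
	 { 4, 5, 6, 7, 4, 5, 6, 7 } and selects the high half of the
	 offset vector for every odd copy.  In the NARROW case, with
	 nunits == 8 and scatter_off_nunits == 4, the same index pattern
	 applied to VECTYPE selects the high half of the rhs vector
	 instead, and NCOPIES is doubled.  */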
6346 
6347       rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6348       ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6349       masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6350       idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6351       srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6352       scaletype = TREE_VALUE (arglist);
6353 
6354       gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6355 			   && TREE_CODE (rettype) == VOID_TYPE);
6356 
6357       ptr = fold_convert (ptrtype, gs_info.base);
6358       if (!is_gimple_min_invariant (ptr))
6359 	{
6360 	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6361 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6362 	  gcc_assert (!new_bb);
6363 	}
6364 
6365       /* Currently we support only unconditional scatter stores,
6366 	 so mask should be all ones.  */
6367       mask = build_int_cst (masktype, -1);
6368       mask = vect_init_vector (stmt, mask, masktype, NULL);
6369 
6370       scale = build_int_cst (scaletype, gs_info.scale);
6371 
6372       prev_stmt_info = NULL;
6373       for (j = 0; j < ncopies; ++j)
6374 	{
6375 	  if (j == 0)
6376 	    {
6377 	      src = vec_oprnd1
6378 		= vect_get_vec_def_for_operand (op, stmt);
6379 	      op = vec_oprnd0
6380 		= vect_get_vec_def_for_operand (gs_info.offset, stmt);
6381 	    }
6382 	  else if (modifier != NONE && (j & 1))
6383 	    {
6384 	      if (modifier == WIDEN)
6385 		{
6386 		  src = vec_oprnd1
6387 		    = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
6388 		  op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6389 					     stmt, gsi);
6390 		}
6391 	      else if (modifier == NARROW)
6392 		{
6393 		  src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6394 					      stmt, gsi);
6395 		  op = vec_oprnd0
6396 		    = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6397 						      vec_oprnd0);
6398 		}
6399 	      else
6400 		gcc_unreachable ();
6401 	    }
6402 	  else
6403 	    {
6404 	      src = vec_oprnd1
6405 		= vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
6406 	      op = vec_oprnd0
6407 		= vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6408 						  vec_oprnd0);
6409 	    }
6410 
6411 	  if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6412 	    {
6413 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6414 				    TYPE_VECTOR_SUBPARTS (srctype)));
6415 	      var = vect_get_new_ssa_name (srctype, vect_simple_var);
6416 	      src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6417 	      new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6418 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
6419 	      src = var;
6420 	    }
6421 
6422 	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6423 	    {
6424 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6425 				    TYPE_VECTOR_SUBPARTS (idxtype)));
6426 	      var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6427 	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6428 	      new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6429 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
6430 	      op = var;
6431 	    }
6432 
6433 	  new_stmt
6434 	    = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
6435 
6436 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
6437 
6438 	  if (prev_stmt_info == NULL)
6439 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6440 	  else
6441 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6442 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
6443 	}
6444       return true;
6445     }
6446 
6447   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6448     {
6449       gimple *group_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6450       GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt))++;
6451     }
6452 
6453   if (grouped_store)
6454     {
6455       /* FORNOW */
6456       gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
6457 
6458       /* We vectorize all the stmts of the interleaving group when we
6459 	 reach the last stmt in the group.  */
6460       if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
6461 	  < GROUP_SIZE (vinfo_for_stmt (first_stmt))
6462 	  && !slp)
6463 	{
6464 	  *vec_stmt = NULL;
6465 	  return true;
6466 	}
6467 
6468       if (slp)
6469         {
6470           grouped_store = false;
6471           /* VEC_NUM is the number of vect stmts to be created for this
6472              group.  */
6473           vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6474           first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6475 	  gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
6476           first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6477 	  op = vect_get_store_rhs (first_stmt);
6478         }
6479       else
6480         /* VEC_NUM is the number of vect stmts to be created for this
6481            group.  */
6482 	vec_num = group_size;
6483 
6484       ref_type = get_group_alias_ptr_type (first_stmt);
6485     }
6486   else
6487     ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6488 
6489   if (dump_enabled_p ())
6490     dump_printf_loc (MSG_NOTE, vect_location,
6491                      "transform store. ncopies = %d\n", ncopies);
6492 
6493   if (memory_access_type == VMAT_ELEMENTWISE
6494       || memory_access_type == VMAT_STRIDED_SLP)
6495     {
6496       gimple_stmt_iterator incr_gsi;
6497       bool insert_after;
6498       gimple *incr;
6499       tree offvar;
6500       tree ivstep;
6501       tree running_off;
6502       tree stride_base, stride_step, alias_off;
6503       tree vec_oprnd;
6504       unsigned int g;
6505       /* Checked by get_load_store_type.  */
6506       unsigned int const_nunits = nunits.to_constant ();
6507 
6508       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6509       gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6510 
6511       stride_base
6512 	= fold_build_pointer_plus
6513 	    (DR_BASE_ADDRESS (first_dr),
6514 	     size_binop (PLUS_EXPR,
6515 			 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6516 			 convert_to_ptrofftype (DR_INIT (first_dr))));
6517       stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6518 
6519       /* For a store with loop-invariant (but other than power-of-2)
6520          stride (i.e. not a grouped access) like so:
6521 
6522 	   for (i = 0; i < n; i += stride)
6523 	     array[i] = ...;
6524 
6525 	 we generate a new induction variable and new stores from
6526 	 the components of the (vectorized) rhs:
6527 
6528 	   for (j = 0; ; j += VF*stride)
6529 	     vectemp = ...;
6530 	     tmp1 = vectemp[0];
6531 	     array[j] = tmp1;
6532 	     tmp2 = vectemp[1];
6533 	     array[j + stride] = tmp2;
6534 	     ...
6535          */
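      /* For instance (illustrative numbers only): with stride == 3 and
	 VF == 4, the four lanes of VECTEMP above are stored to array[j],
	 array[j + 3], array[j + 6] and array[j + 9], and j advances by
	 3 * 4 == 12 elements per vector iteration.  */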
6536 
6537       unsigned nstores = const_nunits;
6538       unsigned lnel = 1;
6539       tree ltype = elem_type;
6540       tree lvectype = vectype;
6541       if (slp)
6542 	{
6543 	  if (group_size < const_nunits
6544 	      && const_nunits % group_size == 0)
6545 	    {
6546 	      nstores = const_nunits / group_size;
6547 	      lnel = group_size;
6548 	      ltype = build_vector_type (elem_type, group_size);
6549 	      lvectype = vectype;
6550 
6551 	      /* First check if the vec_extract optab does not support
6552 		 extracting the vector elements directly.  */
6553 	      scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6554 	      machine_mode vmode;
6555 	      if (!mode_for_vector (elmode, group_size).exists (&vmode)
6556 		  || !VECTOR_MODE_P (vmode)
6557 		  || !targetm.vector_mode_supported_p (vmode)
6558 		  || (convert_optab_handler (vec_extract_optab,
6559 					     TYPE_MODE (vectype), vmode)
6560 		      == CODE_FOR_nothing))
6561 		{
6562 		  /* Try to avoid emitting an extract of vector elements
6563 		     by performing the extracts using an integer type of the
6564 		     same size, extracting from a vector of those and then
6565 		     re-interpreting it as the original vector type if
6566 		     supported.  */
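		  /* Illustrative example (assuming the target supports the
		     modes involved): for a group of two 32-bit floats with
		     const_nunits == 8, LSIZE is 64, so the V8SF vector is
		     reinterpreted as a vector of four 64-bit integers and
		     each store writes two floats with a single 64-bit
		     integer store.  */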
6567 		  unsigned lsize
6568 		    = group_size * GET_MODE_BITSIZE (elmode);
6569 		  elmode = int_mode_for_size (lsize, 0).require ();
6570 		  unsigned int lnunits = const_nunits / group_size;
6571 		  /* If we can't construct such a vector fall back to
6572 		     element extracts from the original vector type and
6573 		     element size stores.  */
6574 		  if (mode_for_vector (elmode, lnunits).exists (&vmode)
6575 		      && VECTOR_MODE_P (vmode)
6576 		      && targetm.vector_mode_supported_p (vmode)
6577 		      && (convert_optab_handler (vec_extract_optab,
6578 						 vmode, elmode)
6579 			  != CODE_FOR_nothing))
6580 		    {
6581 		      nstores = lnunits;
6582 		      lnel = group_size;
6583 		      ltype = build_nonstandard_integer_type (lsize, 1);
6584 		      lvectype = build_vector_type (ltype, nstores);
6585 		    }
6586 		  /* Else fall back to vector extraction anyway.
6587 		     Fewer stores are more important than avoiding spilling
6588 		     of the vector we extract from.  Compared to the
6589 		     construction case in vectorizable_load no store-forwarding
6590 		     issue exists here for reasonable archs.  */
6591 		}
6592 	    }
6593 	  else if (group_size >= const_nunits
6594 		   && group_size % const_nunits == 0)
6595 	    {
6596 	      nstores = 1;
6597 	      lnel = const_nunits;
6598 	      ltype = vectype;
6599 	      lvectype = vectype;
6600 	    }
6601 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6602 	  ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6603 	}
6604 
6605       ivstep = stride_step;
6606       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6607 			    build_int_cst (TREE_TYPE (ivstep), vf));
6608 
6609       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6610 
6611       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6612       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
6613       create_iv (stride_base, ivstep, NULL,
6614 		 loop, &incr_gsi, insert_after,
6615 		 &offvar, NULL);
6616       incr = gsi_stmt (incr_gsi);
6617       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6618 
6619       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
6620 
6621       prev_stmt_info = NULL;
6622       alias_off = build_int_cst (ref_type, 0);
6623       next_stmt = first_stmt;
6624       for (g = 0; g < group_size; g++)
6625 	{
6626 	  running_off = offvar;
6627 	  if (g)
6628 	    {
6629 	      tree size = TYPE_SIZE_UNIT (ltype);
6630 	      tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6631 				      size);
6632 	      tree newoff = copy_ssa_name (running_off, NULL);
6633 	      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6634 					  running_off, pos);
6635 	      vect_finish_stmt_generation (stmt, incr, gsi);
6636 	      running_off = newoff;
6637 	    }
6638 	  unsigned int group_el = 0;
6639 	  unsigned HOST_WIDE_INT
6640 	    elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6641 	  for (j = 0; j < ncopies; j++)
6642 	    {
6643 	      /* We've set op and dt above, from vect_get_store_rhs,
6644 		 and first_stmt == stmt.  */
6645 	      if (j == 0)
6646 		{
6647 		  if (slp)
6648 		    {
6649 		      vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6650 					 slp_node);
6651 		      vec_oprnd = vec_oprnds[0];
6652 		    }
6653 		  else
6654 		    {
6655 		      op = vect_get_store_rhs (next_stmt);
6656 		      vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6657 		    }
6658 		}
6659 	      else
6660 		{
6661 		  if (slp)
6662 		    vec_oprnd = vec_oprnds[j];
6663 		  else
6664 		    {
6665 		      vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
6666 		      vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt,
6667 								  vec_oprnd);
6668 		    }
6669 		}
6670 	      /* Pun the vector to extract from if necessary.  */
6671 	      if (lvectype != vectype)
6672 		{
6673 		  tree tem = make_ssa_name (lvectype);
6674 		  gimple *pun
6675 		    = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6676 							lvectype, vec_oprnd));
6677 		  vect_finish_stmt_generation (stmt, pun, gsi);
6678 		  vec_oprnd = tem;
6679 		}
6680 	      for (i = 0; i < nstores; i++)
6681 		{
6682 		  tree newref, newoff;
6683 		  gimple *incr, *assign;
6684 		  tree size = TYPE_SIZE (ltype);
6685 		  /* Extract the i'th component.  */
6686 		  tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6687 					  bitsize_int (i), size);
6688 		  tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6689 					   size, pos);
6690 
6691 		  elem = force_gimple_operand_gsi (gsi, elem, true,
6692 						   NULL_TREE, true,
6693 						   GSI_SAME_STMT);
6694 
6695 		  tree this_off = build_int_cst (TREE_TYPE (alias_off),
6696 						 group_el * elsz);
6697 		  newref = build2 (MEM_REF, ltype,
6698 				   running_off, this_off);
6699 		  vect_copy_ref_info (newref, DR_REF (first_dr));
6700 
6701 		  /* And store it to *running_off.  */
6702 		  assign = gimple_build_assign (newref, elem);
6703 		  vect_finish_stmt_generation (stmt, assign, gsi);
6704 
6705 		  group_el += lnel;
6706 		  if (! slp
6707 		      || group_el == group_size)
6708 		    {
6709 		      newoff = copy_ssa_name (running_off, NULL);
6710 		      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6711 						  running_off, stride_step);
6712 		      vect_finish_stmt_generation (stmt, incr, gsi);
6713 
6714 		      running_off = newoff;
6715 		      group_el = 0;
6716 		    }
6717 		  if (g == group_size - 1
6718 		      && !slp)
6719 		    {
6720 		      if (j == 0 && i == 0)
6721 			STMT_VINFO_VEC_STMT (stmt_info)
6722 			    = *vec_stmt = assign;
6723 		      else
6724 			STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6725 		      prev_stmt_info = vinfo_for_stmt (assign);
6726 		    }
6727 		}
6728 	    }
6729 	  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6730 	  if (slp)
6731 	    break;
6732 	}
6733 
6734       vec_oprnds.release ();
6735       return true;
6736     }
6737 
6738   auto_vec<tree> dr_chain (group_size);
6739   oprnds.create (group_size);
6740 
6741   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6742   gcc_assert (alignment_support_scheme);
6743   vec_loop_masks *loop_masks
6744     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6745        ? &LOOP_VINFO_MASKS (loop_vinfo)
6746        : NULL);
6747   /* Targets with store-lane instructions must not require explicit
6748      realignment.  vect_supportable_dr_alignment always returns either
6749      dr_aligned or dr_unaligned_supported for masked operations.  */
6750   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6751 	       && !mask
6752 	       && !loop_masks)
6753 	      || alignment_support_scheme == dr_aligned
6754 	      || alignment_support_scheme == dr_unaligned_supported);
6755 
6756   if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6757       || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6758     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6759 
6760   tree bump;
6761   tree vec_offset = NULL_TREE;
6762   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6763     {
6764       aggr_type = NULL_TREE;
6765       bump = NULL_TREE;
6766     }
6767   else if (memory_access_type == VMAT_GATHER_SCATTER)
6768     {
6769       aggr_type = elem_type;
6770       vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
6771 				       &bump, &vec_offset);
6772     }
6773   else
6774     {
6775       if (memory_access_type == VMAT_LOAD_STORE_LANES)
6776 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6777       else
6778 	aggr_type = vectype;
6779       bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
6780     }
6781 
6782   if (mask)
6783     LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6784 
6785   /* In case the vectorization factor (VF) is bigger than the number
6786      of elements that we can fit in a vectype (nunits), we have to generate
6787      more than one vector stmt, i.e., we need to "unroll" the
6788      vector stmt by a factor VF/nunits.  For more details see the
6789      documentation of vect_get_vec_def_for_stmt_copy.  */
6790 
6791   /* In case of interleaving (non-unit grouped access):
6792 
6793         S1:  &base + 2 = x2
6794         S2:  &base = x0
6795         S3:  &base + 1 = x1
6796         S4:  &base + 3 = x3
6797 
6798      We create vectorized stores starting from the base address (the access of
6799      the first stmt in the chain, S2 in the above example) when the last store
6800      stmt of the chain (S4) is reached:
6801 
6802         VS1: &base = vx2
6803 	VS2: &base + vec_size*1 = vx0
6804 	VS3: &base + vec_size*2 = vx1
6805 	VS4: &base + vec_size*3 = vx3
6806 
6807      Then permutation statements are generated:
6808 
6809 	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6810 	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6811 	...
6812 
6813      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6814      (the order of the data-refs in the output of vect_permute_store_chain
6815      corresponds to the order of scalar stmts in the interleaving chain - see
6816      the documentation of vect_permute_store_chain()).
6817 
6818      In case of both multiple types and interleaving, above vector stores and
6819      permutation stmts are created for every copy.  The result vector stmts are
6820      put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6821      STMT_VINFO_RELATED_STMT for the next copies.
6822   */
6823 
6824   prev_stmt_info = NULL;
6825   tree vec_mask = NULL_TREE;
6826   for (j = 0; j < ncopies; j++)
6827     {
6828 
6829       if (j == 0)
6830 	{
6831           if (slp)
6832             {
6833 	      /* Get vectorized arguments for SLP_NODE.  */
6834               vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6835                                  NULL, slp_node);
6836 
6837               vec_oprnd = vec_oprnds[0];
6838             }
6839           else
6840             {
6841 	      /* For interleaved stores we collect vectorized defs for all the
6842 		 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6843 		 used as an input to vect_permute_store_chain(), and OPRNDS as
6844 		 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6845 
6846 		 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6847 		 OPRNDS are of size 1.  */
6848 	      next_stmt = first_stmt;
6849 	      for (i = 0; i < group_size; i++)
6850 		{
6851 		  /* Since gaps are not supported for interleaved stores,
6852 		     GROUP_SIZE is the exact number of stmts in the chain.
6853 		     Therefore, NEXT_STMT can't be NULL.  In case that
6854 		     there is no interleaving, GROUP_SIZE is 1, and only one
6855 		     iteration of the loop will be executed.  */
6856 		  op = vect_get_store_rhs (next_stmt);
6857 		  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6858 		  dr_chain.quick_push (vec_oprnd);
6859 		  oprnds.quick_push (vec_oprnd);
6860 		  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6861 		}
6862 	      if (mask)
6863 		vec_mask = vect_get_vec_def_for_operand (mask, stmt,
6864 							 mask_vectype);
6865 	    }
6866 
6867 	  /* We should have caught mismatched types earlier.  */
6868 	  gcc_assert (useless_type_conversion_p (vectype,
6869 						 TREE_TYPE (vec_oprnd)));
6870 	  bool simd_lane_access_p
6871 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6872 	  if (simd_lane_access_p
6873 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6874 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6875 	      && integer_zerop (DR_OFFSET (first_dr))
6876 	      && integer_zerop (DR_INIT (first_dr))
6877 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
6878 					get_alias_set (TREE_TYPE (ref_type))))
6879 	    {
6880 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6881 	      dataref_offset = build_int_cst (ref_type, 0);
6882 	      inv_p = false;
6883 	    }
6884 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6885 	    {
6886 	      vect_get_gather_scatter_ops (loop, stmt, &gs_info,
6887 					   &dataref_ptr, &vec_offset);
6888 	      inv_p = false;
6889 	    }
6890 	  else
6891 	    dataref_ptr
6892 	      = vect_create_data_ref_ptr (first_stmt, aggr_type,
6893 					  simd_lane_access_p ? loop : NULL,
6894 					  offset, &dummy, gsi, &ptr_incr,
6895 					  simd_lane_access_p, &inv_p,
6896 					  NULL_TREE, bump);
6897 	  gcc_assert (bb_vinfo || !inv_p);
6898 	}
6899       else
6900 	{
6901 	  /* For interleaved stores we created vectorized defs for all the
6902 	     defs stored in OPRNDS in the previous iteration (previous copy).
6903 	     DR_CHAIN is then used as an input to vect_permute_store_chain(),
6904 	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6905 	     next copy.
6906 	     If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6907 	     OPRNDS are of size 1.  */
6908 	  for (i = 0; i < group_size; i++)
6909 	    {
6910 	      op = oprnds[i];
6911 	      vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
6912 	      vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, op);
6913 	      dr_chain[i] = vec_oprnd;
6914 	      oprnds[i] = vec_oprnd;
6915 	    }
6916 	  if (mask)
6917 	    vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
6918 	  if (dataref_offset)
6919 	    dataref_offset
6920 	      = int_const_binop (PLUS_EXPR, dataref_offset, bump);
6921 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6922 	    vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6923 							 vec_offset);
6924 	  else
6925 	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6926 					   bump);
6927 	}
6928 
6929       if (memory_access_type == VMAT_LOAD_STORE_LANES)
6930 	{
6931 	  tree vec_array;
6932 
6933 	  /* Combine all the vectors into an array.  */
6934 	  vec_array = create_vector_array (vectype, vec_num);
6935 	  for (i = 0; i < vec_num; i++)
6936 	    {
6937 	      vec_oprnd = dr_chain[i];
6938 	      write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6939 	    }
6940 
6941 	  tree final_mask = NULL;
6942 	  if (loop_masks)
6943 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
6944 					     vectype, j);
6945 	  if (vec_mask)
6946 	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
6947 						  vec_mask, gsi);
6948 
6949 	  gcall *call;
6950 	  if (final_mask)
6951 	    {
6952 	      /* Emit:
6953 		   MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
6954 				     VEC_ARRAY).  */
6955 	      unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
6956 	      tree alias_ptr = build_int_cst (ref_type, align);
6957 	      call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
6958 						 dataref_ptr, alias_ptr,
6959 						 final_mask, vec_array);
6960 	    }
6961 	  else
6962 	    {
6963 	      /* Emit:
6964 		   MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
6965 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6966 	      call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6967 						 vec_array);
6968 	      gimple_call_set_lhs (call, data_ref);
6969 	    }
6970 	  gimple_call_set_nothrow (call, true);
6971 	  new_stmt = call;
6972 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
6973 	}
6974       else
6975 	{
6976 	  new_stmt = NULL;
6977 	  if (grouped_store)
6978 	    {
6979 	      if (j == 0)
6980 		result_chain.create (group_size);
6981 	      /* Permute.  */
6982 	      vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6983 					&result_chain);
6984 	    }
6985 
6986 	  next_stmt = first_stmt;
6987 	  for (i = 0; i < vec_num; i++)
6988 	    {
6989 	      unsigned align, misalign;
6990 
6991 	      tree final_mask = NULL_TREE;
6992 	      if (loop_masks)
6993 		final_mask = vect_get_loop_mask (gsi, loop_masks,
6994 						 vec_num * ncopies,
6995 						 vectype, vec_num * j + i);
6996 	      if (vec_mask)
6997 		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
6998 						      vec_mask, gsi);
6999 
7000 	      if (memory_access_type == VMAT_GATHER_SCATTER)
7001 		{
7002 		  tree scale = size_int (gs_info.scale);
7003 		  gcall *call;
7004 		  if (loop_masks)
7005 		    call = gimple_build_call_internal
7006 		      (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7007 		       scale, vec_oprnd, final_mask);
7008 		  else
7009 		    call = gimple_build_call_internal
7010 		      (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7011 		       scale, vec_oprnd);
7012 		  gimple_call_set_nothrow (call, true);
7013 		  new_stmt = call;
7014 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
7015 		  break;
7016 		}
7017 
7018 	      if (i > 0)
7019 		/* Bump the vector pointer.  */
7020 		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7021 					       stmt, bump);
7022 
7023 	      if (slp)
7024 		vec_oprnd = vec_oprnds[i];
7025 	      else if (grouped_store)
7026 		/* For grouped stores vectorized defs are interleaved in
7027 		   vect_permute_store_chain().  */
7028 		vec_oprnd = result_chain[i];
7029 
7030 	      align = DR_TARGET_ALIGNMENT (first_dr);
7031 	      if (aligned_access_p (first_dr))
7032 		misalign = 0;
7033 	      else if (DR_MISALIGNMENT (first_dr) == -1)
7034 		{
7035 		  align = dr_alignment (vect_dr_behavior (first_dr));
7036 		  misalign = 0;
7037 		}
7038 	      else
7039 		misalign = DR_MISALIGNMENT (first_dr);
7040 	      if (dataref_offset == NULL_TREE
7041 		  && TREE_CODE (dataref_ptr) == SSA_NAME)
7042 		set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7043 					misalign);
7044 
7045 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7046 		{
7047 		  tree perm_mask = perm_mask_for_reverse (vectype);
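		  /* Illustrative example: for a four-element vector
		     PERM_MASK is { 3, 2, 1, 0 }, so the elements are
		     written to memory in reversed order to match the
		     negative step of the scalar accesses.  */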
7048 		  tree perm_dest
7049 		    = vect_create_destination_var (vect_get_store_rhs (stmt),
7050 						   vectype);
7051 		  tree new_temp = make_ssa_name (perm_dest);
7052 
7053 		  /* Generate the permute statement.  */
7054 		  gimple *perm_stmt
7055 		    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7056 					   vec_oprnd, perm_mask);
7057 		  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7058 
7059 		  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7060 		  vec_oprnd = new_temp;
7061 		}
7062 
7063 	      /* Arguments are ready.  Create the new vector stmt.  */
7064 	      if (final_mask)
7065 		{
7066 		  align = least_bit_hwi (misalign | align);
7067 		  tree ptr = build_int_cst (ref_type, align);
7068 		  gcall *call
7069 		    = gimple_build_call_internal (IFN_MASK_STORE, 4,
7070 						  dataref_ptr, ptr,
7071 						  final_mask, vec_oprnd);
7072 		  gimple_call_set_nothrow (call, true);
7073 		  new_stmt = call;
7074 		}
7075 	      else
7076 		{
7077 		  data_ref = fold_build2 (MEM_REF, vectype,
7078 					  dataref_ptr,
7079 					  dataref_offset
7080 					  ? dataref_offset
7081 					  : build_int_cst (ref_type, 0));
7082 		  if (aligned_access_p (first_dr))
7083 		    ;
7084 		  else if (DR_MISALIGNMENT (first_dr) == -1)
7085 		    TREE_TYPE (data_ref)
7086 		      = build_aligned_type (TREE_TYPE (data_ref),
7087 					    align * BITS_PER_UNIT);
7088 		  else
7089 		    TREE_TYPE (data_ref)
7090 		      = build_aligned_type (TREE_TYPE (data_ref),
7091 					    TYPE_ALIGN (elem_type));
7092 		  vect_copy_ref_info (data_ref, DR_REF (first_dr));
7093 		  new_stmt = gimple_build_assign (data_ref, vec_oprnd);
7094 		}
7095 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
7096 
7097 	      if (slp)
7098 		continue;
7099 
7100 	      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
7101 	      if (!next_stmt)
7102 		break;
7103 	    }
7104 	}
7105       if (!slp)
7106 	{
7107 	  if (j == 0)
7108 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7109 	  else
7110 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7111 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
7112 	}
7113     }
7114 
7115   oprnds.release ();
7116   result_chain.release ();
7117   vec_oprnds.release ();
7118 
7119   return true;
7120 }
7121 
7122 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7123    VECTOR_CST mask.  No checks are made that the target platform supports the
7124    mask, so callers may wish to test can_vec_perm_const_p separately, or use
7125    vect_gen_perm_mask_checked.  */
7126 
7127 tree
7128 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
7129 {
7130   tree mask_type;
7131 
7132   poly_uint64 nunits = sel.length ();
7133   gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
7134 
7135   mask_type = build_vector_type (ssizetype, nunits);
7136   return vec_perm_indices_to_tree (mask_type, sel);
7137 }
7138 
7139 /* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
7140    i.e. that the target supports the pattern _for arbitrary input vectors_.  */
7141 
7142 tree
7143 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
7144 {
7145   gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
7146   return vect_gen_perm_mask_any (vectype, sel);
7147 }
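
/* Illustrative usage sketch (mirroring the callers above, not an
   additional API): to build a mask that reverses a four-element vector,
   a caller might write

     vec_perm_builder sel (4, 4, 1);
     for (unsigned int i = 0; i < 4; ++i)
       sel.quick_push (3 - i);
     vec_perm_indices indices (sel, 1, 4);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   which yields the VECTOR_CST { 3, 2, 1, 0 } once the target's
   can_vec_perm_const_p check has passed.  */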
7148 
7149 /* Given vector variables X and Y that were generated for the scalar
7150    STMT, generate instructions to permute the vector elements of X and Y
7151    using permutation mask MASK_VEC, insert them at *GSI and return the
7152    permuted vector variable.  */
7153 
7154 static tree
7155 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
7156 		      gimple_stmt_iterator *gsi)
7157 {
7158   tree vectype = TREE_TYPE (x);
7159   tree perm_dest, data_ref;
7160   gimple *perm_stmt;
7161 
7162   tree scalar_dest = gimple_get_lhs (stmt);
7163   if (TREE_CODE (scalar_dest) == SSA_NAME)
7164     perm_dest = vect_create_destination_var (scalar_dest, vectype);
7165   else
7166     perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
7167   data_ref = make_ssa_name (perm_dest);
7168 
7169   /* Generate the permute statement.  */
7170   perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
7171   vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7172 
7173   return data_ref;
7174 }
7175 
7176 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
7177    inserting them on the loop's preheader edge.  Returns true if we
7178    were successful in doing so (and thus STMT can then be moved),
7179    otherwise returns false.  */
7180 
7181 static bool
7182 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
7183 {
7184   ssa_op_iter i;
7185   tree op;
7186   bool any = false;
7187 
7188   FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7189     {
7190       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7191       if (!gimple_nop_p (def_stmt)
7192 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7193 	{
7194 	  /* Make sure we don't need to recurse.  While we could do
7195 	     so in simple cases when there are more complex use webs
7196 	     we don't have an easy way to preserve stmt order to fulfil
7197 	     dependencies within them.  */
7198 	  tree op2;
7199 	  ssa_op_iter i2;
7200 	  if (gimple_code (def_stmt) == GIMPLE_PHI)
7201 	    return false;
7202 	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7203 	    {
7204 	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
7205 	      if (!gimple_nop_p (def_stmt2)
7206 		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7207 		return false;
7208 	    }
7209 	  any = true;
7210 	}
7211     }
7212 
7213   if (!any)
7214     return true;
7215 
7216   FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7217     {
7218       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7219       if (!gimple_nop_p (def_stmt)
7220 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7221 	{
7222 	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7223 	  gsi_remove (&gsi, false);
7224 	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7225 	}
7226     }
7227 
7228   return true;
7229 }
7230 
7231 /* vectorizable_load.
7232 
7233    Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
7234    can be vectorized.
7235    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7236    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7237    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
7238 
7239 static bool
7240 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
7241                    slp_tree slp_node, slp_instance slp_node_instance)
7242 {
7243   tree scalar_dest;
7244   tree vec_dest = NULL;
7245   tree data_ref = NULL;
7246   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7247   stmt_vec_info prev_stmt_info;
7248   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7249   struct loop *loop = NULL;
7250   struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
7251   bool nested_in_vect_loop = false;
7252   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
7253   tree elem_type;
7254   tree new_temp;
7255   machine_mode mode;
7256   gimple *new_stmt = NULL;
7257   tree dummy;
7258   enum dr_alignment_support alignment_support_scheme;
7259   tree dataref_ptr = NULL_TREE;
7260   tree dataref_offset = NULL_TREE;
7261   gimple *ptr_incr = NULL;
7262   int ncopies;
7263   int i, j;
7264   unsigned int group_size;
7265   poly_uint64 group_gap_adj;
7266   tree msq = NULL_TREE, lsq;
7267   tree offset = NULL_TREE;
7268   tree byte_offset = NULL_TREE;
7269   tree realignment_token = NULL_TREE;
7270   gphi *phi = NULL;
7271   vec<tree> dr_chain = vNULL;
7272   bool grouped_load = false;
7273   gimple *first_stmt;
7274   gimple *first_stmt_for_drptr = NULL;
7275   bool inv_p;
7276   bool compute_in_loop = false;
7277   struct loop *at_loop;
7278   int vec_num;
7279   bool slp = (slp_node != NULL);
7280   bool slp_perm = false;
7281   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7282   poly_uint64 vf;
7283   tree aggr_type;
7284   gather_scatter_info gs_info;
7285   vec_info *vinfo = stmt_info->vinfo;
7286   tree ref_type;
7287   enum vect_def_type mask_dt = vect_unknown_def_type;
7288 
7289   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7290     return false;
7291 
7292   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7293       && ! vec_stmt)
7294     return false;
7295 
7296   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7297   if (is_gimple_assign (stmt))
7298     {
7299       scalar_dest = gimple_assign_lhs (stmt);
7300       if (TREE_CODE (scalar_dest) != SSA_NAME)
7301 	return false;
7302 
7303       tree_code code = gimple_assign_rhs_code (stmt);
7304       if (code != ARRAY_REF
7305 	  && code != BIT_FIELD_REF
7306 	  && code != INDIRECT_REF
7307 	  && code != COMPONENT_REF
7308 	  && code != IMAGPART_EXPR
7309 	  && code != REALPART_EXPR
7310 	  && code != MEM_REF
7311 	  && TREE_CODE_CLASS (code) != tcc_declaration)
7312 	return false;
7313     }
7314   else
7315     {
7316       gcall *call = dyn_cast <gcall *> (stmt);
7317       if (!call || !gimple_call_internal_p (call))
7318 	return false;
7319 
7320       internal_fn ifn = gimple_call_internal_fn (call);
7321       if (!internal_load_fn_p (ifn))
7322 	return false;
7323 
7324       scalar_dest = gimple_call_lhs (call);
7325       if (!scalar_dest)
7326 	return false;
7327 
7328       if (slp_node != NULL)
7329 	{
7330 	  if (dump_enabled_p ())
7331 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7332 			     "SLP of masked loads not supported.\n");
7333 	  return false;
7334 	}
7335 
7336       int mask_index = internal_fn_mask_index (ifn);
7337       if (mask_index >= 0)
7338 	{
7339 	  mask = gimple_call_arg (call, mask_index);
7340 	  if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
7341 					   &mask_vectype))
7342 	    return false;
7343 	}
7344     }
7345 
7346   if (!STMT_VINFO_DATA_REF (stmt_info))
7347     return false;
7348 
7349   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7350   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7351 
7352   if (loop_vinfo)
7353     {
7354       loop = LOOP_VINFO_LOOP (loop_vinfo);
7355       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
7356       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7357     }
7358   else
7359     vf = 1;
7360 
7361   /* Multiple types in SLP are handled by creating the appropriate number of
7362      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
7363      case of SLP.  */
7364   if (slp)
7365     ncopies = 1;
7366   else
7367     ncopies = vect_get_num_copies (loop_vinfo, vectype);
7368 
7369   gcc_assert (ncopies >= 1);
7370 
7371   /* FORNOW. This restriction should be relaxed.  */
7372   if (nested_in_vect_loop && ncopies > 1)
7373     {
7374       if (dump_enabled_p ())
7375         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7376                          "multiple types in nested loop.\n");
7377       return false;
7378     }
7379 
7380   /* Invalidate assumptions made by dependence analysis when vectorization
7381      on the unrolled body effectively re-orders stmts.  */
7382   if (ncopies > 1
7383       && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7384       && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7385 		   STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7386     {
7387       if (dump_enabled_p ())
7388 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7389 			 "cannot perform implicit CSE when unrolling "
7390 			 "with negative dependence distance\n");
7391       return false;
7392     }
7393 
7394   elem_type = TREE_TYPE (vectype);
7395   mode = TYPE_MODE (vectype);
7396 
7397   /* FORNOW.  In some cases we can vectorize even if the data type is
7398      not supported (e.g. data copies).  */
7399   if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7400     {
7401       if (dump_enabled_p ())
7402         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7403                          "Aligned load, but unsupported type.\n");
7404       return false;
7405     }
7406 
7407   /* Check if the load is a part of an interleaving chain.  */
7408   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7409     {
7410       grouped_load = true;
7411       /* FORNOW */
7412       gcc_assert (!nested_in_vect_loop);
7413       gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7414 
7415       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7416       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7417 
7418       if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7419 	slp_perm = true;
7420 
7421       /* Invalidate assumptions made by dependence analysis when vectorization
7422 	 on the unrolled body effectively re-orders stmts.  */
7423       if (!PURE_SLP_STMT (stmt_info)
7424 	  && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7425 	  && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7426 		       STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7427 	{
7428 	  if (dump_enabled_p ())
7429 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7430 			     "cannot perform implicit CSE when performing "
7431 			     "group loads with negative dependence distance\n");
7432 	  return false;
7433 	}
7434 
7435       /* Similarly when the stmt is a load that is both part of a SLP
7436          instance and a loop vectorized stmt via the same-dr mechanism
7437 	 we have to give up.  */
7438       if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
7439 	  && (STMT_SLP_TYPE (stmt_info)
7440 	      != STMT_SLP_TYPE (vinfo_for_stmt
7441 				 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
7442 	{
7443 	  if (dump_enabled_p ())
7444 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7445 			     "conflicting SLP types for CSEd load\n");
7446 	  return false;
7447 	}
7448     }
7449   else
7450     group_size = 1;
7451 
7452   vect_memory_access_type memory_access_type;
7453   if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
7454 			    &memory_access_type, &gs_info))
7455     return false;
7456 
7457   if (mask)
7458     {
7459       if (memory_access_type == VMAT_CONTIGUOUS)
7460 	{
7461 	  machine_mode vec_mode = TYPE_MODE (vectype);
7462 	  if (!VECTOR_MODE_P (vec_mode)
7463 	      || !can_vec_mask_load_store_p (vec_mode,
7464 					     TYPE_MODE (mask_vectype), true))
7465 	    return false;
7466 	}
7467       else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7468 	{
7469 	  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7470 	  tree masktype
7471 	    = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7472 	  if (TREE_CODE (masktype) == INTEGER_TYPE)
7473 	    {
7474 	      if (dump_enabled_p ())
7475 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7476 				 "masked gather with integer mask not"
7477 				 " supported.");
7478 	      return false;
7479 	    }
7480 	}
7481       else if (memory_access_type != VMAT_LOAD_STORE_LANES
7482 	       && memory_access_type != VMAT_GATHER_SCATTER)
7483 	{
7484 	  if (dump_enabled_p ())
7485 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7486 			     "unsupported access type for masked load.\n");
7487 	  return false;
7488 	}
7489     }
7490 
7491   if (!vec_stmt) /* transformation not required.  */
7492     {
7493       if (!slp)
7494 	STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7495 
7496       if (loop_vinfo
7497 	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7498 	check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7499 				  memory_access_type, &gs_info);
7500 
7501       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7502       /* The SLP costs are calculated during SLP analysis.  */
7503       if (! slp_node)
7504 	vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7505 			      NULL, NULL, NULL);
7506       return true;
7507     }
7508 
7509   if (!slp)
7510     gcc_assert (memory_access_type
7511 		== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7512 
7513   if (dump_enabled_p ())
7514     dump_printf_loc (MSG_NOTE, vect_location,
7515                      "transform load. ncopies = %d\n", ncopies);
7516 
7517   /* Transform.  */
7518 
7519   ensure_base_align (dr);
7520 
7521   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7522     {
7523       vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask,
7524 				    mask_dt);
7525       return true;
7526     }
7527 
7528   if (memory_access_type == VMAT_ELEMENTWISE
7529       || memory_access_type == VMAT_STRIDED_SLP)
7530     {
7531       gimple_stmt_iterator incr_gsi;
7532       bool insert_after;
7533       gimple *incr;
7534       tree offvar;
7535       tree ivstep;
7536       tree running_off;
7537       vec<constructor_elt, va_gc> *v = NULL;
7538       tree stride_base, stride_step, alias_off;
7539       /* Checked by get_load_store_type.  */
7540       unsigned int const_nunits = nunits.to_constant ();
7541       unsigned HOST_WIDE_INT cst_offset = 0;
7542 
7543       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7544       gcc_assert (!nested_in_vect_loop);
7545 
7546       if (grouped_load)
7547 	{
7548 	  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7549 	  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7550 	}
7551       else
7552 	{
7553 	  first_stmt = stmt;
7554 	  first_dr = dr;
7555 	}
7556       if (slp && grouped_load)
7557 	{
7558 	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7559 	  ref_type = get_group_alias_ptr_type (first_stmt);
7560 	}
7561       else
7562 	{
7563 	  if (grouped_load)
7564 	    cst_offset
7565 	      = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7566 		 * vect_get_place_in_interleaving_chain (stmt, first_stmt));
7567 	  group_size = 1;
7568 	  ref_type = reference_alias_ptr_type (DR_REF (dr));
7569 	}
7570 
7571       stride_base
7572 	= fold_build_pointer_plus
7573 	    (DR_BASE_ADDRESS (first_dr),
7574 	     size_binop (PLUS_EXPR,
7575 			 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7576 			 convert_to_ptrofftype (DR_INIT (first_dr))));
7577       stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7578 
7579       /* For a load with a loop-invariant stride other than a power of
7580          two (i.e. not a grouped access) like so:
7581 
7582 	   for (i = 0; i < n; i += stride)
7583 	     ... = array[i];
7584 
7585 	 we generate a new induction variable and new accesses to
7586 	 form a new vector (or vectors, depending on ncopies):
7587 
7588 	   for (j = 0; ; j += VF*stride)
7589 	     tmp1 = array[j];
7590 	     tmp2 = array[j + stride];
7591 	     ...
7592 	     vectemp = {tmp1, tmp2, ...}
7593          */
7594 
7595       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7596 			    build_int_cst (TREE_TYPE (stride_step), vf));
7597 
7598       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7599 
7600       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7601       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7602       create_iv (stride_base, ivstep, NULL,
7603 		 loop, &incr_gsi, insert_after,
7604 		 &offvar, NULL);
7605       incr = gsi_stmt (incr_gsi);
7606       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7607 
7608       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7609 
7610       prev_stmt_info = NULL;
7611       running_off = offvar;
7612       alias_off = build_int_cst (ref_type, 0);
7613       int nloads = const_nunits;
7614       int lnel = 1;
7615       tree ltype = TREE_TYPE (vectype);
7616       tree lvectype = vectype;
7617       auto_vec<tree> dr_chain;
7618       if (memory_access_type == VMAT_STRIDED_SLP)
7619 	{
7620 	  if (group_size < const_nunits)
7621 	    {
7622 	      /* First check if vec_init optab supports construction from
7623 		 vector elts directly.  */
7624 	      scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7625 	      machine_mode vmode;
7626 	      if (mode_for_vector (elmode, group_size).exists (&vmode)
7627 		  && VECTOR_MODE_P (vmode)
7628 		  && targetm.vector_mode_supported_p (vmode)
7629 		  && (convert_optab_handler (vec_init_optab,
7630 					     TYPE_MODE (vectype), vmode)
7631 		      != CODE_FOR_nothing))
7632 		{
7633 		  nloads = const_nunits / group_size;
7634 		  lnel = group_size;
7635 		  ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7636 		}
7637 	      else
7638 		{
7639 		  /* Otherwise avoid emitting a constructor of vector elements
7640 		     by performing the loads using an integer type of the same
7641 		     size, constructing a vector of those and then
7642 		     re-interpreting it as the original vector type.
7643 		     This avoids a huge runtime penalty due to the general
7644 		     inability to perform store forwarding from smaller stores
7645 		     to a larger load.  */
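		  /* For instance, on a typical 64-bit target a V4SF vectype
		     with group_size 2 gives lsize == 64: we then do two
		     64-bit integer element loads, build a two-element
		     integer vector from them and VIEW_CONVERT the result
		     back to V4SF.  */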
7646 		  unsigned lsize
7647 		    = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7648 		  elmode = int_mode_for_size (lsize, 0).require ();
7649 		  unsigned int lnunits = const_nunits / group_size;
7650 		  /* If we can't construct such a vector fall back to
7651 		     element loads of the original vector type.  */
7652 		  if (mode_for_vector (elmode, lnunits).exists (&vmode)
7653 		      && VECTOR_MODE_P (vmode)
7654 		      && targetm.vector_mode_supported_p (vmode)
7655 		      && (convert_optab_handler (vec_init_optab, vmode, elmode)
7656 			  != CODE_FOR_nothing))
7657 		    {
7658 		      nloads = lnunits;
7659 		      lnel = group_size;
7660 		      ltype = build_nonstandard_integer_type (lsize, 1);
7661 		      lvectype = build_vector_type (ltype, nloads);
7662 		    }
7663 		}
7664 	    }
7665 	  else
7666 	    {
7667 	      nloads = 1;
7668 	      lnel = const_nunits;
7669 	      ltype = vectype;
7670 	    }
7671 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7672 	}
7673       /* Single-element vectype: load it as vector(1) scalar_type directly.  */
7674       else if (nloads == 1)
7675 	ltype = vectype;
7676 
7677       if (slp)
7678 	{
7679 	  /* For SLP permutation support we need to load the whole group,
7680 	     not only the number of vector stmts the permutation result
7681 	     fits in.  */
7682 	  if (slp_perm)
7683 	    {
7684 	      /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7685 		 variable VF.  */
7686 	      unsigned int const_vf = vf.to_constant ();
7687 	      ncopies = CEIL (group_size * const_vf, const_nunits);
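	      /* E.g. group_size 3 with a constant VF of 4 and const_nunits 4
		 gives ncopies = CEIL (12, 4) = 3.  */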
7688 	      dr_chain.create (ncopies);
7689 	    }
7690 	  else
7691 	    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7692 	}
7693       unsigned int group_el = 0;
7694       unsigned HOST_WIDE_INT
7695 	elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7696       for (j = 0; j < ncopies; j++)
7697 	{
7698 	  if (nloads > 1)
7699 	    vec_alloc (v, nloads);
7700 	  for (i = 0; i < nloads; i++)
7701 	    {
7702 	      tree this_off = build_int_cst (TREE_TYPE (alias_off),
7703 					     group_el * elsz + cst_offset);
7704 	      tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7705 	      vect_copy_ref_info (data_ref, DR_REF (first_dr));
7706 	      new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
7707 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
7708 	      if (nloads > 1)
7709 		CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7710 					gimple_assign_lhs (new_stmt));
7711 
7712 	      group_el += lnel;
7713 	      if (! slp
7714 		  || group_el == group_size)
7715 		{
7716 		  tree newoff = copy_ssa_name (running_off);
7717 		  gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7718 						      running_off, stride_step);
7719 		  vect_finish_stmt_generation (stmt, incr, gsi);
7720 
7721 		  running_off = newoff;
7722 		  group_el = 0;
7723 		}
7724 	    }
7725 	  if (nloads > 1)
7726 	    {
7727 	      tree vec_inv = build_constructor (lvectype, v);
7728 	      new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7729 	      new_stmt = SSA_NAME_DEF_STMT (new_temp);
7730 	      if (lvectype != vectype)
7731 		{
7732 		  new_stmt = gimple_build_assign (make_ssa_name (vectype),
7733 						  VIEW_CONVERT_EXPR,
7734 						  build1 (VIEW_CONVERT_EXPR,
7735 							  vectype, new_temp));
7736 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
7737 		}
7738 	    }
7739 
7740 	  if (slp)
7741 	    {
7742 	      if (slp_perm)
7743 		dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7744 	      else
7745 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7746 	    }
7747 	  else
7748 	    {
7749 	      if (j == 0)
7750 		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7751 	      else
7752 		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7753 	      prev_stmt_info = vinfo_for_stmt (new_stmt);
7754 	    }
7755 	}
7756       if (slp_perm)
7757 	{
7758 	  unsigned n_perms;
7759 	  vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7760 					slp_node_instance, false, &n_perms);
7761 	}
7762       return true;
7763     }
7764 
7765   if (memory_access_type == VMAT_GATHER_SCATTER
7766       || (!slp && memory_access_type == VMAT_CONTIGUOUS))
7767     grouped_load = false;
7768 
7769   if (grouped_load)
7770     {
7771       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7772       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7773       /* For SLP vectorization we directly vectorize a subchain
7774          without permutation.  */
7775       if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7776 	first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7777       /* For BB vectorization always use the first stmt to base
7778 	 the data ref pointer on.  */
7779       if (bb_vinfo)
7780 	first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7781 
7782       /* Check if the chain of loads is already vectorized.  */
7783       if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7784 	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7785 	     ???  But we can only do so if there is exactly one
7786 	     as we have no way to get at the rest.  Leave the CSE
7787 	     opportunity alone.
7788 	     ???  With the group load eventually participating
7789 	     in multiple different permutations (having multiple
7790 	     slp nodes which refer to the same group) the CSE
7791 	     would even produce wrong code.  See PR56270.  */
7792 	  && !slp)
7793 	{
7794 	  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7795 	  return true;
7796 	}
7797       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7798       group_gap_adj = 0;
7799 
7800       /* VEC_NUM is the number of vect stmts to be created for this group.  */
7801       if (slp)
7802 	{
7803 	  grouped_load = false;
7804 	  /* For SLP permutation support we need to load the whole group,
7805 	     not only the number of vector stmts the permutation result
7806 	     fits in.  */
7807 	  if (slp_perm)
7808 	    {
7809 	      /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7810 		 variable VF.  */
7811 	      unsigned int const_vf = vf.to_constant ();
7812 	      unsigned int const_nunits = nunits.to_constant ();
7813 	      vec_num = CEIL (group_size * const_vf, const_nunits);
7814 	      group_gap_adj = vf * group_size - nunits * vec_num;
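	      /* Illustration: group_size 3, constant VF 2 and nunits 4 give
		 vec_num = CEIL (6, 4) = 2, i.e. 8 elements loaded for the 6
		 the group covers; group_gap_adj = 6 - 8 = -2 records the
		 excess that is compensated for after the permutation.  */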
7815 	    }
7816 	  else
7817 	    {
7818 	      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7819 	      group_gap_adj
7820 		= group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7821 	    }
7822     	}
7823       else
7824 	vec_num = group_size;
7825 
7826       ref_type = get_group_alias_ptr_type (first_stmt);
7827     }
7828   else
7829     {
7830       first_stmt = stmt;
7831       first_dr = dr;
7832       group_size = vec_num = 1;
7833       group_gap_adj = 0;
7834       ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7835     }
7836 
7837   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7838   gcc_assert (alignment_support_scheme);
7839   vec_loop_masks *loop_masks
7840     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7841        ? &LOOP_VINFO_MASKS (loop_vinfo)
7842        : NULL);
7843   /* Targets with load-lane instructions must not require explicit
7844      realignment.  vect_supportable_dr_alignment always returns either
7845      dr_aligned or dr_unaligned_supported for masked operations.  */
7846   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7847 	       && !mask
7848 	       && !loop_masks)
7849 	      || alignment_support_scheme == dr_aligned
7850 	      || alignment_support_scheme == dr_unaligned_supported);
7851 
7852   /* In case the vectorization factor (VF) is bigger than the number
7853      of elements that we can fit in a vectype (nunits), we have to generate
7854      more than one vector stmt - i.e., we need to "unroll" the
7855      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
7856      from one copy of the vector stmt to the next, in the field
7857      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
7858      stages to find the correct vector defs to be used when vectorizing
7859      stmts that use the defs of the current stmt.  The example below
7860      illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7861      need to create 4 vectorized stmts):
7862 
7863      before vectorization:
7864                                 RELATED_STMT    VEC_STMT
7865         S1:     x = memref      -               -
7866         S2:     z = x + 1       -               -
7867 
7868      step 1: vectorize stmt S1:
7869         We first create the vector stmt VS1_0, and, as usual, record a
7870         pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7871         Next, we create the vector stmt VS1_1, and record a pointer to
7872         it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7873         Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
7874         stmts and pointers:
7875                                 RELATED_STMT    VEC_STMT
7876         VS1_0:  vx0 = memref0   VS1_1           -
7877         VS1_1:  vx1 = memref1   VS1_2           -
7878         VS1_2:  vx2 = memref2   VS1_3           -
7879         VS1_3:  vx3 = memref3   -               -
7880         S1:     x = load        -               VS1_0
7881         S2:     z = x + 1       -               -
7882 
7883      See the documentation of vect_get_vec_def_for_stmt_copy for how the
7884      information recorded in the RELATED_STMT field is used to vectorize
7885      stmt S2.  */
7886 
7887   /* In case of interleaving (non-unit grouped access):
7888 
7889      S1:  x2 = &base + 2
7890      S2:  x0 = &base
7891      S3:  x1 = &base + 1
7892      S4:  x3 = &base + 3
7893 
7894      Vectorized loads are created in the order of memory accesses
7895      starting from the access of the first stmt of the chain:
7896 
7897      VS1: vx0 = &base
7898      VS2: vx1 = &base + vec_size*1
7899      VS3: vx2 = &base + vec_size*2
7900      VS4: vx3 = &base + vec_size*3
7901 
7902      Then permutation statements are generated:
7903 
7904      VS5: vx4 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7905      VS6: vx5 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7906        ...
7907 
7908      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7909      (the order of the data-refs in the output of vect_permute_load_chain
7910      corresponds to the order of scalar stmts in the interleaving chain - see
7911      the documentation of vect_permute_load_chain()).
7912      The generation of permutation stmts and recording them in
7913      STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7914 
7915      In case of both multiple types and interleaving, the vector loads and
7916      permutation stmts above are created for every copy.  The result vector
7917      stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7918      corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
7919 
7920   /* If the data reference is aligned (dr_aligned) or potentially unaligned
7921      on a target that supports unaligned accesses (dr_unaligned_supported)
7922      we generate the following code:
7923          p = initial_addr;
7924          indx = 0;
7925          loop {
7926 	   p = p + indx * vectype_size;
7927            vec_dest = *(p);
7928            indx = indx + 1;
7929          }
7930 
7931      Otherwise, the data reference is potentially unaligned on a target that
7932      does not support unaligned accesses (dr_explicit_realign_optimized) -
7933      then generate the following code, in which the data in each iteration is
7934      obtained by two vector loads, one from the previous iteration, and one
7935      from the current iteration:
7936          p1 = initial_addr;
7937          msq_init = *(floor(p1))
7938          p2 = initial_addr + VS - 1;
7939          realignment_token = call target_builtin;
7940          indx = 0;
7941          loop {
7942            p2 = p2 + indx * vectype_size
7943            lsq = *(floor(p2))
7944            vec_dest = realign_load (msq, lsq, realignment_token)
7945            indx = indx + 1;
7946            msq = lsq;
7947          }   */
7948 
7949   /* If the misalignment remains the same throughout the execution of the
7950      loop, we can create the init_addr and permutation mask at the loop
7951      preheader.  Otherwise, it needs to be created inside the loop.
7952      This can only occur when vectorizing memory accesses in the inner-loop
7953      nested within an outer-loop that is being vectorized.  */
7954 
7955   if (nested_in_vect_loop
7956       && !multiple_p (DR_STEP_ALIGNMENT (dr),
7957 		      GET_MODE_SIZE (TYPE_MODE (vectype))))
7958     {
7959       gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7960       compute_in_loop = true;
7961     }
7962 
7963   if ((alignment_support_scheme == dr_explicit_realign_optimized
7964        || alignment_support_scheme == dr_explicit_realign)
7965       && !compute_in_loop)
7966     {
7967       msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7968 				    alignment_support_scheme, NULL_TREE,
7969 				    &at_loop);
7970       if (alignment_support_scheme == dr_explicit_realign_optimized)
7971 	{
7972 	  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7973 	  byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7974 				    size_one_node);
7975 	}
7976     }
7977   else
7978     at_loop = loop;
7979 
7980   if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7981     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
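  /* For a reverse access the pointer is biased by nunits - 1 elements
     (e.g. 3 for a 4-lane vector) so that a forward vector load covers the
     right chunk; each loaded vector is then reversed by the
     VMAT_CONTIGUOUS_REVERSE handling (VEC_PERM_EXPR) further down.  */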
7982 
7983   tree bump;
7984   tree vec_offset = NULL_TREE;
7985   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7986     {
7987       aggr_type = NULL_TREE;
7988       bump = NULL_TREE;
7989     }
7990   else if (memory_access_type == VMAT_GATHER_SCATTER)
7991     {
7992       aggr_type = elem_type;
7993       vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
7994 				       &bump, &vec_offset);
7995     }
7996   else
7997     {
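      /* E.g. with a V4SI vectype and vec_num 2 the load-lanes case uses an
	 array type of 8 ints (one aggregate covering the whole group),
	 whereas otherwise the aggregate is a single vector.  */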
7998       if (memory_access_type == VMAT_LOAD_STORE_LANES)
7999 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8000       else
8001 	aggr_type = vectype;
8002       bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
8003     }
8004 
8005   tree vec_mask = NULL_TREE;
8006   prev_stmt_info = NULL;
8007   poly_uint64 group_elt = 0;
8008   for (j = 0; j < ncopies; j++)
8009     {
8010       /* 1. Create the vector or array pointer update chain.  */
8011       if (j == 0)
8012 	{
8013 	  bool simd_lane_access_p
8014 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8015 	  if (simd_lane_access_p
8016 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
8017 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
8018 	      && integer_zerop (DR_OFFSET (first_dr))
8019 	      && integer_zerop (DR_INIT (first_dr))
8020 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
8021 					get_alias_set (TREE_TYPE (ref_type)))
8022 	      && (alignment_support_scheme == dr_aligned
8023 		  || alignment_support_scheme == dr_unaligned_supported))
8024 	    {
8025 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
8026 	      dataref_offset = build_int_cst (ref_type, 0);
8027 	      inv_p = false;
8028 	    }
8029 	  else if (first_stmt_for_drptr
8030 		   && first_stmt != first_stmt_for_drptr)
8031 	    {
8032 	      dataref_ptr
8033 		= vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
8034 					    at_loop, offset, &dummy, gsi,
8035 					    &ptr_incr, simd_lane_access_p,
8036 					    &inv_p, byte_offset, bump);
8037 	      /* Adjust the pointer by the difference to first_stmt.  */
8038 	      data_reference_p ptrdr
8039 		= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
8040 	      tree diff = fold_convert (sizetype,
8041 					size_binop (MINUS_EXPR,
8042 						    DR_INIT (first_dr),
8043 						    DR_INIT (ptrdr)));
8044 	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8045 					     stmt, diff);
8046 	    }
8047 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8048 	    {
8049 	      vect_get_gather_scatter_ops (loop, stmt, &gs_info,
8050 					   &dataref_ptr, &vec_offset);
8051 	      inv_p = false;
8052 	    }
8053 	  else
8054 	    dataref_ptr
8055 	      = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
8056 					  offset, &dummy, gsi, &ptr_incr,
8057 					  simd_lane_access_p, &inv_p,
8058 					  byte_offset, bump);
8059 	  if (mask)
8060 	    vec_mask = vect_get_vec_def_for_operand (mask, stmt,
8061 						     mask_vectype);
8062 	}
8063       else
8064 	{
8065 	  if (dataref_offset)
8066 	    dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
8067 					      bump);
8068 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8069 	    vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
8070 							 vec_offset);
8071 	  else
8072 	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8073 					   stmt, bump);
8074 	  if (mask)
8075 	    vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
8076 	}
8077 
8078       if (grouped_load || slp_perm)
8079 	dr_chain.create (vec_num);
8080 
8081       if (memory_access_type == VMAT_LOAD_STORE_LANES)
8082 	{
8083 	  tree vec_array;
8084 
8085 	  vec_array = create_vector_array (vectype, vec_num);
8086 
8087 	  tree final_mask = NULL_TREE;
8088 	  if (loop_masks)
8089 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8090 					     vectype, j);
8091 	  if (vec_mask)
8092 	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8093 						  vec_mask, gsi);
8094 
8095 	  gcall *call;
8096 	  if (final_mask)
8097 	    {
8098 	      /* Emit:
8099 		   VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8100 		                                VEC_MASK).  */
8101 	      unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8102 	      tree alias_ptr = build_int_cst (ref_type, align);
8103 	      call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8104 						 dataref_ptr, alias_ptr,
8105 						 final_mask);
8106 	    }
8107 	  else
8108 	    {
8109 	      /* Emit:
8110 		   VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
8111 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8112 	      call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8113 	    }
8114 	  gimple_call_set_lhs (call, vec_array);
8115 	  gimple_call_set_nothrow (call, true);
8116 	  new_stmt = call;
8117 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
8118 
8119 	  /* Extract each vector into an SSA_NAME.  */
8120 	  for (i = 0; i < vec_num; i++)
8121 	    {
8122 	      new_temp = read_vector_array (stmt, gsi, scalar_dest,
8123 					    vec_array, i);
8124 	      dr_chain.quick_push (new_temp);
8125 	    }
8126 
8127 	  /* Record the mapping between SSA_NAMEs and statements.  */
8128 	  vect_record_grouped_load_vectors (stmt, dr_chain);
8129 	}
8130       else
8131 	{
8132 	  for (i = 0; i < vec_num; i++)
8133 	    {
8134 	      tree final_mask = NULL_TREE;
8135 	      if (loop_masks
8136 		  && memory_access_type != VMAT_INVARIANT)
8137 		final_mask = vect_get_loop_mask (gsi, loop_masks,
8138 						 vec_num * ncopies,
8139 						 vectype, vec_num * j + i);
8140 	      if (vec_mask)
8141 		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8142 						      vec_mask, gsi);
8143 
8144 	      if (i > 0)
8145 		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8146 					       stmt, bump);
8147 
8148 	      /* 2. Create the vector-load in the loop.  */
8149 	      switch (alignment_support_scheme)
8150 		{
8151 		case dr_aligned:
8152 		case dr_unaligned_supported:
8153 		  {
8154 		    unsigned int align, misalign;
8155 
8156 		    if (memory_access_type == VMAT_GATHER_SCATTER)
8157 		      {
8158 			tree scale = size_int (gs_info.scale);
8159 			gcall *call;
8160 			if (loop_masks)
8161 			  call = gimple_build_call_internal
8162 			    (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8163 			     vec_offset, scale, final_mask);
8164 			else
8165 			  call = gimple_build_call_internal
8166 			    (IFN_GATHER_LOAD, 3, dataref_ptr,
8167 			     vec_offset, scale);
8168 			gimple_call_set_nothrow (call, true);
8169 			new_stmt = call;
8170 			data_ref = NULL_TREE;
8171 			break;
8172 		      }
8173 
8174 		    align = DR_TARGET_ALIGNMENT (dr);
8175 		    if (alignment_support_scheme == dr_aligned)
8176 		      {
8177 			gcc_assert (aligned_access_p (first_dr));
8178 			misalign = 0;
8179 		      }
8180 		    else if (DR_MISALIGNMENT (first_dr) == -1)
8181 		      {
8182 			align = dr_alignment (vect_dr_behavior (first_dr));
8183 			misalign = 0;
8184 		      }
8185 		    else
8186 		      misalign = DR_MISALIGNMENT (first_dr);
8187 		    if (dataref_offset == NULL_TREE
8188 			&& TREE_CODE (dataref_ptr) == SSA_NAME)
8189 		      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8190 					      align, misalign);
8191 
8192 		    if (final_mask)
8193 		      {
8194 			align = least_bit_hwi (misalign | align);
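			/* E.g. a target alignment of 16 with misalignment 4
			   gives least_bit_hwi (20) == 4, so only 4-byte
			   alignment can be advertised to the masked load.  */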
8195 			tree ptr = build_int_cst (ref_type, align);
8196 			gcall *call
8197 			  = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8198 							dataref_ptr, ptr,
8199 							final_mask);
8200 			gimple_call_set_nothrow (call, true);
8201 			new_stmt = call;
8202 			data_ref = NULL_TREE;
8203 		      }
8204 		    else
8205 		      {
8206 			data_ref
8207 			  = fold_build2 (MEM_REF, vectype, dataref_ptr,
8208 					 dataref_offset
8209 					 ? dataref_offset
8210 					 : build_int_cst (ref_type, 0));
8211 			if (alignment_support_scheme == dr_aligned)
8212 			  ;
8213 			else if (DR_MISALIGNMENT (first_dr) == -1)
8214 			  TREE_TYPE (data_ref)
8215 			    = build_aligned_type (TREE_TYPE (data_ref),
8216 						  align * BITS_PER_UNIT);
8217 			else
8218 			  TREE_TYPE (data_ref)
8219 			    = build_aligned_type (TREE_TYPE (data_ref),
8220 						  TYPE_ALIGN (elem_type));
8221 		      }
8222 		    break;
8223 		  }
8224 		case dr_explicit_realign:
8225 		  {
8226 		    tree ptr, bump;
8227 
8228 		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8229 
8230 		    if (compute_in_loop)
8231 		      msq = vect_setup_realignment (first_stmt, gsi,
8232 						    &realignment_token,
8233 						    dr_explicit_realign,
8234 						    dataref_ptr, NULL);
8235 
8236 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
8237 		      ptr = copy_ssa_name (dataref_ptr);
8238 		    else
8239 		      ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
8240 		    unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8241 		    new_stmt = gimple_build_assign
8242 				 (ptr, BIT_AND_EXPR, dataref_ptr,
8243 				  build_int_cst
8244 				  (TREE_TYPE (dataref_ptr),
8245 				   -(HOST_WIDE_INT) align));
8246 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
8247 		    data_ref
8248 		      = build2 (MEM_REF, vectype, ptr,
8249 				build_int_cst (ref_type, 0));
8250 		    vect_copy_ref_info (data_ref, DR_REF (first_dr));
8251 		    vec_dest = vect_create_destination_var (scalar_dest,
8252 							    vectype);
8253 		    new_stmt = gimple_build_assign (vec_dest, data_ref);
8254 		    new_temp = make_ssa_name (vec_dest, new_stmt);
8255 		    gimple_assign_set_lhs (new_stmt, new_temp);
8256 		    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
8257 		    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
8258 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
8259 		    msq = new_temp;
8260 
8261 		    bump = size_binop (MULT_EXPR, vs,
8262 				       TYPE_SIZE_UNIT (elem_type));
8263 		    bump = size_binop (MINUS_EXPR, bump, size_one_node);
8264 		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
8265 		    new_stmt = gimple_build_assign
8266 				 (NULL_TREE, BIT_AND_EXPR, ptr,
8267 				  build_int_cst
8268 				  (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
8269 		    ptr = copy_ssa_name (ptr, new_stmt);
8270 		    gimple_assign_set_lhs (new_stmt, ptr);
8271 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
8272 		    data_ref
8273 		      = build2 (MEM_REF, vectype, ptr,
8274 				build_int_cst (ref_type, 0));
8275 		    break;
8276 		  }
8277 		case dr_explicit_realign_optimized:
8278 		  {
8279 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
8280 		      new_temp = copy_ssa_name (dataref_ptr);
8281 		    else
8282 		      new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8283 		    unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8284 		    new_stmt = gimple_build_assign
8285 		      (new_temp, BIT_AND_EXPR, dataref_ptr,
8286 		       build_int_cst (TREE_TYPE (dataref_ptr),
8287 				     -(HOST_WIDE_INT) align));
8288 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
8289 		    data_ref
8290 		      = build2 (MEM_REF, vectype, new_temp,
8291 				build_int_cst (ref_type, 0));
8292 		    break;
8293 		  }
8294 		default:
8295 		  gcc_unreachable ();
8296 		}
8297 	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
8298 	      /* DATA_REF is null if we've already built the statement.  */
8299 	      if (data_ref)
8300 		{
8301 		  vect_copy_ref_info (data_ref, DR_REF (first_dr));
8302 		  new_stmt = gimple_build_assign (vec_dest, data_ref);
8303 		}
8304 	      new_temp = make_ssa_name (vec_dest, new_stmt);
8305 	      gimple_set_lhs (new_stmt, new_temp);
8306 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
8307 
8308 	      /* 3. Handle explicit realignment if necessary/supported.
8309 		 Create in loop:
8310 		   vec_dest = realign_load (msq, lsq, realignment_token)  */
8311 	      if (alignment_support_scheme == dr_explicit_realign_optimized
8312 		  || alignment_support_scheme == dr_explicit_realign)
8313 		{
8314 		  lsq = gimple_assign_lhs (new_stmt);
8315 		  if (!realignment_token)
8316 		    realignment_token = dataref_ptr;
8317 		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
8318 		  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8319 						  msq, lsq, realignment_token);
8320 		  new_temp = make_ssa_name (vec_dest, new_stmt);
8321 		  gimple_assign_set_lhs (new_stmt, new_temp);
8322 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
8323 
8324 		  if (alignment_support_scheme == dr_explicit_realign_optimized)
8325 		    {
8326 		      gcc_assert (phi);
8327 		      if (i == vec_num - 1 && j == ncopies - 1)
8328 			add_phi_arg (phi, lsq,
8329 				     loop_latch_edge (containing_loop),
8330 				     UNKNOWN_LOCATION);
8331 		      msq = lsq;
8332 		    }
8333 		}
8334 
8335 	      /* 4. Handle invariant-load.  */
8336 	      if (inv_p && !bb_vinfo)
8337 		{
8338 		  gcc_assert (!grouped_load);
8339 		  /* If we have versioned for aliasing or the loop doesn't
8340 		     have any data dependencies that would preclude this,
8341 		     then we are sure this is a loop invariant load and
8342 		     thus we can insert it on the preheader edge.  */
8343 		  if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8344 		      && !nested_in_vect_loop
8345 		      && hoist_defs_of_uses (stmt, loop))
8346 		    {
8347 		      if (dump_enabled_p ())
8348 			{
8349 			  dump_printf_loc (MSG_NOTE, vect_location,
8350 					   "hoisting out of the vectorized "
8351 					   "loop: ");
8352 			  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8353 			}
8354 		      tree tem = copy_ssa_name (scalar_dest);
8355 		      gsi_insert_on_edge_immediate
8356 			(loop_preheader_edge (loop),
8357 			 gimple_build_assign (tem,
8358 					      unshare_expr
8359 					        (gimple_assign_rhs1 (stmt))));
8360 		      new_temp = vect_init_vector (stmt, tem, vectype, NULL);
8361 		      new_stmt = SSA_NAME_DEF_STMT (new_temp);
8362 		      set_vinfo_for_stmt (new_stmt,
8363 					  new_stmt_vec_info (new_stmt, vinfo));
8364 		    }
8365 		  else
8366 		    {
8367 		      gimple_stmt_iterator gsi2 = *gsi;
8368 		      gsi_next (&gsi2);
8369 		      new_temp = vect_init_vector (stmt, scalar_dest,
8370 						   vectype, &gsi2);
8371 		      new_stmt = SSA_NAME_DEF_STMT (new_temp);
8372 		    }
8373 		}
8374 
8375 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8376 		{
8377 		  tree perm_mask = perm_mask_for_reverse (vectype);
8378 		  new_temp = permute_vec_elements (new_temp, new_temp,
8379 						   perm_mask, stmt, gsi);
8380 		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
8381 		}
8382 
8383 	      /* Collect vector loads and later create their permutation in
8384 		 vect_transform_grouped_load ().  */
8385 	      if (grouped_load || slp_perm)
8386 		dr_chain.quick_push (new_temp);
8387 
8388 	      /* Store vector loads in the corresponding SLP_NODE.  */
8389 	      if (slp && !slp_perm)
8390 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8391 
8392 	      /* With SLP permutation we load the gaps as well; without
8393 	         it we need to skip the gaps after we have fully loaded
8394 		 all elements.  group_gap_adj is GROUP_SIZE here.  */
8395 	      group_elt += nunits;
8396 	      if (maybe_ne (group_gap_adj, 0U)
8397 		  && !slp_perm
8398 		  && known_eq (group_elt, group_size - group_gap_adj))
8399 		{
8400 		  poly_wide_int bump_val
8401 		    = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8402 		       * group_gap_adj);
8403 		  tree bump = wide_int_to_tree (sizetype, bump_val);
8404 		  dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8405 						 stmt, bump);
8406 		  group_elt = 0;
8407 		}
8408 	    }
8409 	  /* Bump the vector pointer to account for a gap or for excess
8410 	     elements loaded for a permuted SLP load.  */
8411 	  if (maybe_ne (group_gap_adj, 0U) && slp_perm)
8412 	    {
8413 	      poly_wide_int bump_val
8414 		= (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8415 		   * group_gap_adj);
8416 	      tree bump = wide_int_to_tree (sizetype, bump_val);
8417 	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8418 					     stmt, bump);
8419 	    }
8420 	}
8421 
8422       if (slp && !slp_perm)
8423 	continue;
8424 
8425       if (slp_perm)
8426         {
8427 	  unsigned n_perms;
8428           if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8429                                              slp_node_instance, false,
8430 					     &n_perms))
8431             {
8432               dr_chain.release ();
8433               return false;
8434             }
8435         }
8436       else
8437         {
8438           if (grouped_load)
8439   	    {
8440 	      if (memory_access_type != VMAT_LOAD_STORE_LANES)
8441 		vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
8442 	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8443 	    }
8444           else
8445 	    {
8446 	      if (j == 0)
8447 	        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8448 	      else
8449 	        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8450 	      prev_stmt_info = vinfo_for_stmt (new_stmt);
8451 	    }
8452         }
8453       dr_chain.release ();
8454     }
8455 
8456   return true;
8457 }
8458 
8459 /* Function vect_is_simple_cond.
8460 
8461    Input:
8462    VINFO - the vectorization info of the loop or basic block.
8463    COND - the condition that is checked for simple use.
8464 
8465    Output:
8466    *COMP_VECTYPE - the vector type for the comparison.
8467    *DTS - The def types for the arguments of the comparison
8468 
8469    Returns whether COND can be vectorized.  Checks whether
8470    condition operands are supportable using vect_is_simple_use.  */
8471 
8472 static bool
8473 vect_is_simple_cond (tree cond, vec_info *vinfo,
8474 		     tree *comp_vectype, enum vect_def_type *dts,
8475 		     tree vectype)
8476 {
8477   tree lhs, rhs;
8478   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8479 
8480   /* Mask case.  */
8481   if (TREE_CODE (cond) == SSA_NAME
8482       && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8483     {
8484       gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
8485       if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
8486 			       &dts[0], comp_vectype)
8487 	  || !*comp_vectype
8488 	  || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8489 	return false;
8490       return true;
8491     }
8492 
8493   if (!COMPARISON_CLASS_P (cond))
8494     return false;
8495 
8496   lhs = TREE_OPERAND (cond, 0);
8497   rhs = TREE_OPERAND (cond, 1);
8498 
8499   if (TREE_CODE (lhs) == SSA_NAME)
8500     {
8501       gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
8502       if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
8503 	return false;
8504     }
8505   else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8506 	   || TREE_CODE (lhs) == FIXED_CST)
8507     dts[0] = vect_constant_def;
8508   else
8509     return false;
8510 
8511   if (TREE_CODE (rhs) == SSA_NAME)
8512     {
8513       gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
8514       if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
8515 	return false;
8516     }
8517   else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8518 	   || TREE_CODE (rhs) == FIXED_CST)
8519     dts[1] = vect_constant_def;
8520   else
8521     return false;
8522 
8523   if (vectype1 && vectype2
8524       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8525 		   TYPE_VECTOR_SUBPARTS (vectype2)))
8526     return false;
8527 
8528   *comp_vectype = vectype1 ? vectype1 : vectype2;
8529   /* Invariant comparison.  */
8530   if (! *comp_vectype && vectype)
8531     {
8532       tree scalar_type = TREE_TYPE (lhs);
8533       /* If we can widen the comparison to match vectype do so.  */
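      /* E.g. an invariant comparison of two chars while VECTYPE is a vector
	 of 32-bit ints is widened to a 32-bit comparison here, so that
	 COMP_VECTYPE can end up with the same number of lanes as VECTYPE.  */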
8534       if (INTEGRAL_TYPE_P (scalar_type)
8535 	  && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8536 			      TYPE_SIZE (TREE_TYPE (vectype))))
8537 	scalar_type = build_nonstandard_integer_type
8538 	  (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8539 	   TYPE_UNSIGNED (scalar_type));
8540       *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8541     }
8542 
8543   return true;
8544 }
8545 
8546 /* vectorizable_condition.
8547 
8548    Check if STMT is a conditional modify expression that can be vectorized.
8549    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8550    stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
8551    at GSI.
8552 
8553    When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
8554    to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
8555    the else clause if it is 2).
8556 
8557    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
8558 
8559 bool
8560 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
8561 			gimple **vec_stmt, tree reduc_def, int reduc_index,
8562 			slp_tree slp_node)
8563 {
8564   tree scalar_dest = NULL_TREE;
8565   tree vec_dest = NULL_TREE;
8566   tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8567   tree then_clause, else_clause;
8568   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8569   tree comp_vectype = NULL_TREE;
8570   tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8571   tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8572   tree vec_compare;
8573   tree new_temp;
8574   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8575   enum vect_def_type dts[4]
8576     = {vect_unknown_def_type, vect_unknown_def_type,
8577        vect_unknown_def_type, vect_unknown_def_type};
8578   int ndts = 4;
8579   int ncopies;
8580   enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8581   stmt_vec_info prev_stmt_info = NULL;
8582   int i, j;
8583   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8584   vec<tree> vec_oprnds0 = vNULL;
8585   vec<tree> vec_oprnds1 = vNULL;
8586   vec<tree> vec_oprnds2 = vNULL;
8587   vec<tree> vec_oprnds3 = vNULL;
8588   tree vec_cmp_type;
8589   bool masked = false;
8590 
8591   if (reduc_index && STMT_SLP_TYPE (stmt_info))
8592     return false;
8593 
8594   vect_reduction_type reduction_type
8595     = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8596   if (reduction_type == TREE_CODE_REDUCTION)
8597     {
8598       if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8599 	return false;
8600 
8601       if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8602 	  && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8603 	       && reduc_def))
8604 	return false;
8605 
8606       /* FORNOW: not yet supported.  */
8607       if (STMT_VINFO_LIVE_P (stmt_info))
8608 	{
8609 	  if (dump_enabled_p ())
8610 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8611 			     "value used after loop.\n");
8612 	  return false;
8613 	}
8614     }
8615 
8616   /* Is vectorizable conditional operation?  */
8617   if (!is_gimple_assign (stmt))
8618     return false;
8619 
8620   code = gimple_assign_rhs_code (stmt);
8621 
8622   if (code != COND_EXPR)
8623     return false;
8624 
8625   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8626   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8627 
8628   if (slp_node)
8629     ncopies = 1;
8630   else
8631     ncopies = vect_get_num_copies (loop_vinfo, vectype);
8632 
8633   gcc_assert (ncopies >= 1);
8634   if (reduc_index && ncopies > 1)
8635     return false; /* FORNOW */
8636 
8637   cond_expr = gimple_assign_rhs1 (stmt);
8638   then_clause = gimple_assign_rhs2 (stmt);
8639   else_clause = gimple_assign_rhs3 (stmt);
8640 
8641   if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8642 			    &comp_vectype, &dts[0], slp_node ? NULL : vectype)
8643       || !comp_vectype)
8644     return false;
8645 
8646   gimple *def_stmt;
8647   if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
8648 			   &vectype1))
8649     return false;
8650   if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
8651 			   &vectype2))
8652     return false;
8653 
8654   if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8655     return false;
8656 
8657   if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8658     return false;
8659 
8660   masked = !COMPARISON_CLASS_P (cond_expr);
8661   vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8662 
8663   if (vec_cmp_type == NULL_TREE)
8664     return false;
8665 
8666   cond_code = TREE_CODE (cond_expr);
8667   if (!masked)
8668     {
8669       cond_expr0 = TREE_OPERAND (cond_expr, 0);
8670       cond_expr1 = TREE_OPERAND (cond_expr, 1);
8671     }
8672 
8673   if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8674     {
8675       /* Boolean values may have another representation in vectors
8676 	 and therefore we prefer bit operations over comparison for
8677 	 them (which also works for scalar masks).  We store opcodes
8678 	 to use in bitop1 and bitop2.  Statement is vectorized as
8679 	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8680 	 depending on bitop1 and bitop2 arity.  */
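      /* For example, on mask vectors a > b is realized below as a & ~b:
	 bitop1 = BIT_NOT_EXPR is applied to b and bitop2 = BIT_AND_EXPR
	 combines the result with a.  */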
8681       switch (cond_code)
8682 	{
8683 	case GT_EXPR:
8684 	  bitop1 = BIT_NOT_EXPR;
8685 	  bitop2 = BIT_AND_EXPR;
8686 	  break;
8687 	case GE_EXPR:
8688 	  bitop1 = BIT_NOT_EXPR;
8689 	  bitop2 = BIT_IOR_EXPR;
8690 	  break;
8691 	case LT_EXPR:
8692 	  bitop1 = BIT_NOT_EXPR;
8693 	  bitop2 = BIT_AND_EXPR;
8694 	  std::swap (cond_expr0, cond_expr1);
8695 	  break;
8696 	case LE_EXPR:
8697 	  bitop1 = BIT_NOT_EXPR;
8698 	  bitop2 = BIT_IOR_EXPR;
8699 	  std::swap (cond_expr0, cond_expr1);
8700 	  break;
8701 	case NE_EXPR:
8702 	  bitop1 = BIT_XOR_EXPR;
8703 	  break;
8704 	case EQ_EXPR:
8705 	  bitop1 = BIT_XOR_EXPR;
8706 	  bitop2 = BIT_NOT_EXPR;
8707 	  break;
8708 	default:
8709 	  return false;
8710 	}
8711       cond_code = SSA_NAME;
8712     }
8713 
8714   if (!vec_stmt)
8715     {
8716       STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8717       if (bitop1 != NOP_EXPR)
8718 	{
8719 	  machine_mode mode = TYPE_MODE (comp_vectype);
8720 	  optab optab;
8721 
8722 	  optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8723 	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8724 	    return false;
8725 
8726 	  if (bitop2 != NOP_EXPR)
8727 	    {
8728 	      optab = optab_for_tree_code (bitop2, comp_vectype,
8729 					   optab_default);
8730 	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8731 		return false;
8732 	    }
8733 	}
8734       if (expand_vec_cond_expr_p (vectype, comp_vectype,
8735 				     cond_code))
8736 	{
8737 	  if (!slp_node)
8738 	    vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8739 	  return true;
8740 	}
8741       return false;
8742     }
8743 
8744   /* Transform.  */
8745 
8746   if (!slp_node)
8747     {
8748       vec_oprnds0.create (1);
8749       vec_oprnds1.create (1);
8750       vec_oprnds2.create (1);
8751       vec_oprnds3.create (1);
8752     }
8753 
8754   /* Handle def.  */
8755   scalar_dest = gimple_assign_lhs (stmt);
8756   if (reduction_type != EXTRACT_LAST_REDUCTION)
8757     vec_dest = vect_create_destination_var (scalar_dest, vectype);
8758 
8759   /* Handle cond expr.  */
8760   for (j = 0; j < ncopies; j++)
8761     {
8762       gimple *new_stmt = NULL;
8763       if (j == 0)
8764 	{
8765           if (slp_node)
8766             {
8767               auto_vec<tree, 4> ops;
8768 	      auto_vec<vec<tree>, 4> vec_defs;
8769 
8770 	      if (masked)
8771 		ops.safe_push (cond_expr);
8772 	      else
8773 		{
8774 		  ops.safe_push (cond_expr0);
8775 		  ops.safe_push (cond_expr1);
8776 		}
8777               ops.safe_push (then_clause);
8778               ops.safe_push (else_clause);
8779               vect_get_slp_defs (ops, slp_node, &vec_defs);
8780 	      vec_oprnds3 = vec_defs.pop ();
8781 	      vec_oprnds2 = vec_defs.pop ();
8782 	      if (!masked)
8783 		vec_oprnds1 = vec_defs.pop ();
8784 	      vec_oprnds0 = vec_defs.pop ();
8785             }
8786           else
8787             {
8788 	      gimple *gtemp;
8789 	      if (masked)
8790 		{
8791 		  vec_cond_lhs
8792 		    = vect_get_vec_def_for_operand (cond_expr, stmt,
8793 						    comp_vectype);
8794 		  vect_is_simple_use (cond_expr, stmt_info->vinfo,
8795 				      &gtemp, &dts[0]);
8796 		}
8797 	      else
8798 		{
8799 		  vec_cond_lhs
8800 		    = vect_get_vec_def_for_operand (cond_expr0,
8801 						    stmt, comp_vectype);
8802 		  vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8803 
8804 		  vec_cond_rhs
8805 		    = vect_get_vec_def_for_operand (cond_expr1,
8806 						    stmt, comp_vectype);
8807 		  vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8808 		}
8809 	      if (reduc_index == 1)
8810 		vec_then_clause = reduc_def;
8811 	      else
8812 		{
8813 		  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8814 								  stmt);
8815 	          vect_is_simple_use (then_clause, loop_vinfo,
8816 				      &gtemp, &dts[2]);
8817 		}
8818 	      if (reduc_index == 2)
8819 		vec_else_clause = reduc_def;
8820 	      else
8821 		{
8822 		  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8823 								  stmt);
8824 		  vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8825 		}
8826 	    }
8827 	}
8828       else
8829 	{
8830 	  vec_cond_lhs
8831 	    = vect_get_vec_def_for_stmt_copy (dts[0],
8832 					      vec_oprnds0.pop ());
8833 	  if (!masked)
8834 	    vec_cond_rhs
8835 	      = vect_get_vec_def_for_stmt_copy (dts[1],
8836 						vec_oprnds1.pop ());
8837 
8838 	  vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8839 							    vec_oprnds2.pop ());
8840 	  vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8841 							    vec_oprnds3.pop ());
8842 	}
8843 
8844       if (!slp_node)
8845         {
8846 	  vec_oprnds0.quick_push (vec_cond_lhs);
8847 	  if (!masked)
8848 	    vec_oprnds1.quick_push (vec_cond_rhs);
8849 	  vec_oprnds2.quick_push (vec_then_clause);
8850 	  vec_oprnds3.quick_push (vec_else_clause);
8851 	}
8852 
8853       /* Arguments are ready.  Create the new vector stmt.  */
8854       FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8855         {
8856           vec_then_clause = vec_oprnds2[i];
8857           vec_else_clause = vec_oprnds3[i];
8858 
8859 	  if (masked)
8860 	    vec_compare = vec_cond_lhs;
8861 	  else
8862 	    {
8863 	      vec_cond_rhs = vec_oprnds1[i];
8864 	      if (bitop1 == NOP_EXPR)
8865 		vec_compare = build2 (cond_code, vec_cmp_type,
8866 				      vec_cond_lhs, vec_cond_rhs);
8867 	      else
8868 		{
8869 		  new_temp = make_ssa_name (vec_cmp_type);
8870 		  if (bitop1 == BIT_NOT_EXPR)
8871 		    new_stmt = gimple_build_assign (new_temp, bitop1,
8872 						    vec_cond_rhs);
8873 		  else
8874 		    new_stmt
8875 		      = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8876 					     vec_cond_rhs);
8877 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
8878 		  if (bitop2 == NOP_EXPR)
8879 		    vec_compare = new_temp;
8880 		  else if (bitop2 == BIT_NOT_EXPR)
8881 		    {
8882 		      /* Instead of doing ~x ? y : z do x ? z : y.  */
8883 		      vec_compare = new_temp;
8884 		      std::swap (vec_then_clause, vec_else_clause);
8885 		    }
8886 		  else
8887 		    {
8888 		      vec_compare = make_ssa_name (vec_cmp_type);
8889 		      new_stmt
8890 			= gimple_build_assign (vec_compare, bitop2,
8891 					       vec_cond_lhs, new_temp);
8892 		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
8893 		    }
8894 		}
8895 	    }
8896 	  if (reduction_type == EXTRACT_LAST_REDUCTION)
8897 	    {
8898 	      if (!is_gimple_val (vec_compare))
8899 		{
8900 		  tree vec_compare_name = make_ssa_name (vec_cmp_type);
8901 		  new_stmt = gimple_build_assign (vec_compare_name,
8902 						  vec_compare);
8903 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
8904 		  vec_compare = vec_compare_name;
8905 		}
8906 	      gcc_assert (reduc_index == 2);
8907 	      new_stmt = gimple_build_call_internal
8908 		(IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
8909 		 vec_then_clause);
8910 	      gimple_call_set_lhs (new_stmt, scalar_dest);
8911 	      SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
8912 	      if (stmt == gsi_stmt (*gsi))
8913 		vect_finish_replace_stmt (stmt, new_stmt);
8914 	      else
8915 		{
8916 		  /* In this case we're moving the definition to later in the
8917 		     block.  That doesn't matter because the only uses of the
8918 		     lhs are in phi statements.  */
8919 		  gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
8920 		  gsi_remove (&old_gsi, true);
8921 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
8922 		}
8923 	    }
8924 	  else
8925 	    {
8926 	      new_temp = make_ssa_name (vec_dest);
8927 	      new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8928 					      vec_compare, vec_then_clause,
8929 					      vec_else_clause);
8930 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
8931 	    }
8932           if (slp_node)
8933             SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8934         }
8935 
8936         if (slp_node)
8937           continue;
8938 
8939         if (j == 0)
8940           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8941         else
8942           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8943 
8944         prev_stmt_info = vinfo_for_stmt (new_stmt);
8945     }
8946 
8947   vec_oprnds0.release ();
8948   vec_oprnds1.release ();
8949   vec_oprnds2.release ();
8950   vec_oprnds3.release ();
8951 
8952   return true;
8953 }
8954 
8955 /* vectorizable_comparison.
8956 
8957    Check if STMT is a comparison expression that can be vectorized.
8958    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8959    comparison, put it in VEC_STMT, and insert it at GSI.
8960 
8961    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
8962 
8963 static bool
8964 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8965 			 gimple **vec_stmt, tree reduc_def,
8966 			 slp_tree slp_node)
8967 {
8968   tree lhs, rhs1, rhs2;
8969   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8970   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8971   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8972   tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8973   tree new_temp;
8974   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8975   enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8976   int ndts = 2;
8977   poly_uint64 nunits;
8978   int ncopies;
8979   enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8980   stmt_vec_info prev_stmt_info = NULL;
8981   int i, j;
8982   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8983   vec<tree> vec_oprnds0 = vNULL;
8984   vec<tree> vec_oprnds1 = vNULL;
8985   gimple *def_stmt;
8986   tree mask_type;
8987   tree mask;
8988 
8989   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8990     return false;
8991 
8992   if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8993     return false;
8994 
8995   mask_type = vectype;
8996   nunits = TYPE_VECTOR_SUBPARTS (vectype);
8997 
8998   if (slp_node)
8999     ncopies = 1;
9000   else
9001     ncopies = vect_get_num_copies (loop_vinfo, vectype);
9002 
9003   gcc_assert (ncopies >= 1);
9004   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9005       && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
9006 	   && reduc_def))
9007     return false;
9008 
9009   if (STMT_VINFO_LIVE_P (stmt_info))
9010     {
9011       if (dump_enabled_p ())
9012 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9013 			 "value used after loop.\n");
9014       return false;
9015     }
9016 
9017   if (!is_gimple_assign (stmt))
9018     return false;
9019 
9020   code = gimple_assign_rhs_code (stmt);
9021 
9022   if (TREE_CODE_CLASS (code) != tcc_comparison)
9023     return false;
9024 
9025   rhs1 = gimple_assign_rhs1 (stmt);
9026   rhs2 = gimple_assign_rhs2 (stmt);
9027 
9028   if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
9029 			   &dts[0], &vectype1))
9030     return false;
9031 
9032   if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
9033 			   &dts[1], &vectype2))
9034     return false;
9035 
9036   if (vectype1 && vectype2
9037       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9038 		   TYPE_VECTOR_SUBPARTS (vectype2)))
9039     return false;
9040 
9041   vectype = vectype1 ? vectype1 : vectype2;
9042 
9043   /* Invariant comparison.  */
9044   if (!vectype)
9045     {
9046       vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
9047       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
9048 	return false;
9049     }
9050   else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
9051     return false;
9052 
9053   /* Can't compare mask and non-mask types.  */
9054   if (vectype1 && vectype2
9055       && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9056     return false;
9057 
9058   /* Boolean values may have another representation in vectors
9059      and therefore we prefer bit operations over comparison for
9060      them (which also works for scalar masks).  We store opcodes
9061      to use in bitop1 and bitop2.  Statement is vectorized as
9062        BITOP2 (rhs1 BITOP1 rhs2) or
9063        rhs1 BITOP2 (BITOP1 rhs2)
9064      depending on bitop1 and bitop2 arity.  */
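  /* For example, on mask vectors a < b is handled below as b & ~a (the
     operands are swapped and then treated like GT), while a == b becomes
     ~(a ^ b).  */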
9065   if (VECTOR_BOOLEAN_TYPE_P (vectype))
9066     {
9067       if (code == GT_EXPR)
9068 	{
9069 	  bitop1 = BIT_NOT_EXPR;
9070 	  bitop2 = BIT_AND_EXPR;
9071 	}
9072       else if (code == GE_EXPR)
9073 	{
9074 	  bitop1 = BIT_NOT_EXPR;
9075 	  bitop2 = BIT_IOR_EXPR;
9076 	}
9077       else if (code == LT_EXPR)
9078 	{
9079 	  bitop1 = BIT_NOT_EXPR;
9080 	  bitop2 = BIT_AND_EXPR;
9081 	  std::swap (rhs1, rhs2);
9082 	  std::swap (dts[0], dts[1]);
9083 	}
9084       else if (code == LE_EXPR)
9085 	{
9086 	  bitop1 = BIT_NOT_EXPR;
9087 	  bitop2 = BIT_IOR_EXPR;
9088 	  std::swap (rhs1, rhs2);
9089 	  std::swap (dts[0], dts[1]);
9090 	}
9091       else
9092 	{
9093 	  bitop1 = BIT_XOR_EXPR;
9094 	  if (code == EQ_EXPR)
9095 	    bitop2 = BIT_NOT_EXPR;
9096 	}
9097     }
9098 
9099   if (!vec_stmt)
9100     {
9101       STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9102       if (!slp_node)
9103 	vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9104 				dts, ndts, NULL, NULL);
9105       if (bitop1 == NOP_EXPR)
9106 	return expand_vec_cmp_expr_p (vectype, mask_type, code);
9107       else
9108 	{
9109 	  machine_mode mode = TYPE_MODE (vectype);
9110 	  optab optab;
9111 
9112 	  optab = optab_for_tree_code (bitop1, vectype, optab_default);
9113 	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9114 	    return false;
9115 
9116 	  if (bitop2 != NOP_EXPR)
9117 	    {
9118 	      optab = optab_for_tree_code (bitop2, vectype, optab_default);
9119 	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9120 		return false;
9121 	    }
9122 	  return true;
9123 	}
9124     }
9125 
9126   /* Transform.  */
9127   if (!slp_node)
9128     {
9129       vec_oprnds0.create (1);
9130       vec_oprnds1.create (1);
9131     }
9132 
9133   /* Handle def.  */
9134   lhs = gimple_assign_lhs (stmt);
9135   mask = vect_create_destination_var (lhs, mask_type);
9136 
9137   /* Handle cmp expr.  */
9138   for (j = 0; j < ncopies; j++)
9139     {
9140       gassign *new_stmt = NULL;
9141       if (j == 0)
9142 	{
9143 	  if (slp_node)
9144 	    {
9145 	      auto_vec<tree, 2> ops;
9146 	      auto_vec<vec<tree>, 2> vec_defs;
9147 
9148 	      ops.safe_push (rhs1);
9149 	      ops.safe_push (rhs2);
9150 	      vect_get_slp_defs (ops, slp_node, &vec_defs);
9151 	      vec_oprnds1 = vec_defs.pop ();
9152 	      vec_oprnds0 = vec_defs.pop ();
9153 	    }
9154 	  else
9155 	    {
9156 	      vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
9157 	      vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
9158 	    }
9159 	}
9160       else
9161 	{
9162 	  vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
9163 						     vec_oprnds0.pop ());
9164 	  vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
9165 						     vec_oprnds1.pop ());
9166 	}
9167 
9168       if (!slp_node)
9169 	{
9170 	  vec_oprnds0.quick_push (vec_rhs1);
9171 	  vec_oprnds1.quick_push (vec_rhs2);
9172 	}
9173 
9174       /* Arguments are ready.  Create the new vector stmt.  */
9175       FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9176 	{
9177 	  vec_rhs2 = vec_oprnds1[i];
9178 
9179 	  new_temp = make_ssa_name (mask);
9180 	  if (bitop1 == NOP_EXPR)
9181 	    {
9182 	      new_stmt = gimple_build_assign (new_temp, code,
9183 					      vec_rhs1, vec_rhs2);
9184 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
9185 	    }
9186 	  else
9187 	    {
9188 	      if (bitop1 == BIT_NOT_EXPR)
9189 		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9190 	      else
9191 		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9192 						vec_rhs2);
9193 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
9194 	      if (bitop2 != NOP_EXPR)
9195 		{
9196 		  tree res = make_ssa_name (mask);
9197 		  if (bitop2 == BIT_NOT_EXPR)
9198 		    new_stmt = gimple_build_assign (res, bitop2, new_temp);
9199 		  else
9200 		    new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9201 						    new_temp);
9202 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
9203 		}
9204 	    }
9205 	  if (slp_node)
9206 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9207 	}
9208 
9209       if (slp_node)
9210 	continue;
9211 
9212       if (j == 0)
9213 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9214       else
9215 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9216 
9217       prev_stmt_info = vinfo_for_stmt (new_stmt);
9218     }
9219 
9220   vec_oprnds0.release ();
9221   vec_oprnds1.release ();
9222 
9223   return true;
9224 }
9225 
9226 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9227    can handle all live statements in the node.  Otherwise return true
9228    if STMT is not live or if vectorizable_live_operation can handle it.
9229    GSI and VEC_STMT are as for vectorizable_live_operation.  */
9230 
9231 static bool
9232 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
9233 			  slp_tree slp_node, gimple **vec_stmt)
9234 {
9235   if (slp_node)
9236     {
9237       gimple *slp_stmt;
9238       unsigned int i;
9239       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
9240 	{
9241 	  stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
9242 	  if (STMT_VINFO_LIVE_P (slp_stmt_info)
9243 	      && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
9244 					       vec_stmt))
9245 	    return false;
9246 	}
9247     }
9248   else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
9249 	   && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
9250     return false;
9251 
9252   return true;
9253 }
9254 
9255 /* Make sure the statement is vectorizable.  */
9256 
9257 bool
9258 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
9259 		   slp_instance node_instance)
9260 {
9261   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9262   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9263   enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
9264   bool ok;
9265   gimple *pattern_stmt;
9266   gimple_seq pattern_def_seq;
9267 
9268   if (dump_enabled_p ())
9269     {
9270       dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
9271       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9272     }
9273 
9274   if (gimple_has_volatile_ops (stmt))
9275     {
9276       if (dump_enabled_p ())
9277         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9278                          "not vectorized: stmt has volatile operands\n");
9279 
9280       return false;
9281     }
9282 
9283   /* Skip stmts that do not need to be vectorized. In loops this is expected
9284      to include:
9285      - the COND_EXPR which is the loop exit condition
9286      - any LABEL_EXPRs in the loop
9287      - computations that are used only for array indexing or loop control.
9288      In basic blocks we only analyze statements that are a part of some SLP
9289      instance; therefore, all the statements are relevant.
9290 
9291      The pattern statement needs to be analyzed instead of the original
9292      statement if the original statement is not relevant.  Otherwise, we
9293      analyze both statements.  In basic blocks we are called from some SLP
9294      instance traversal, so don't analyze pattern stmts instead; the pattern
9295      stmts will already be part of the SLP instance.  */
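  /* As a concrete illustration: in

       for (i = 0; i < n; i++)
         a[i] = b[i] + c[i];

     the increment of I and the exit test I < N only control the loop and
     are not vectorized themselves, so the checks below skip them.  */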
9296 
9297   pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
9298   if (!STMT_VINFO_RELEVANT_P (stmt_info)
9299       && !STMT_VINFO_LIVE_P (stmt_info))
9300     {
9301       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9302           && pattern_stmt
9303           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9304               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9305         {
9306           /* Analyze PATTERN_STMT instead of the original stmt.  */
9307           stmt = pattern_stmt;
9308           stmt_info = vinfo_for_stmt (pattern_stmt);
9309           if (dump_enabled_p ())
9310             {
9311               dump_printf_loc (MSG_NOTE, vect_location,
9312                                "==> examining pattern statement: ");
9313               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9314             }
9315         }
9316       else
9317         {
9318           if (dump_enabled_p ())
9319             dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
9320 
9321           return true;
9322         }
9323     }
9324   else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9325 	   && node == NULL
9326            && pattern_stmt
9327            && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9328                || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9329     {
9330       /* Analyze PATTERN_STMT too.  */
9331       if (dump_enabled_p ())
9332         {
9333           dump_printf_loc (MSG_NOTE, vect_location,
9334                            "==> examining pattern statement: ");
9335           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9336         }
9337 
9338       if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
9339 			      node_instance))
9340         return false;
9341    }
9342 
9343   if (is_pattern_stmt_p (stmt_info)
9344       && node == NULL
9345       && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9346     {
9347       gimple_stmt_iterator si;
9348 
9349       for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9350 	{
9351 	  gimple *pattern_def_stmt = gsi_stmt (si);
9352 	  if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
9353 	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
9354 	    {
9355 	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
9356 	      if (dump_enabled_p ())
9357 		{
9358 		  dump_printf_loc (MSG_NOTE, vect_location,
9359                                    "==> examining pattern def statement: ");
9360 		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
9361 		}
9362 
9363 	      if (!vect_analyze_stmt (pattern_def_stmt,
9364 				      need_to_vectorize, node, node_instance))
9365 		return false;
9366 	    }
9367 	}
9368     }
9369 
9370   switch (STMT_VINFO_DEF_TYPE (stmt_info))
9371     {
9372       case vect_internal_def:
9373         break;
9374 
9375       case vect_reduction_def:
9376       case vect_nested_cycle:
9377          gcc_assert (!bb_vinfo
9378 		     && (relevance == vect_used_in_outer
9379 			 || relevance == vect_used_in_outer_by_reduction
9380 			 || relevance == vect_used_by_reduction
9381 			 || relevance == vect_unused_in_scope
9382 			 || relevance == vect_used_only_live));
9383          break;
9384 
9385       case vect_induction_def:
9386 	gcc_assert (!bb_vinfo);
9387 	break;
9388 
9389       case vect_constant_def:
9390       case vect_external_def:
9391       case vect_unknown_def_type:
9392       default:
9393         gcc_unreachable ();
9394     }
9395 
9396   if (STMT_VINFO_RELEVANT_P (stmt_info))
9397     {
9398       gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
9399       gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9400 		  || (is_gimple_call (stmt)
9401 		      && gimple_call_lhs (stmt) == NULL_TREE));
9402       *need_to_vectorize = true;
9403     }
9404 
9405   if (PURE_SLP_STMT (stmt_info) && !node)
9406     {
9407       dump_printf_loc (MSG_NOTE, vect_location,
9408 		       "handled only by SLP analysis\n");
9409       return true;
9410     }
9411 
9412   ok = true;
9413   if (!bb_vinfo
9414       && (STMT_VINFO_RELEVANT_P (stmt_info)
9415 	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9416     ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
9417 	  || vectorizable_conversion (stmt, NULL, NULL, node)
9418 	  || vectorizable_shift (stmt, NULL, NULL, node)
9419 	  || vectorizable_operation (stmt, NULL, NULL, node)
9420 	  || vectorizable_assignment (stmt, NULL, NULL, node)
9421 	  || vectorizable_load (stmt, NULL, NULL, node, NULL)
9422 	  || vectorizable_call (stmt, NULL, NULL, node)
9423 	  || vectorizable_store (stmt, NULL, NULL, node)
9424 	  || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
9425 	  || vectorizable_induction (stmt, NULL, NULL, node)
9426 	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
9427 	  || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
9428   else
9429     {
9430       if (bb_vinfo)
9431 	ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
9432 	      || vectorizable_conversion (stmt, NULL, NULL, node)
9433 	      || vectorizable_shift (stmt, NULL, NULL, node)
9434 	      || vectorizable_operation (stmt, NULL, NULL, node)
9435 	      || vectorizable_assignment (stmt, NULL, NULL, node)
9436 	      || vectorizable_load (stmt, NULL, NULL, node, NULL)
9437 	      || vectorizable_call (stmt, NULL, NULL, node)
9438 	      || vectorizable_store (stmt, NULL, NULL, node)
9439 	      || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
9440 	      || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
9441     }
9442 
9443   if (!ok)
9444     {
9445       if (dump_enabled_p ())
9446         {
9447           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9448                            "not vectorized: relevant stmt not ");
9449           dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
9450           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9451         }
9452 
9453       return false;
9454     }
9455 
9456   if (bb_vinfo)
9457     return true;
9458 
9459   /* Stmts that are (also) "live" (i.e. that are used outside of the loop)
9460      need extra handling, except for vectorizable reductions.  */
9461   if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9462       && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
9463     {
9464       if (dump_enabled_p ())
9465         {
9466           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9467                            "not vectorized: live stmt not supported: ");
9468           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9469         }
9470 
9471        return false;
9472     }
9473 
9474   return true;
9475 }
9476 
9477 
9478 /* Function vect_transform_stmt.
9479 
9480    Create a vectorized stmt to replace STMT, and insert it at BSI.  */
9481 
9482 bool
9483 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
9484 		     bool *grouped_store, slp_tree slp_node,
9485                      slp_instance slp_node_instance)
9486 {
9487   bool is_store = false;
9488   gimple *vec_stmt = NULL;
9489   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9490   bool done;
9491 
9492   gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9493   gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9494 
9495   bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9496 		   && nested_in_vect_loop_p
9497 		        (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9498 			 stmt));
9499 
9500   switch (STMT_VINFO_TYPE (stmt_info))
9501     {
9502     case type_demotion_vec_info_type:
9503     case type_promotion_vec_info_type:
9504     case type_conversion_vec_info_type:
9505       done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
9506       gcc_assert (done);
9507       break;
9508 
9509     case induc_vec_info_type:
9510       done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
9511       gcc_assert (done);
9512       break;
9513 
9514     case shift_vec_info_type:
9515       done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
9516       gcc_assert (done);
9517       break;
9518 
9519     case op_vec_info_type:
9520       done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
9521       gcc_assert (done);
9522       break;
9523 
9524     case assignment_vec_info_type:
9525       done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
9526       gcc_assert (done);
9527       break;
9528 
9529     case load_vec_info_type:
9530       done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
9531                                 slp_node_instance);
9532       gcc_assert (done);
9533       break;
9534 
9535     case store_vec_info_type:
9536       done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
9537       gcc_assert (done);
9538       if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9539 	{
9540 	  /* In case of interleaving, the whole chain is vectorized when the
9541 	     last store in the chain is reached.  Store stmts before the last
9542 	     one are skipped, and their vec_stmt_info shouldn't be freed
9543 	     in the meantime.  */
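	  /* E.g. (illustrative only): for the interleaved pair
	       a[2*i]   = x;
	       a[2*i+1] = y;
	     the first store is skipped when it is reached and both are
	     emitted together once the second (last) one is processed.  */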
9544 	  *grouped_store = true;
9545 	  stmt_vec_info group_info
9546 	    = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
9547 	  if (GROUP_STORE_COUNT (group_info) == GROUP_SIZE (group_info))
9548 	    is_store = true;
9549 	}
9550       else
9551 	is_store = true;
9552       break;
9553 
9554     case condition_vec_info_type:
9555       done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
9556       gcc_assert (done);
9557       break;
9558 
9559     case comparison_vec_info_type:
9560       done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
9561       gcc_assert (done);
9562       break;
9563 
9564     case call_vec_info_type:
9565       done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
9566       stmt = gsi_stmt (*gsi);
9567       break;
9568 
9569     case call_simd_clone_vec_info_type:
9570       done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
9571       stmt = gsi_stmt (*gsi);
9572       break;
9573 
9574     case reduc_vec_info_type:
9575       done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
9576 				     slp_node_instance);
9577       gcc_assert (done);
9578       break;
9579 
9580     default:
9581       if (!STMT_VINFO_LIVE_P (stmt_info))
9582 	{
9583 	  if (dump_enabled_p ())
9584 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9585                              "stmt not supported.\n");
9586 	  gcc_unreachable ();
9587 	}
9588     }
9589 
9590   /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9591      This would break hybrid SLP vectorization.  */
9592   if (slp_node)
9593     gcc_assert (!vec_stmt
9594 		&& STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
9595 
9596   /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9597      is being vectorized, but outside the immediately enclosing loop.  */
9598   if (vec_stmt
9599       && nested_p
9600       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9601       && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9602           || STMT_VINFO_RELEVANT (stmt_info) ==
9603                                            vect_used_in_outer_by_reduction))
9604     {
9605       struct loop *innerloop = LOOP_VINFO_LOOP (
9606                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9607       imm_use_iterator imm_iter;
9608       use_operand_p use_p;
9609       tree scalar_dest;
9610       gimple *exit_phi;
9611 
9612       if (dump_enabled_p ())
9613         dump_printf_loc (MSG_NOTE, vect_location,
9614                          "Record the vdef for outer-loop vectorization.\n");
9615 
9616       /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9617         (to be used when vectorizing outer-loop stmts that use the DEF of
9618         STMT).  */
9619       if (gimple_code (stmt) == GIMPLE_PHI)
9620         scalar_dest = PHI_RESULT (stmt);
9621       else
9622         scalar_dest = gimple_get_lhs (stmt);
9623 
9624       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9625        {
9626          if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9627            {
9628              exit_phi = USE_STMT (use_p);
9629              STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
9630            }
9631        }
9632     }
9633 
9634   /* Handle stmts whose DEF is used outside the loop-nest that is
9635      being vectorized.  */
9636   if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9637     {
9638       done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
9639       gcc_assert (done);
9640     }
9641 
9642   if (vec_stmt)
9643     STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9644 
9645   return is_store;
9646 }
9647 
9648 
9649 /* Remove a group of stores (for SLP or interleaving), free their
9650    stmt_vec_info.  */
9651 
9652 void
9653 vect_remove_stores (gimple *first_stmt)
9654 {
9655   gimple *next = first_stmt;
9656   gimple *tmp;
9657   gimple_stmt_iterator next_si;
9658 
9659   while (next)
9660     {
9661       stmt_vec_info stmt_info = vinfo_for_stmt (next);
9662 
9663       tmp = GROUP_NEXT_ELEMENT (stmt_info);
9664       if (is_pattern_stmt_p (stmt_info))
9665 	next = STMT_VINFO_RELATED_STMT (stmt_info);
9666       /* Free the attached stmt_vec_info and remove the stmt.  */
9667       next_si = gsi_for_stmt (next);
9668       unlink_stmt_vdef (next);
9669       gsi_remove (&next_si, true);
9670       release_defs (next);
9671       free_stmt_vec_info (next);
9672       next = tmp;
9673     }
9674 }
9675 
9676 
9677 /* Function new_stmt_vec_info.
9678 
9679    Create and initialize a new stmt_vec_info struct for STMT.  */
9680 
9681 stmt_vec_info
9682 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
9683 {
9684   stmt_vec_info res;
9685   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
9686 
9687   STMT_VINFO_TYPE (res) = undef_vec_info_type;
9688   STMT_VINFO_STMT (res) = stmt;
9689   res->vinfo = vinfo;
9690   STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
9691   STMT_VINFO_LIVE_P (res) = false;
9692   STMT_VINFO_VECTYPE (res) = NULL;
9693   STMT_VINFO_VEC_STMT (res) = NULL;
9694   STMT_VINFO_VECTORIZABLE (res) = true;
9695   STMT_VINFO_IN_PATTERN_P (res) = false;
9696   STMT_VINFO_RELATED_STMT (res) = NULL;
9697   STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
9698   STMT_VINFO_DATA_REF (res) = NULL;
9699   STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
9700   STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
9701 
9702   if (gimple_code (stmt) == GIMPLE_PHI
9703       && is_loop_header_bb_p (gimple_bb (stmt)))
9704     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9705   else
9706     STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9707 
9708   STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
9709   STMT_SLP_TYPE (res) = loop_vect;
9710   STMT_VINFO_NUM_SLP_USES (res) = 0;
9711 
9712   GROUP_FIRST_ELEMENT (res) = NULL;
9713   GROUP_NEXT_ELEMENT (res) = NULL;
9714   GROUP_SIZE (res) = 0;
9715   GROUP_STORE_COUNT (res) = 0;
9716   GROUP_GAP (res) = 0;
9717   GROUP_SAME_DR_STMT (res) = NULL;
9718 
9719   return res;
9720 }
9721 
9722 
9723 /* Create a hash table for stmt_vec_info. */
9724 
9725 void
9726 init_stmt_vec_info_vec (void)
9727 {
9728   gcc_assert (!stmt_vec_info_vec.exists ());
9729   stmt_vec_info_vec.create (50);
9730 }
9731 
9732 
9733 /* Free hash table for stmt_vec_info. */
9734 
9735 void
9736 free_stmt_vec_info_vec (void)
9737 {
9738   unsigned int i;
9739   stmt_vec_info info;
9740   FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9741     if (info != NULL)
9742       free_stmt_vec_info (STMT_VINFO_STMT (info));
9743   gcc_assert (stmt_vec_info_vec.exists ());
9744   stmt_vec_info_vec.release ();
9745 }
9746 
9747 
9748 /* Free stmt vectorization related info.  */
9749 
9750 void
9751 free_stmt_vec_info (gimple *stmt)
9752 {
9753   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9754 
9755   if (!stmt_info)
9756     return;
9757 
9758   /* Check if this statement has a related "pattern stmt"
9759      (introduced by the vectorizer during the pattern recognition
9760      pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9761      too.  */
9762   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9763     {
9764       stmt_vec_info patt_info
9765 	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9766       if (patt_info)
9767 	{
9768 	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
9769 	  gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
9770 	  gimple_set_bb (patt_stmt, NULL);
9771 	  tree lhs = gimple_get_lhs (patt_stmt);
9772 	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
9773 	    release_ssa_name (lhs);
9774 	  if (seq)
9775 	    {
9776 	      gimple_stmt_iterator si;
9777 	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
9778 		{
9779 		  gimple *seq_stmt = gsi_stmt (si);
9780 		  gimple_set_bb (seq_stmt, NULL);
9781 		  lhs = gimple_get_lhs (seq_stmt);
9782 		  if (lhs && TREE_CODE (lhs) == SSA_NAME)
9783 		    release_ssa_name (lhs);
9784 		  free_stmt_vec_info (seq_stmt);
9785 		}
9786 	    }
9787 	  free_stmt_vec_info (patt_stmt);
9788 	}
9789     }
9790 
9791   STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9792   STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9793   set_vinfo_for_stmt (stmt, NULL);
9794   free (stmt_info);
9795 }
9796 
9797 
9798 /* Function get_vectype_for_scalar_type_and_size.
9799 
9800    Returns the vector type corresponding to SCALAR_TYPE  and SIZE as supported
9801    by the target.  */
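
/* For example (target-dependent, purely illustrative): with SCALAR_TYPE
   == int and SIZE == 16 bytes this returns a "vector(4) int" type on a
   target that provides 128-bit vector modes, and NULL_TREE if no suitable
   vector mode exists.  */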
9802 
9803 tree
9804 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9805 {
9806   tree orig_scalar_type = scalar_type;
9807   scalar_mode inner_mode;
9808   machine_mode simd_mode;
9809   poly_uint64 nunits;
9810   tree vectype;
9811 
9812   if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9813       && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9814     return NULL_TREE;
9815 
9816   unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9817 
9818   /* For vector types of elements whose mode precision doesn't
9819      match their type's precision we use an element type of mode
9820      precision.  The vectorization routines will have to make sure
9821      they support the proper result truncation/extension.
9822      We also make sure to build vector types with INTEGER_TYPE
9823      component type only.  */
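  /* E.g. (illustrative): a _Bool element has QImode but TYPE_PRECISION 1,
     so an 8-bit unsigned INTEGER_TYPE is used as the vector element type
     instead.  */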
9824   if (INTEGRAL_TYPE_P (scalar_type)
9825       && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9826 	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
9827     scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9828 						  TYPE_UNSIGNED (scalar_type));
9829 
9830   /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9831      When the component mode passes the above test simply use a type
9832      corresponding to that mode.  The theory is that any use that
9833      would cause problems with this will disable vectorization anyway.  */
9834   else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9835 	   && !INTEGRAL_TYPE_P (scalar_type))
9836     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9837 
9838   /* We can't build a vector type of elements with alignment bigger than
9839      their size.  */
9840   else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9841     scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9842 						  TYPE_UNSIGNED (scalar_type));
9843 
9844   /* If we fell back to using the mode, fail if there was
9845      no scalar type for it.  */
9846   if (scalar_type == NULL_TREE)
9847     return NULL_TREE;
9848 
9849   /* If no size was supplied, use the mode the target prefers.  Otherwise
9850      lookup a vector mode of the specified size.  */
9851   if (known_eq (size, 0U))
9852     simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9853   else if (!multiple_p (size, nbytes, &nunits)
9854 	   || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9855     return NULL_TREE;
9856   /* NOTE: nunits == 1 is allowed to support single element vector types.  */
9857   if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
9858     return NULL_TREE;
9859 
9860   vectype = build_vector_type (scalar_type, nunits);
9861 
9862   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9863       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9864     return NULL_TREE;
9865 
9866   /* Re-attach the address-space qualifier if we canonicalized the scalar
9867      type.  */
9868   if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9869     return build_qualified_type
9870 	     (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9871 
9872   return vectype;
9873 }
9874 
9875 poly_uint64 current_vector_size;
9876 
9877 /* Function get_vectype_for_scalar_type.
9878 
9879    Returns the vector type corresponding to SCALAR_TYPE as supported
9880    by the target.  */
9881 
9882 tree
9883 get_vectype_for_scalar_type (tree scalar_type)
9884 {
9885   tree vectype;
9886   vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9887 						  current_vector_size);
9888   if (vectype
9889       && known_eq (current_vector_size, 0U))
9890     current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9891   return vectype;
9892 }
9893 
9894 /* Function get_mask_type_for_scalar_type.
9895 
9896    Returns the mask type corresponding to a result of comparison
9897    of vectors of specified SCALAR_TYPE as supported by target.  */
9898 
9899 tree
9900 get_mask_type_for_scalar_type (tree scalar_type)
9901 {
9902   tree vectype = get_vectype_for_scalar_type (scalar_type);
9903 
9904   if (!vectype)
9905     return NULL;
9906 
9907   return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9908 				  current_vector_size);
9909 }
9910 
9911 /* Function get_same_sized_vectype
9912 
9913    Returns a vector type corresponding to SCALAR_TYPE of size
9914    VECTOR_TYPE if supported by the target.  */
9915 
9916 tree
9917 get_same_sized_vectype (tree scalar_type, tree vector_type)
9918 {
9919   if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9920     return build_same_sized_truth_vector_type (vector_type);
9921 
9922   return get_vectype_for_scalar_type_and_size
9923 	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9924 }
9925 
9926 /* Function vect_is_simple_use.
9927 
9928    Input:
9929    VINFO - the vect info of the loop or basic block that is being vectorized.
9930    OPERAND - operand in the loop or bb.
9931    Output:
9932    DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9933    DT - the type of definition
9934 
9935    Returns whether a stmt with OPERAND can be vectorized.
9936    For loops, supportable operands are constants, loop invariants, and operands
9937    that are defined by the current iteration of the loop.  Unsupportable
9938    operands are those that are defined by a previous iteration of the loop (as
9939    is the case in reduction/induction computations).
9940    For basic blocks, supportable operands are constants and bb invariants.
9941    For now, operands defined outside the basic block are not supported.  */
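
/* A typical use, mirroring callers elsewhere in this file (a sketch only):

     enum vect_def_type dt;
     gimple *def_stmt;
     if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt, &dt))
       return false;	<-- operand cannot be handled, give up on the stmt

   where RHS1 is an operand of the statement being analyzed.  */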
9942 
9943 bool
9944 vect_is_simple_use (tree operand, vec_info *vinfo,
9945                     gimple **def_stmt, enum vect_def_type *dt)
9946 {
9947   *def_stmt = NULL;
9948   *dt = vect_unknown_def_type;
9949 
9950   if (dump_enabled_p ())
9951     {
9952       dump_printf_loc (MSG_NOTE, vect_location,
9953                        "vect_is_simple_use: operand ");
9954       dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9955       dump_printf (MSG_NOTE, "\n");
9956     }
9957 
9958   if (CONSTANT_CLASS_P (operand))
9959     {
9960       *dt = vect_constant_def;
9961       return true;
9962     }
9963 
9964   if (is_gimple_min_invariant (operand))
9965     {
9966       *dt = vect_external_def;
9967       return true;
9968     }
9969 
9970   if (TREE_CODE (operand) != SSA_NAME)
9971     {
9972       if (dump_enabled_p ())
9973 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9974 			 "not ssa-name.\n");
9975       return false;
9976     }
9977 
9978   if (SSA_NAME_IS_DEFAULT_DEF (operand))
9979     {
9980       *dt = vect_external_def;
9981       return true;
9982     }
9983 
9984   *def_stmt = SSA_NAME_DEF_STMT (operand);
9985   if (dump_enabled_p ())
9986     {
9987       dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9988       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9989     }
9990 
9991   if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9992     *dt = vect_external_def;
9993   else
9994     {
9995       stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9996       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9997     }
9998 
9999   if (dump_enabled_p ())
10000     {
10001       dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
10002       switch (*dt)
10003 	{
10004 	case vect_uninitialized_def:
10005 	  dump_printf (MSG_NOTE, "uninitialized\n");
10006 	  break;
10007 	case vect_constant_def:
10008 	  dump_printf (MSG_NOTE, "constant\n");
10009 	  break;
10010 	case vect_external_def:
10011 	  dump_printf (MSG_NOTE, "external\n");
10012 	  break;
10013 	case vect_internal_def:
10014 	  dump_printf (MSG_NOTE, "internal\n");
10015 	  break;
10016 	case vect_induction_def:
10017 	  dump_printf (MSG_NOTE, "induction\n");
10018 	  break;
10019 	case vect_reduction_def:
10020 	  dump_printf (MSG_NOTE, "reduction\n");
10021 	  break;
10022 	case vect_double_reduction_def:
10023 	  dump_printf (MSG_NOTE, "double reduction\n");
10024 	  break;
10025 	case vect_nested_cycle:
10026 	  dump_printf (MSG_NOTE, "nested cycle\n");
10027 	  break;
10028 	case vect_unknown_def_type:
10029 	  dump_printf (MSG_NOTE, "unknown\n");
10030 	  break;
10031 	}
10032     }
10033 
10034   if (*dt == vect_unknown_def_type)
10035     {
10036       if (dump_enabled_p ())
10037         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10038                          "Unsupported pattern.\n");
10039       return false;
10040     }
10041 
10042   switch (gimple_code (*def_stmt))
10043     {
10044     case GIMPLE_PHI:
10045     case GIMPLE_ASSIGN:
10046     case GIMPLE_CALL:
10047       break;
10048     default:
10049       if (dump_enabled_p ())
10050         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10051                          "unsupported defining stmt:\n");
10052       return false;
10053     }
10054 
10055   return true;
10056 }
10057 
10058 /* Function vect_is_simple_use.
10059 
10060    Same as vect_is_simple_use but also determines the vector operand
10061    type of OPERAND and stores it to *VECTYPE.  If the definition of
10062    OPERAND is vect_uninitialized_def, vect_constant_def or
10063    vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10064    vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
10065    is responsible for computing the best suited vector type for the
10066 
10067 bool
10068 vect_is_simple_use (tree operand, vec_info *vinfo,
10069 		    gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
10070 {
10071   if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
10072     return false;
10073 
10074   /* Now get a vector type if the def is internal, otherwise supply
10075      NULL_TREE and leave it up to the caller to figure out a proper
10076      type for the use stmt.  */
10077   if (*dt == vect_internal_def
10078       || *dt == vect_induction_def
10079       || *dt == vect_reduction_def
10080       || *dt == vect_double_reduction_def
10081       || *dt == vect_nested_cycle)
10082     {
10083       stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
10084 
10085       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10086           && !STMT_VINFO_RELEVANT (stmt_info)
10087           && !STMT_VINFO_LIVE_P (stmt_info))
10088 	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
10089 
10090       *vectype = STMT_VINFO_VECTYPE (stmt_info);
10091       gcc_assert (*vectype != NULL_TREE);
10092     }
10093   else if (*dt == vect_uninitialized_def
10094 	   || *dt == vect_constant_def
10095 	   || *dt == vect_external_def)
10096     *vectype = NULL_TREE;
10097   else
10098     gcc_unreachable ();
10099 
10100   return true;
10101 }
10102 
10103 
10104 /* Function supportable_widening_operation
10105 
10106    Check whether an operation represented by the code CODE is a
10107    widening operation that is supported by the target platform in
10108    vector form (i.e., when operating on arguments of type VECTYPE_IN
10109    producing a result of type VECTYPE_OUT).
10110 
10111    Widening operations we currently support are NOP (CONVERT), FLOAT
10112    and WIDEN_MULT.  This function checks if these operations are supported
10113    by the target platform either directly (via vector tree-codes), or via
10114    target builtins.
10115 
10116    Output:
10117    - CODE1 and CODE2 are codes of vector operations to be used when
10118    vectorizing the operation, if available.
10119    - MULTI_STEP_CVT determines the number of required intermediate steps in
10120    case of multi-step conversion (like char->short->int - in that case
10121    MULTI_STEP_CVT will be 1).
10122    - INTERM_TYPES contains the intermediate type required to perform the
10123    widening operation (short in the above example).  */
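
/* For example (illustrative, assuming a target with 128-bit vectors): a
   CONVERT from a vector of 16 chars to vectors of shorts can use
   CODE1 = VEC_UNPACK_LO_EXPR and CODE2 = VEC_UNPACK_HI_EXPR, each producing
   a vector of 8 shorts, so no intermediate type is needed and
   MULTI_STEP_CVT stays 0.  */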
10124 
10125 bool
10126 supportable_widening_operation (enum tree_code code, gimple *stmt,
10127 				tree vectype_out, tree vectype_in,
10128                                 enum tree_code *code1, enum tree_code *code2,
10129                                 int *multi_step_cvt,
10130                                 vec<tree> *interm_types)
10131 {
10132   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
10133   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
10134   struct loop *vect_loop = NULL;
10135   machine_mode vec_mode;
10136   enum insn_code icode1, icode2;
10137   optab optab1, optab2;
10138   tree vectype = vectype_in;
10139   tree wide_vectype = vectype_out;
10140   enum tree_code c1, c2;
10141   int i;
10142   tree prev_type, intermediate_type;
10143   machine_mode intermediate_mode, prev_mode;
10144   optab optab3, optab4;
10145 
10146   *multi_step_cvt = 0;
10147   if (loop_info)
10148     vect_loop = LOOP_VINFO_LOOP (loop_info);
10149 
10150   switch (code)
10151     {
10152     case WIDEN_MULT_EXPR:
10153       /* The result of a vectorized widening operation usually requires
10154 	 two vectors (because the widened results do not fit into one vector).
10155 	 The generated vector results would normally be expected to be
10156 	 generated in the same order as in the original scalar computation,
10157 	 i.e. if 8 results are generated in each vector iteration, they are
10158 	 to be organized as follows:
10159 		vect1: [res1,res2,res3,res4],
10160 		vect2: [res5,res6,res7,res8].
10161 
10162 	 However, in the special case that the result of the widening
10163 	 operation is used in a reduction computation only, the order doesn't
10164 	 matter (because when vectorizing a reduction we change the order of
10165 	 the computation).  Some targets can take advantage of this and
10166 	 generate more efficient code.  For example, targets like Altivec,
10167 	 that support widen_mult using a sequence of {mult_even,mult_odd}
10168 	 generate the following vectors:
10169 		vect1: [res1,res3,res5,res7],
10170 		vect2: [res2,res4,res6,res8].
10171 
10172 	 When vectorizing outer-loops, we execute the inner-loop sequentially
10173 	 (each vectorized inner-loop iteration contributes to VF outer-loop
10174 	 iterations in parallel).  We therefore don't allow changing the
10175 	 order of the computation in the inner-loop during outer-loop
10176 	 vectorization.  */
10177       /* TODO: Another case in which order doesn't *really* matter is when we
10178 	 widen and then contract again, e.g. (short)((int)x * y >> 8).
10179 	 Normally, pack_trunc performs an even/odd permute, whereas the
10180 	 repack from an even/odd expansion would be an interleave, which
10181 	 would be significantly simpler for e.g. AVX2.  */
10182       /* In any case, in order to avoid duplicating the code below, recurse
10183 	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
10184 	 are properly set up for the caller.  If we fail, we'll continue with
10185 	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
10186       if (vect_loop
10187 	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10188 	  && !nested_in_vect_loop_p (vect_loop, stmt)
10189 	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10190 					     stmt, vectype_out, vectype_in,
10191 					     code1, code2, multi_step_cvt,
10192 					     interm_types))
10193         {
10194           /* Elements in a vector with vect_used_by_reduction property cannot
10195              be reordered if the use chain with this property does not have the
10196              same operation.  One such example is s += a * b, where elements
10197              in a and b cannot be reordered.  Here we check if the vector defined
10198              by STMT is only directly used in the reduction statement.  */
10199           tree lhs = gimple_assign_lhs (stmt);
10200           use_operand_p dummy;
10201           gimple *use_stmt;
10202           stmt_vec_info use_stmt_info = NULL;
10203           if (single_imm_use (lhs, &dummy, &use_stmt)
10204               && (use_stmt_info = vinfo_for_stmt (use_stmt))
10205               && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10206             return true;
10207         }
10208       c1 = VEC_WIDEN_MULT_LO_EXPR;
10209       c2 = VEC_WIDEN_MULT_HI_EXPR;
10210       break;
10211 
10212     case DOT_PROD_EXPR:
10213       c1 = DOT_PROD_EXPR;
10214       c2 = DOT_PROD_EXPR;
10215       break;
10216 
10217     case SAD_EXPR:
10218       c1 = SAD_EXPR;
10219       c2 = SAD_EXPR;
10220       break;
10221 
10222     case VEC_WIDEN_MULT_EVEN_EXPR:
10223       /* Support the recursion induced just above.  */
10224       c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10225       c2 = VEC_WIDEN_MULT_ODD_EXPR;
10226       break;
10227 
10228     case WIDEN_LSHIFT_EXPR:
10229       c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10230       c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
10231       break;
10232 
10233     CASE_CONVERT:
10234       c1 = VEC_UNPACK_LO_EXPR;
10235       c2 = VEC_UNPACK_HI_EXPR;
10236       break;
10237 
10238     case FLOAT_EXPR:
10239       c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10240       c2 = VEC_UNPACK_FLOAT_HI_EXPR;
10241       break;
10242 
10243     case FIX_TRUNC_EXPR:
10244       /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
10245 	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
10246 	 computing the operation.  */
10247       return false;
10248 
10249     default:
10250       gcc_unreachable ();
10251     }
10252 
10253   if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
10254     std::swap (c1, c2);
10255 
10256   if (code == FIX_TRUNC_EXPR)
10257     {
10258       /* The signedness is determined from output operand.  */
10259       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10260       optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
10261     }
10262   else
10263     {
10264       optab1 = optab_for_tree_code (c1, vectype, optab_default);
10265       optab2 = optab_for_tree_code (c2, vectype, optab_default);
10266     }
10267 
10268   if (!optab1 || !optab2)
10269     return false;
10270 
10271   vec_mode = TYPE_MODE (vectype);
10272   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10273        || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
10274     return false;
10275 
10276   *code1 = c1;
10277   *code2 = c2;
10278 
10279   if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10280       && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10281       /* For scalar masks we may have different boolean
10282 	 vector types having the same QImode.  Thus we
10283 	 add an additional check on the number of elements.  */
10284     return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10285 	    || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10286 			 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10287 
10288   /* Check if it's a multi-step conversion that can be done using intermediate
10289      types.  */
10290 
10291   prev_type = vectype;
10292   prev_mode = vec_mode;
10293 
10294   if (!CONVERT_EXPR_CODE_P (code))
10295     return false;
10296 
10297   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10298      intermediate steps in the promotion sequence.  We try
10299      MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
10300      not.  */
10301   interm_types->create (MAX_INTERM_CVT_STEPS);
10302   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10303     {
10304       intermediate_mode = insn_data[icode1].operand[0].mode;
10305       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10306 	{
10307 	  intermediate_type = vect_halve_mask_nunits (prev_type);
10308 	  if (intermediate_mode != TYPE_MODE (intermediate_type))
10309 	    return false;
10310 	}
10311       else
10312 	intermediate_type
10313 	  = lang_hooks.types.type_for_mode (intermediate_mode,
10314 					    TYPE_UNSIGNED (prev_type));
10315 
10316       optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10317       optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10318 
10319       if (!optab3 || !optab4
10320           || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10321 	  || insn_data[icode1].operand[0].mode != intermediate_mode
10322 	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10323 	  || insn_data[icode2].operand[0].mode != intermediate_mode
10324 	  || ((icode1 = optab_handler (optab3, intermediate_mode))
10325 	      == CODE_FOR_nothing)
10326 	  || ((icode2 = optab_handler (optab4, intermediate_mode))
10327 	      == CODE_FOR_nothing))
10328 	break;
10329 
10330       interm_types->quick_push (intermediate_type);
10331       (*multi_step_cvt)++;
10332 
10333       if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10334 	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10335 	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10336 		|| known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10337 			     TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10338 
10339       prev_type = intermediate_type;
10340       prev_mode = intermediate_mode;
10341     }
10342 
10343   interm_types->release ();
10344   return false;
10345 }
10346 
10347 
10348 /* Function supportable_narrowing_operation
10349 
10350    Check whether an operation represented by the code CODE is a
10351    narrowing operation that is supported by the target platform in
10352    vector form (i.e., when operating on arguments of type VECTYPE_IN
10353    and producing a result of type VECTYPE_OUT).
10354 
10355    Narrowing operations we currently support are NOP (CONVERT) and
10356    FIX_TRUNC.  This function checks if these operations are supported by
10357    the target platform directly via vector tree-codes.
10358 
10359    Output:
10360    - CODE1 is the code of a vector operation to be used when
10361    vectorizing the operation, if available.
10362    - MULTI_STEP_CVT determines the number of required intermediate steps in
10363    case of multi-step conversion (like int->short->char - in that case
10364    MULTI_STEP_CVT will be 1).
10365    - INTERM_TYPES contains the intermediate type required to perform the
10366    narrowing operation (short in the above example).   */
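
/* For example (illustrative, assuming a target with 128-bit vectors): a
   CONVERT from two vectors of 4 ints to one vector of 8 shorts maps
   directly to VEC_PACK_TRUNC_EXPR, while narrowing int all the way down
   to char needs an intermediate short step, giving MULTI_STEP_CVT == 1
   with the short vector type recorded in INTERM_TYPES.  */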
10367 
10368 bool
10369 supportable_narrowing_operation (enum tree_code code,
10370 				 tree vectype_out, tree vectype_in,
10371 				 enum tree_code *code1, int *multi_step_cvt,
10372                                  vec<tree> *interm_types)
10373 {
10374   machine_mode vec_mode;
10375   enum insn_code icode1;
10376   optab optab1, interm_optab;
10377   tree vectype = vectype_in;
10378   tree narrow_vectype = vectype_out;
10379   enum tree_code c1;
10380   tree intermediate_type, prev_type;
10381   machine_mode intermediate_mode, prev_mode;
10382   int i;
10383   bool uns;
10384 
10385   *multi_step_cvt = 0;
10386   switch (code)
10387     {
10388     CASE_CONVERT:
10389       c1 = VEC_PACK_TRUNC_EXPR;
10390       break;
10391 
10392     case FIX_TRUNC_EXPR:
10393       c1 = VEC_PACK_FIX_TRUNC_EXPR;
10394       break;
10395 
10396     case FLOAT_EXPR:
10397       /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
10398 	 tree code and optabs used for computing the operation.  */
10399       return false;
10400 
10401     default:
10402       gcc_unreachable ();
10403     }
10404 
10405   if (code == FIX_TRUNC_EXPR)
10406     /* The signedness is determined from output operand.  */
10407     optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10408   else
10409     optab1 = optab_for_tree_code (c1, vectype, optab_default);
10410 
10411   if (!optab1)
10412     return false;
10413 
10414   vec_mode = TYPE_MODE (vectype);
10415   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
10416     return false;
10417 
10418   *code1 = c1;
10419 
10420   if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10421     /* For scalar masks we may have different boolean
10422        vector types having the same QImode.  Thus we
10423        add an additional check on the number of elements.  */
10424     return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10425 	    || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10426 			 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10427 
10428   /* Check if it's a multi-step conversion that can be done using intermediate
10429      types.  */
10430   prev_mode = vec_mode;
10431   prev_type = vectype;
10432   if (code == FIX_TRUNC_EXPR)
10433     uns = TYPE_UNSIGNED (vectype_out);
10434   else
10435     uns = TYPE_UNSIGNED (vectype);
10436 
10437   /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10438      conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10439      costly than signed.  */
10440   if (code == FIX_TRUNC_EXPR && uns)
10441     {
10442       enum insn_code icode2;
10443 
10444       intermediate_type
10445 	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10446       interm_optab
10447 	= optab_for_tree_code (c1, intermediate_type, optab_default);
10448       if (interm_optab != unknown_optab
10449 	  && (icode2 = optab_handler (interm_optab, vec_mode)) != CODE_FOR_nothing
10450 	  && insn_data[icode1].operand[0].mode
10451 	     == insn_data[icode2].operand[0].mode)
10452 	{
10453 	  uns = false;
10454 	  optab1 = interm_optab;
10455 	  icode1 = icode2;
10456 	}
10457     }
10458 
10459   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10460      intermediate steps in the narrowing sequence.  We try
10461      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
10462   interm_types->create (MAX_INTERM_CVT_STEPS);
10463   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10464     {
10465       intermediate_mode = insn_data[icode1].operand[0].mode;
10466       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10467 	{
10468 	  intermediate_type = vect_double_mask_nunits (prev_type);
10469 	  if (intermediate_mode != TYPE_MODE (intermediate_type))
10470 	    return false;
10471 	}
10472       else
10473 	intermediate_type
10474 	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10475       interm_optab
10476 	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10477 			       optab_default);
10478       if (!interm_optab
10479 	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10480 	  || insn_data[icode1].operand[0].mode != intermediate_mode
10481 	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10482 	      == CODE_FOR_nothing))
10483 	break;
10484 
10485       interm_types->quick_push (intermediate_type);
10486       (*multi_step_cvt)++;
10487 
10488       if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10489 	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10490 		|| known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10491 			     TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10492 
10493       prev_mode = intermediate_mode;
10494       prev_type = intermediate_type;
10495       optab1 = interm_optab;
10496     }
10497 
10498   interm_types->release ();
10499   return false;
10500 }
10501 
10502 /* Generate and return a statement that sets vector mask MASK such that
10503    MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I.  */
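
/* For example (a sketch): with START_INDEX == 6, END_INDEX == 9 and an
   8-element MASK, the generated call sets MASK to
     { true, true, true, false, false, false, false, false }
   since only J in { 0, 1, 2 } satisfies J + 6 < 9.  */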
10504 
10505 gcall *
10506 vect_gen_while (tree mask, tree start_index, tree end_index)
10507 {
10508   tree cmp_type = TREE_TYPE (start_index);
10509   tree mask_type = TREE_TYPE (mask);
10510   gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10511 						       cmp_type, mask_type,
10512 						       OPTIMIZE_FOR_SPEED));
10513   gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10514 					    start_index, end_index,
10515 					    build_zero_cst (mask_type));
10516   gimple_call_set_lhs (call, mask);
10517   return call;
10518 }
10519 
10520 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10521    J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */
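
/* Continuing the example above (a sketch): the same indices yield
     { false, false, false, true, true, true, true, true }
   i.e. the bitwise NOT of the vect_gen_while mask.  */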
10522 
10523 tree
10524 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10525 		    tree end_index)
10526 {
10527   tree tmp = make_ssa_name (mask_type);
10528   gcall *call = vect_gen_while (tmp, start_index, end_index);
10529   gimple_seq_add_stmt (seq, call);
10530   return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10531 }
10532