1 /* Statement Analysis and Transformation for Vectorization
2    Copyright (C) 2003-2013 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4    and Ira Rosen <irar@il.ibm.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-flow.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "recog.h"		/* FIXME: for insn_data */
36 #include "optabs.h"
37 #include "diagnostic-core.h"
38 #include "tree-vectorizer.h"
40 
41 /* For lang_hooks.types.type_for_mode.  */
42 #include "langhooks.h"
43 
44 /* Return the vectorized type for the given statement.  */
45 
46 tree
47 stmt_vectype (struct _stmt_vec_info *stmt_info)
48 {
49   return STMT_VINFO_VECTYPE (stmt_info);
50 }
51 
52 /* Return TRUE iff the given statement is in an inner loop relative to
53    the loop being vectorized.  */
54 bool
55 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
56 {
57   gimple stmt = STMT_VINFO_STMT (stmt_info);
58   basic_block bb = gimple_bb (stmt);
59   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
60   struct loop* loop;
61 
62   if (!loop_vinfo)
63     return false;
64 
65   loop = LOOP_VINFO_LOOP (loop_vinfo);
66 
67   return (bb->loop_father == loop->inner);
68 }
69 
70 /* Record the cost of a statement, either by directly informing the
71    target model or by saving it in a vector for later processing.
72    Return a preliminary estimate of the statement's cost.  */
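/* For example (an illustrative sketch only; the real callers are the
   vect_model_*_cost routines later in this file), costing NCOPIES copies
   of a plain vector statement in the loop body looks roughly like:

       inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                        stmt_info, 0, vect_body);

   When BODY_COST_VEC is NULL the cost is instead passed directly to the
   target via add_stmt_cost.  */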
73 
74 unsigned
75 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
76 		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
77 		  int misalign, enum vect_cost_model_location where)
78 {
79   if (body_cost_vec)
80     {
81       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
82       add_stmt_info_to_vec (body_cost_vec, count, kind,
83 			    stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
84 			    misalign);
85       return (unsigned)
86 	(builtin_vectorization_cost (kind, vectype, misalign) * count);
87 
88     }
89   else
90     {
91       loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
92       bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
93       void *target_cost_data;
94 
95       if (loop_vinfo)
96 	target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
97       else
98 	target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
99 
100       return add_stmt_cost (target_cost_data, count, kind, stmt_info,
101 			    misalign, where);
102     }
103 }
104 
105 /* Return a variable of type ELEM_TYPE[NELEMS].  */
106 
107 static tree
108 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
109 {
110   return create_tmp_var (build_array_type_nelts (elem_type, nelems),
111 			 "vect_array");
112 }
113 
114 /* ARRAY is an array of vectors created by create_vector_array.
115    Return an SSA_NAME for the vector in index N.  The reference
116    is part of the vectorization of STMT and the vector is associated
117    with scalar destination SCALAR_DEST.  */
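/* As an illustrative sketch, for N == 2 the statement emitted here is
   essentially

       new_name = vect_array[2];

   where vect_array is the temporary created by create_vector_array and
   new_name is a fresh SSA name based on SCALAR_DEST, which is returned.  */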
118 
119 static tree
120 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
121 		   tree array, unsigned HOST_WIDE_INT n)
122 {
123   tree vect_type, vect, vect_name, array_ref;
124   gimple new_stmt;
125 
126   gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
127   vect_type = TREE_TYPE (TREE_TYPE (array));
128   vect = vect_create_destination_var (scalar_dest, vect_type);
129   array_ref = build4 (ARRAY_REF, vect_type, array,
130 		      build_int_cst (size_type_node, n),
131 		      NULL_TREE, NULL_TREE);
132 
133   new_stmt = gimple_build_assign (vect, array_ref);
134   vect_name = make_ssa_name (vect, new_stmt);
135   gimple_assign_set_lhs (new_stmt, vect_name);
136   vect_finish_stmt_generation (stmt, new_stmt, gsi);
137 
138   return vect_name;
139 }
140 
141 /* ARRAY is an array of vectors created by create_vector_array.
142    Emit code to store SSA_NAME VECT in index N of the array.
143    The store is part of the vectorization of STMT.  */
144 
145 static void
146 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
147 		    tree array, unsigned HOST_WIDE_INT n)
148 {
149   tree array_ref;
150   gimple new_stmt;
151 
152   array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
153 		      build_int_cst (size_type_node, n),
154 		      NULL_TREE, NULL_TREE);
155 
156   new_stmt = gimple_build_assign (array_ref, vect);
157   vect_finish_stmt_generation (stmt, new_stmt, gsi);
158 }
159 
160 /* PTR is a pointer to an array of type TYPE.  Return a representation
161    of *PTR.  The memory reference replaces those in FIRST_DR
162    (and its group).  */
163 
164 static tree
165 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
166 {
167   tree mem_ref, alias_ptr_type;
168 
169   alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
170   mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
171   /* Arrays have the same alignment as their type.  */
172   set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
173   return mem_ref;
174 }
175 
176 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
177 
178 /* Function vect_mark_relevant.
179 
180    Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
181 
182 static void
183 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
184 		    enum vect_relevant relevant, bool live_p,
185 		    bool used_in_pattern)
186 {
187   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
188   enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
189   bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
190   gimple pattern_stmt;
191 
192   if (dump_enabled_p ())
193     dump_printf_loc (MSG_NOTE, vect_location,
194                      "mark relevant %d, live %d.", relevant, live_p);
195 
196   /* If this stmt is an original stmt in a pattern, we might need to mark its
197      related pattern stmt instead of the original stmt.  However, such stmts
198      may have their own uses that are not in any pattern, in such cases the
199      stmt itself should be marked.  */
200   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
201     {
202       bool found = false;
203       if (!used_in_pattern)
204         {
205           imm_use_iterator imm_iter;
206           use_operand_p use_p;
207           gimple use_stmt;
208           tree lhs;
209 	  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
210 	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
211 
212           if (is_gimple_assign (stmt))
213             lhs = gimple_assign_lhs (stmt);
214           else
215             lhs = gimple_call_lhs (stmt);
216 
217           /* This use is outside the pattern.  If LHS has other uses that
218              are pattern uses, we should mark the stmt itself, and not the
219              pattern stmt.  */
220 	  if (TREE_CODE (lhs) == SSA_NAME)
221 	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
222 	      {
223 		if (is_gimple_debug (USE_STMT (use_p)))
224 		  continue;
225 		use_stmt = USE_STMT (use_p);
226 
227 		if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
228 		  continue;
229 
230 		if (vinfo_for_stmt (use_stmt)
231 		    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
232 		  {
233 		    found = true;
234 		    break;
235 		  }
236 	      }
237         }
238 
239       if (!found)
240         {
241           /* This is the last stmt in a sequence that was detected as a
242              pattern that can potentially be vectorized.  Don't mark the stmt
243              as relevant/live because it's not going to be vectorized.
244              Instead mark the pattern-stmt that replaces it.  */
245 
246           pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
247 
248           if (dump_enabled_p ())
249             dump_printf_loc (MSG_NOTE, vect_location,
250                              "last stmt in pattern. don't mark"
251                              " relevant/live.");
252           stmt_info = vinfo_for_stmt (pattern_stmt);
253           gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
254           save_relevant = STMT_VINFO_RELEVANT (stmt_info);
255           save_live_p = STMT_VINFO_LIVE_P (stmt_info);
256           stmt = pattern_stmt;
257         }
258     }
259 
260   STMT_VINFO_LIVE_P (stmt_info) |= live_p;
261   if (relevant > STMT_VINFO_RELEVANT (stmt_info))
262     STMT_VINFO_RELEVANT (stmt_info) = relevant;
263 
264   if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
265       && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
266     {
267       if (dump_enabled_p ())
268         dump_printf_loc (MSG_NOTE, vect_location,
269                          "already marked relevant/live.");
270       return;
271     }
272 
273   worklist->safe_push (stmt);
274 }
275 
276 
277 /* Function vect_stmt_relevant_p.
278 
279    Return true if STMT in loop that is represented by LOOP_VINFO is
280    "relevant for vectorization".
281 
282    A stmt is considered "relevant for vectorization" if:
283    - it has uses outside the loop.
284    - it has vdefs (it alters memory).
285    - it is a control stmt in the loop (except for the exit condition).
286 
287    CHECKME: what other side effects would the vectorizer allow?  */
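/* For example (illustrative): a store "a[i] = x" alters memory (has a
   vdef) and is therefore marked relevant, whereas a computation whose
   result is only used by a PHI in the loop exit block is marked live.  */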
288 
289 static bool
290 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
291 		      enum vect_relevant *relevant, bool *live_p)
292 {
293   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
294   ssa_op_iter op_iter;
295   imm_use_iterator imm_iter;
296   use_operand_p use_p;
297   def_operand_p def_p;
298 
299   *relevant = vect_unused_in_scope;
300   *live_p = false;
301 
302   /* cond stmt other than loop exit cond.  */
303   if (is_ctrl_stmt (stmt)
304       && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
305          != loop_exit_ctrl_vec_info_type)
306     *relevant = vect_used_in_scope;
307 
308   /* changing memory.  */
309   if (gimple_code (stmt) != GIMPLE_PHI)
310     if (gimple_vdef (stmt))
311       {
312 	if (dump_enabled_p ())
313 	  dump_printf_loc (MSG_NOTE, vect_location,
314                            "vec_stmt_relevant_p: stmt has vdefs.");
315 	*relevant = vect_used_in_scope;
316       }
317 
318   /* uses outside the loop.  */
319   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
320     {
321       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
322 	{
323 	  basic_block bb = gimple_bb (USE_STMT (use_p));
324 	  if (!flow_bb_inside_loop_p (loop, bb))
325 	    {
326 	      if (dump_enabled_p ())
327 		dump_printf_loc (MSG_NOTE, vect_location,
328                                  "vec_stmt_relevant_p: used out of loop.");
329 
330 	      if (is_gimple_debug (USE_STMT (use_p)))
331 		continue;
332 
333 	      /* We expect all such uses to be in the loop exit phis
334 		 (because of loop-closed SSA form).  */
335 	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
336 	      gcc_assert (bb == single_exit (loop)->dest);
337 
338               *live_p = true;
339 	    }
340 	}
341     }
342 
343   return (*live_p || *relevant);
344 }
345 
346 
347 /* Function exist_non_indexing_operands_for_use_p
348 
349    USE is one of the uses attached to STMT.  Check if USE is
350    used in STMT for anything other than indexing an array.  */
351 
352 static bool
353 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
354 {
355   tree operand;
356   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
357 
358   /* USE corresponds to some operand in STMT.  If there is no data
359      reference in STMT, then any operand that corresponds to USE
360      is not indexing an array.  */
361   if (!STMT_VINFO_DATA_REF (stmt_info))
362     return true;
363 
364   /* STMT has a data_ref.  FORNOW this means that it is one of
365      the following forms:
366      -1- ARRAY_REF = var
367      -2- var = ARRAY_REF
368      (This should have been verified in analyze_data_refs).
369 
370      'var' in the second case corresponds to a def, not a use,
371      so USE cannot correspond to any operands that are not used
372      for array indexing.
373 
374      Therefore, all we need to check is if STMT falls into the
375      first case, and whether var corresponds to USE.  */
376 
377   if (!gimple_assign_copy_p (stmt))
378     return false;
379   if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
380     return false;
381   operand = gimple_assign_rhs1 (stmt);
382   if (TREE_CODE (operand) != SSA_NAME)
383     return false;
384 
385   if (operand == use)
386     return true;
387 
388   return false;
389 }
390 
391 
392 /*
393    Function process_use.
394 
395    Inputs:
396    - a USE in STMT in a loop represented by LOOP_VINFO
397    - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
398      that defined USE.  This is done by calling mark_relevant and passing it
399      the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
400    - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
401      be performed.
402 
403    Outputs:
404    Generally, LIVE_P and RELEVANT are used to define the liveness and
405    relevance info of the DEF_STMT of this USE:
406        STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
407        STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
408    Exceptions:
409    - case 1: If USE is used only for address computations (e.g. array indexing),
410    which does not need to be directly vectorized, then the liveness/relevance
411    of the respective DEF_STMT is left unchanged.
412    - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
413    skip DEF_STMT because it has already been processed.
414    - case 3: If DEF_STMT and STMT are in different nests, then  "relevant" will
415    be modified accordingly.
416 
417    Return true if everything is as expected. Return false otherwise.  */
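/* An illustration of case 1: in a statement like "x = a[i]" the use of
   "i" only feeds the address computation, so (unless FORCE is true) the
   stmt defining "i" is not marked here; see
   exist_non_indexing_operands_for_use_p.  */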
418 
419 static bool
420 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
421 	     enum vect_relevant relevant, vec<gimple> *worklist,
422 	     bool force)
423 {
424   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
425   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
426   stmt_vec_info dstmt_vinfo;
427   basic_block bb, def_bb;
428   tree def;
429   gimple def_stmt;
430   enum vect_def_type dt;
431 
432   /* case 1: we are only interested in uses that need to be vectorized.  Uses
433      that are used for address computation are not considered relevant.  */
434   if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
435      return true;
436 
437   if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
438     {
439       if (dump_enabled_p ())
440         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
441                          "not vectorized: unsupported use in stmt.");
442       return false;
443     }
444 
445   if (!def_stmt || gimple_nop_p (def_stmt))
446     return true;
447 
448   def_bb = gimple_bb (def_stmt);
449   if (!flow_bb_inside_loop_p (loop, def_bb))
450     {
451       if (dump_enabled_p ())
452 	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.");
453       return true;
454     }
455 
456   /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
457      DEF_STMT must have already been processed, because this should be the
458      only way that STMT, which is a reduction-phi, was put in the worklist,
459      as there should be no other uses for DEF_STMT in the loop.  So we just
460      check that everything is as expected, and we are done.  */
461   dstmt_vinfo = vinfo_for_stmt (def_stmt);
462   bb = gimple_bb (stmt);
463   if (gimple_code (stmt) == GIMPLE_PHI
464       && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
465       && gimple_code (def_stmt) != GIMPLE_PHI
466       && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
467       && bb->loop_father == def_bb->loop_father)
468     {
469       if (dump_enabled_p ())
470 	dump_printf_loc (MSG_NOTE, vect_location,
471                          "reduc-stmt defining reduc-phi in the same nest.");
472       if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
473 	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
474       gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
475       gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
476 		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
477       return true;
478     }
479 
480   /* case 3a: outer-loop stmt defining an inner-loop stmt:
481 	outer-loop-header-bb:
482 		d = def_stmt
483 	inner-loop:
484 		stmt # use (d)
485 	outer-loop-tail-bb:
486 		...		  */
487   if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
488     {
489       if (dump_enabled_p ())
490 	dump_printf_loc (MSG_NOTE, vect_location,
491                          "outer-loop def-stmt defining inner-loop stmt.");
492 
493       switch (relevant)
494 	{
495 	case vect_unused_in_scope:
496 	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
497 		      vect_used_in_scope : vect_unused_in_scope;
498 	  break;
499 
500 	case vect_used_in_outer_by_reduction:
501           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
502 	  relevant = vect_used_by_reduction;
503 	  break;
504 
505 	case vect_used_in_outer:
506           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
507 	  relevant = vect_used_in_scope;
508 	  break;
509 
510 	case vect_used_in_scope:
511 	  break;
512 
513 	default:
514 	  gcc_unreachable ();
515 	}
516     }
517 
518   /* case 3b: inner-loop stmt defining an outer-loop stmt:
519 	outer-loop-header-bb:
520 		...
521 	inner-loop:
522 		d = def_stmt
523 	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
524 		stmt # use (d)		*/
525   else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
526     {
527       if (dump_enabled_p ())
528 	dump_printf_loc (MSG_NOTE, vect_location,
529                          "inner-loop def-stmt defining outer-loop stmt.");
530 
531       switch (relevant)
532         {
533         case vect_unused_in_scope:
534           relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
535             || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
536                       vect_used_in_outer_by_reduction : vect_unused_in_scope;
537           break;
538 
539         case vect_used_by_reduction:
540           relevant = vect_used_in_outer_by_reduction;
541           break;
542 
543         case vect_used_in_scope:
544           relevant = vect_used_in_outer;
545           break;
546 
547         default:
548           gcc_unreachable ();
549         }
550     }
551 
552   vect_mark_relevant (worklist, def_stmt, relevant, live_p,
553                       is_pattern_stmt_p (stmt_vinfo));
554   return true;
555 }
556 
557 
558 /* Function vect_mark_stmts_to_be_vectorized.
559 
560    Not all stmts in the loop need to be vectorized. For example:
561 
562      for i...
563        for j...
564    1.    T0 = i + j
565    2.	 T1 = a[T0]
566 
567    3.    j = j + 1
568 
569    Stmts 1 and 3 do not need to be vectorized, because loop control and
570    addressing of vectorized data-refs are handled differently.
571 
572    This pass detects such stmts.  */
573 
574 bool
575 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
576 {
577   vec<gimple> worklist;
578   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
579   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
580   unsigned int nbbs = loop->num_nodes;
581   gimple_stmt_iterator si;
582   gimple stmt;
583   unsigned int i;
584   stmt_vec_info stmt_vinfo;
585   basic_block bb;
586   gimple phi;
587   bool live_p;
588   enum vect_relevant relevant, tmp_relevant;
589   enum vect_def_type def_type;
590 
591   if (dump_enabled_p ())
592     dump_printf_loc (MSG_NOTE, vect_location,
593                      "=== vect_mark_stmts_to_be_vectorized ===");
594 
595   worklist.create (64);
596 
597   /* 1. Init worklist.  */
598   for (i = 0; i < nbbs; i++)
599     {
600       bb = bbs[i];
601       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
602 	{
603 	  phi = gsi_stmt (si);
604 	  if (dump_enabled_p ())
605 	    {
606 	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
607 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
608 	    }
609 
610 	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
611 	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
612 	}
613       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
614 	{
615 	  stmt = gsi_stmt (si);
616 	  if (dump_enabled_p ())
617 	    {
618 	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
619 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
620 	    }
621 
622 	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
623             vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
624 	}
625     }
626 
627   /* 2. Process_worklist */
628   while (worklist.length () > 0)
629     {
630       use_operand_p use_p;
631       ssa_op_iter iter;
632 
633       stmt = worklist.pop ();
634       if (dump_enabled_p ())
635 	{
636           dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
637           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
638 	}
639 
640       /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
641 	 (DEF_STMT) as relevant/irrelevant and live/dead according to the
642 	 liveness and relevance properties of STMT.  */
643       stmt_vinfo = vinfo_for_stmt (stmt);
644       relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
645       live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
646 
647       /* Generally, the liveness and relevance properties of STMT are
648 	 propagated as is to the DEF_STMTs of its USEs:
649 	  live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
650 	  relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
651 
652 	 One exception is when STMT has been identified as defining a reduction
653 	 variable; in this case we set the liveness/relevance as follows:
654 	   live_p = false
655 	   relevant = vect_used_by_reduction
656 	 This is because we distinguish between two kinds of relevant stmts -
657 	 those that are used by a reduction computation, and those that are
658 	 (also) used by a regular computation.  This allows us later on to
659 	 identify stmts that are used solely by a reduction, and therefore the
660 	 order of the results that they produce does not have to be kept.  */
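      /* For instance (illustrative), in a reduction such as
	 "sum_1 = sum_0 + a[i]" the stmts feeding the reduction are
	 propagated as vect_used_by_reduction rather than vect_used_in_scope,
	 and live_p is forced to false when propagating to their defs.  */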
661 
662       def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
663       tmp_relevant = relevant;
664       switch (def_type)
665         {
666           case vect_reduction_def:
667 	    switch (tmp_relevant)
668 	      {
669 	        case vect_unused_in_scope:
670 	          relevant = vect_used_by_reduction;
671 	          break;
672 
673 	        case vect_used_by_reduction:
674 	          if (gimple_code (stmt) == GIMPLE_PHI)
675                     break;
676   	          /* fall through */
677 
678 	        default:
679 	          if (dump_enabled_p ())
680 	            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
681                                      "unsupported use of reduction.");
682   	          worklist.release ();
683 	          return false;
684 	      }
685 
686 	    live_p = false;
687 	    break;
688 
689           case vect_nested_cycle:
690             if (tmp_relevant != vect_unused_in_scope
691                 && tmp_relevant != vect_used_in_outer_by_reduction
692                 && tmp_relevant != vect_used_in_outer)
693               {
694                 if (dump_enabled_p ())
695                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
696                                    "unsupported use of nested cycle.");
697 
698                 worklist.release ();
699                 return false;
700               }
701 
702             live_p = false;
703             break;
704 
705           case vect_double_reduction_def:
706             if (tmp_relevant != vect_unused_in_scope
707                 && tmp_relevant != vect_used_by_reduction)
708               {
709                 if (dump_enabled_p ())
710                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
711                                    "unsupported use of double reduction.");
712 
713                 worklist.release ();
714                 return false;
715               }
716 
717             live_p = false;
718             break;
719 
720           default:
721             break;
722         }
723 
724       if (is_pattern_stmt_p (stmt_vinfo))
725         {
726           /* Pattern statements are not inserted into the code, so
727              FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
728              have to scan the RHS or function arguments instead.  */
729           if (is_gimple_assign (stmt))
730             {
731 	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
732 	      tree op = gimple_assign_rhs1 (stmt);
733 
734 	      i = 1;
735 	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
736 		{
737 		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
738 				    live_p, relevant, &worklist, false)
739 		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
740 				       live_p, relevant, &worklist, false))
741 		    {
742 		      worklist.release ();
743 		      return false;
744 		    }
745 		  i = 2;
746 		}
747 	      for (; i < gimple_num_ops (stmt); i++)
748                 {
749 		  op = gimple_op (stmt, i);
750                   if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
751 				    &worklist, false))
752                     {
753                       worklist.release ();
754                       return false;
755                     }
756                  }
757             }
758           else if (is_gimple_call (stmt))
759             {
760               for (i = 0; i < gimple_call_num_args (stmt); i++)
761                 {
762                   tree arg = gimple_call_arg (stmt, i);
763                   if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
764 				    &worklist, false))
765                     {
766                       worklist.release ();
767                       return false;
768                     }
769                 }
770             }
771         }
772       else
773         FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
774           {
775             tree op = USE_FROM_PTR (use_p);
776             if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
777 			      &worklist, false))
778               {
779                 worklist.release ();
780                 return false;
781               }
782           }
783 
784       if (STMT_VINFO_GATHER_P (stmt_vinfo))
785 	{
786 	  tree off;
787 	  tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
788 	  gcc_assert (decl);
789 	  if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
790 			    &worklist, true))
791 	    {
792 	      worklist.release ();
793 	      return false;
794 	    }
795 	}
796     } /* while worklist */
797 
798   worklist.release ();
799   return true;
800 }
801 
802 
803 /* Function vect_model_simple_cost.
804 
805    Models cost for simple operations, i.e. those that only emit ncopies of a
806    single op.  Right now, this does not account for multiple insns that could
807    be generated for the single vector op.  We will handle that shortly.  */
808 
809 void
810 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
811 			enum vect_def_type *dt,
812 			stmt_vector_for_cost *prologue_cost_vec,
813 			stmt_vector_for_cost *body_cost_vec)
814 {
815   int i;
816   int inside_cost = 0, prologue_cost = 0;
817 
818   /* The SLP costs were already calculated during SLP tree build.  */
819   if (PURE_SLP_STMT (stmt_info))
820     return;
821 
822   /* FORNOW: Assuming maximum 2 args per stmts.  */
823   for (i = 0; i < 2; i++)
824     if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
825       prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
826 					 stmt_info, 0, vect_prologue);
827 
828   /* Pass the inside-of-loop statements to the target-specific cost model.  */
829   inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
830 				  stmt_info, 0, vect_body);
831 
832   if (dump_enabled_p ())
833     dump_printf_loc (MSG_NOTE, vect_location,
834                      "vect_model_simple_cost: inside_cost = %d, "
835                      "prologue_cost = %d .", inside_cost, prologue_cost);
836 }
837 
838 
839 /* Model cost for type demotion and promotion operations.  PWR is normally
840    zero for single-step promotions and demotions.  It will be one if
841    two-step promotion/demotion is required, and so on.  Each additional
842    step doubles the number of instructions required.  */
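/* For example (illustrative; the exact counts are target-dependent):
   widening a vector of chars to a vector of ints typically requires two
   promotion steps, so PWR would be 1 and roughly twice as many
   promote/demote statements are costed as for a single-step conversion.  */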
843 
844 static void
845 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
846 				    enum vect_def_type *dt, int pwr)
847 {
848   int i, tmp;
849   int inside_cost = 0, prologue_cost = 0;
850   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
851   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
852   void *target_cost_data;
853 
854   /* The SLP costs were already calculated during SLP tree build.  */
855   if (PURE_SLP_STMT (stmt_info))
856     return;
857 
858   if (loop_vinfo)
859     target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
860   else
861     target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
862 
863   for (i = 0; i < pwr + 1; i++)
864     {
865       tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
866 	(i + 1) : i;
867       inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
868 				    vec_promote_demote, stmt_info, 0,
869 				    vect_body);
870     }
871 
872   /* FORNOW: Assuming maximum 2 args per stmts.  */
873   for (i = 0; i < 2; i++)
874     if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
875       prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
876 				      stmt_info, 0, vect_prologue);
877 
878   if (dump_enabled_p ())
879     dump_printf_loc (MSG_NOTE, vect_location,
880                      "vect_model_promotion_demotion_cost: inside_cost = %d, "
881                      "prologue_cost = %d .", inside_cost, prologue_cost);
882 }
883 
884 /* Function vect_cost_group_size
885 
886    For grouped load or store, return the group_size only if it is the first
887    load or store of a group, else return 1.  This ensures that group size is
888    only returned once per group.  */
889 
890 static int
891 vect_cost_group_size (stmt_vec_info stmt_info)
892 {
893   gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
894 
895   if (first_stmt == STMT_VINFO_STMT (stmt_info))
896     return GROUP_SIZE (stmt_info);
897 
898   return 1;
899 }
900 
901 
902 /* Function vect_model_store_cost
903 
904    Models cost for stores.  In the case of grouped accesses, one access
905    has the overhead of the grouped access attributed to it.  */
906 
907 void
908 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
909 		       bool store_lanes_p, enum vect_def_type dt,
910 		       slp_tree slp_node,
911 		       stmt_vector_for_cost *prologue_cost_vec,
912 		       stmt_vector_for_cost *body_cost_vec)
913 {
914   int group_size;
915   unsigned int inside_cost = 0, prologue_cost = 0;
916   struct data_reference *first_dr;
917   gimple first_stmt;
918 
919   /* The SLP costs were already calculated during SLP tree build.  */
920   if (PURE_SLP_STMT (stmt_info))
921     return;
922 
923   if (dt == vect_constant_def || dt == vect_external_def)
924     prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
925 				       stmt_info, 0, vect_prologue);
926 
927   /* Grouped access?  */
928   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
929     {
930       if (slp_node)
931         {
932           first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
933           group_size = 1;
934         }
935       else
936         {
937           first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
938           group_size = vect_cost_group_size (stmt_info);
939         }
940 
941       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
942     }
943   /* Not a grouped access.  */
944   else
945     {
946       group_size = 1;
947       first_dr = STMT_VINFO_DATA_REF (stmt_info);
948     }
949 
950   /* We assume that the cost of a single store-lanes instruction is
951      equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
952      access is instead being provided by a permute-and-store operation,
953      include the cost of the permutes.  */
954   if (!store_lanes_p && group_size > 1)
955     {
956       /* Uses a high and low interleave operation for each needed permute.  */
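      /* E.g. (illustrative): for GROUP_SIZE == 4 and NCOPIES == 1 this is
	 1 * log2 (4) * 4 == 8 permute statements.  */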
957 
958       int nstmts = ncopies * exact_log2 (group_size) * group_size;
959       inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
960 				      stmt_info, 0, vect_body);
961 
962       if (dump_enabled_p ())
963         dump_printf_loc (MSG_NOTE, vect_location,
964                          "vect_model_store_cost: strided group_size = %d .",
965                          group_size);
966     }
967 
968   /* Costs of the stores.  */
969   vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
970 
971   if (dump_enabled_p ())
972     dump_printf_loc (MSG_NOTE, vect_location,
973                      "vect_model_store_cost: inside_cost = %d, "
974                      "prologue_cost = %d .", inside_cost, prologue_cost);
975 }
976 
977 
978 /* Calculate cost of DR's memory access.  */
979 void
980 vect_get_store_cost (struct data_reference *dr, int ncopies,
981 		     unsigned int *inside_cost,
982 		     stmt_vector_for_cost *body_cost_vec)
983 {
984   int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
985   gimple stmt = DR_STMT (dr);
986   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
987 
988   switch (alignment_support_scheme)
989     {
990     case dr_aligned:
991       {
992 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
993 					  vector_store, stmt_info, 0,
994 					  vect_body);
995 
996         if (dump_enabled_p ())
997           dump_printf_loc (MSG_NOTE, vect_location,
998                            "vect_model_store_cost: aligned.");
999         break;
1000       }
1001 
1002     case dr_unaligned_supported:
1003       {
1004         /* Here, we assign an additional cost for the unaligned store.  */
1005 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1006 					  unaligned_store, stmt_info,
1007 					  DR_MISALIGNMENT (dr), vect_body);
1008         if (dump_enabled_p ())
1009           dump_printf_loc (MSG_NOTE, vect_location,
1010                            "vect_model_store_cost: unaligned supported by "
1011                            "hardware.");
1012         break;
1013       }
1014 
1015     case dr_unaligned_unsupported:
1016       {
1017         *inside_cost = VECT_MAX_COST;
1018 
1019         if (dump_enabled_p ())
1020           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1021                            "vect_model_store_cost: unsupported access.");
1022         break;
1023       }
1024 
1025     default:
1026       gcc_unreachable ();
1027     }
1028 }
1029 
1030 
1031 /* Function vect_model_load_cost
1032 
1033    Models cost for loads.  In the case of grouped accesses, the last access
1034    has the overhead of the grouped access attributed to it.  Since unaligned
1035    accesses are supported for loads, we also account for the costs of the
1036    access scheme chosen.  */
1037 
1038 void
1039 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1040 		      bool load_lanes_p, slp_tree slp_node,
1041 		      stmt_vector_for_cost *prologue_cost_vec,
1042 		      stmt_vector_for_cost *body_cost_vec)
1043 {
1044   int group_size;
1045   gimple first_stmt;
1046   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1047   unsigned int inside_cost = 0, prologue_cost = 0;
1048 
1049   /* The SLP costs were already calculated during SLP tree build.  */
1050   if (PURE_SLP_STMT (stmt_info))
1051     return;
1052 
1053   /* Grouped accesses?  */
1054   first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1055   if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1056     {
1057       group_size = vect_cost_group_size (stmt_info);
1058       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1059     }
1060   /* Not a grouped access.  */
1061   else
1062     {
1063       group_size = 1;
1064       first_dr = dr;
1065     }
1066 
1067   /* We assume that the cost of a single load-lanes instruction is
1068      equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
1069      access is instead being provided by a load-and-permute operation,
1070      include the cost of the permutes.  */
1071   if (!load_lanes_p && group_size > 1)
1072     {
1073       /* Uses even and odd extract operations for each needed permute.  */
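      /* E.g. (illustrative): GROUP_SIZE == 4 and NCOPIES == 1 again gives
	 1 * log2 (4) * 4 == 8 permute statements.  */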
1074       int nstmts = ncopies * exact_log2 (group_size) * group_size;
1075       inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1076 				       stmt_info, 0, vect_body);
1077 
1078       if (dump_enabled_p ())
1079         dump_printf_loc (MSG_NOTE, vect_location,
1080                          "vect_model_load_cost: strided group_size = %d .",
1081                          group_size);
1082     }
1083 
1084   /* The loads themselves.  */
1085   if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1086     {
1087       /* N scalar loads plus gathering them into a vector.  */
1088       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1089       inside_cost += record_stmt_cost (body_cost_vec,
1090 				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1091 				       scalar_load, stmt_info, 0, vect_body);
1092       inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1093 				       stmt_info, 0, vect_body);
1094     }
1095   else
1096     vect_get_load_cost (first_dr, ncopies,
1097 			((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1098 			 || group_size > 1 || slp_node),
1099 			&inside_cost, &prologue_cost,
1100 			prologue_cost_vec, body_cost_vec, true);
1101 
1102   if (dump_enabled_p ())
1103     dump_printf_loc (MSG_NOTE, vect_location,
1104                      "vect_model_load_cost: inside_cost = %d, "
1105                      "prologue_cost = %d .", inside_cost, prologue_cost);
1106 }
1107 
1108 
1109 /* Calculate cost of DR's memory access.  */
1110 void
1111 vect_get_load_cost (struct data_reference *dr, int ncopies,
1112 		    bool add_realign_cost, unsigned int *inside_cost,
1113 		    unsigned int *prologue_cost,
1114 		    stmt_vector_for_cost *prologue_cost_vec,
1115 		    stmt_vector_for_cost *body_cost_vec,
1116 		    bool record_prologue_costs)
1117 {
1118   int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1119   gimple stmt = DR_STMT (dr);
1120   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1121 
1122   switch (alignment_support_scheme)
1123     {
1124     case dr_aligned:
1125       {
1126 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1127 					  stmt_info, 0, vect_body);
1128 
1129         if (dump_enabled_p ())
1130           dump_printf_loc (MSG_NOTE, vect_location,
1131                            "vect_model_load_cost: aligned.");
1132 
1133         break;
1134       }
1135     case dr_unaligned_supported:
1136       {
1137         /* Here, we assign an additional cost for the unaligned load.  */
1138 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1139 					  unaligned_load, stmt_info,
1140 					  DR_MISALIGNMENT (dr), vect_body);
1141 
1142         if (dump_enabled_p ())
1143           dump_printf_loc (MSG_NOTE, vect_location,
1144                            "vect_model_load_cost: unaligned supported by "
1145                            "hardware.");
1146 
1147         break;
1148       }
1149     case dr_explicit_realign:
1150       {
1151 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1152 					  vector_load, stmt_info, 0, vect_body);
1153 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1154 					  vec_perm, stmt_info, 0, vect_body);
1155 
1156         /* FIXME: If the misalignment remains fixed across the iterations of
1157            the containing loop, the following cost should be added to the
1158            prologue costs.  */
1159         if (targetm.vectorize.builtin_mask_for_load)
1160 	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1161 					    stmt_info, 0, vect_body);
1162 
1163         if (dump_enabled_p ())
1164           dump_printf_loc (MSG_NOTE, vect_location,
1165                            "vect_model_load_cost: explicit realign");
1166 
1167         break;
1168       }
1169     case dr_explicit_realign_optimized:
1170       {
1171         if (dump_enabled_p ())
1172           dump_printf_loc (MSG_NOTE, vect_location,
1173                            "vect_model_load_cost: unaligned software "
1174                            "pipelined.");
1175 
1176         /* Unaligned software pipeline has a load of an address, an initial
1177            load, and possibly a mask operation to "prime" the loop.  However,
1178            if this is an access in a group of loads, which provide grouped
1179            access, then the above cost should only be considered for one
1180            access in the group.  Inside the loop, there is a load op
1181            and a realignment op.  */
1182 
1183         if (add_realign_cost && record_prologue_costs)
1184           {
1185 	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1186 						vector_stmt, stmt_info,
1187 						0, vect_prologue);
1188             if (targetm.vectorize.builtin_mask_for_load)
1189 	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1190 						  vector_stmt, stmt_info,
1191 						  0, vect_prologue);
1192           }
1193 
1194 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1195 					  stmt_info, 0, vect_body);
1196 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1197 					  stmt_info, 0, vect_body);
1198 
1199         if (dump_enabled_p ())
1200           dump_printf_loc (MSG_NOTE, vect_location,
1201                            "vect_model_load_cost: explicit realign optimized");
1202 
1203         break;
1204       }
1205 
1206     case dr_unaligned_unsupported:
1207       {
1208         *inside_cost = VECT_MAX_COST;
1209 
1210         if (dump_enabled_p ())
1211           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1212                            "vect_model_load_cost: unsupported access.");
1213         break;
1214       }
1215 
1216     default:
1217       gcc_unreachable ();
1218     }
1219 }
1220 
1221 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1222    the loop preheader for the vectorized stmt STMT.  */
1223 
1224 static void
1225 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1226 {
1227   if (gsi)
1228     vect_finish_stmt_generation (stmt, new_stmt, gsi);
1229   else
1230     {
1231       stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1232       loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1233 
1234       if (loop_vinfo)
1235         {
1236           struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1237 	  basic_block new_bb;
1238 	  edge pe;
1239 
1240           if (nested_in_vect_loop_p (loop, stmt))
1241             loop = loop->inner;
1242 
1243 	  pe = loop_preheader_edge (loop);
1244           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1245           gcc_assert (!new_bb);
1246 	}
1247       else
1248        {
1249           bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1250           basic_block bb;
1251           gimple_stmt_iterator gsi_bb_start;
1252 
1253           gcc_assert (bb_vinfo);
1254           bb = BB_VINFO_BB (bb_vinfo);
1255           gsi_bb_start = gsi_after_labels (bb);
1256           gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1257        }
1258     }
1259 
1260   if (dump_enabled_p ())
1261     {
1262       dump_printf_loc (MSG_NOTE, vect_location,
1263                        "created new init_stmt: ");
1264       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1265     }
1266 }
1267 
1268 /* Function vect_init_vector.
1269 
1270    Insert a new stmt (INIT_STMT) that initializes a new variable of type
1271    TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
1272    a vector type, a vector with all elements equal to VAL is created first.
1273    Place the initialization at GSI if it is not NULL.  Otherwise, place the
1274    initialization at the loop preheader.
1275    Return the DEF of INIT_STMT.
1276    It will be used in the vectorization of STMT.  */
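/* For example (illustrative), initializing a V4SI vector from the scalar
   constant 5 yields something like

       cst_1 = { 5, 5, 5, 5 };

   placed in the loop preheader (or at GSI when it is non-NULL), and the
   new SSA name cst_1 is returned.  */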
1277 
1278 tree
1279 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1280 {
1281   tree new_var;
1282   gimple init_stmt;
1283   tree vec_oprnd;
1284   tree new_temp;
1285 
1286   if (TREE_CODE (type) == VECTOR_TYPE
1287       && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1288     {
1289       if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1290 	{
1291 	  if (CONSTANT_CLASS_P (val))
1292 	    val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1293 	  else
1294 	    {
1295 	      new_temp = make_ssa_name (TREE_TYPE (type), NULL);
1296 	      init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1297 							new_temp, val,
1298 							NULL_TREE);
1299 	      vect_init_vector_1 (stmt, init_stmt, gsi);
1300 	      val = new_temp;
1301 	    }
1302 	}
1303       val = build_vector_from_val (type, val);
1304     }
1305 
1306   new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1307   init_stmt = gimple_build_assign  (new_var, val);
1308   new_temp = make_ssa_name (new_var, init_stmt);
1309   gimple_assign_set_lhs (init_stmt, new_temp);
1310   vect_init_vector_1 (stmt, init_stmt, gsi);
1311   vec_oprnd = gimple_assign_lhs (init_stmt);
1312   return vec_oprnd;
1313 }
1314 
1315 
1316 /* Function vect_get_vec_def_for_operand.
1317 
1318    OP is an operand in STMT.  This function returns a (vector) def that will be
1319    used in the vectorized stmt for STMT.
1320 
1321    In the case that OP is an SSA_NAME which is defined in the loop, then
1322    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1323 
1324    In case OP is an invariant or constant, a new stmt that creates a vector def
1325    needs to be introduced.  */
1326 
1327 tree
1328 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1329 {
1330   tree vec_oprnd;
1331   gimple vec_stmt;
1332   gimple def_stmt;
1333   stmt_vec_info def_stmt_info = NULL;
1334   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1335   unsigned int nunits;
1336   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1337   tree def;
1338   enum vect_def_type dt;
1339   bool is_simple_use;
1340   tree vector_type;
1341 
1342   if (dump_enabled_p ())
1343     {
1344       dump_printf_loc (MSG_NOTE, vect_location,
1345                        "vect_get_vec_def_for_operand: ");
1346       dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1347     }
1348 
1349   is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1350 				      &def_stmt, &def, &dt);
1351   gcc_assert (is_simple_use);
1352   if (dump_enabled_p ())
1353     {
1354       int loc_printed = 0;
1355       if (def)
1356         {
1357           dump_printf_loc (MSG_NOTE, vect_location, "def =  ");
1358           loc_printed = 1;
1359           dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1360         }
1361       if (def_stmt)
1362         {
1363           if (loc_printed)
1364             dump_printf (MSG_NOTE, "  def_stmt =  ");
1365           else
1366             dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
1367 	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1368         }
1369     }
1370 
1371   switch (dt)
1372     {
1373     /* Case 1: operand is a constant.  */
1374     case vect_constant_def:
1375       {
1376 	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1377 	gcc_assert (vector_type);
1378 	nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1379 
1380 	if (scalar_def)
1381 	  *scalar_def = op;
1382 
1383         /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
1384         if (dump_enabled_p ())
1385           dump_printf_loc (MSG_NOTE, vect_location,
1386                            "Create vector_cst. nunits = %d", nunits);
1387 
1388         return vect_init_vector (stmt, op, vector_type, NULL);
1389       }
1390 
1391     /* Case 2: operand is defined outside the loop - loop invariant.  */
1392     case vect_external_def:
1393       {
1394 	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1395 	gcc_assert (vector_type);
1396 
1397 	if (scalar_def)
1398 	  *scalar_def = def;
1399 
1400         /* Create 'vec_inv = {inv,inv,..,inv}'  */
1401         if (dump_enabled_p ())
1402           dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.");
1403 
1404         return vect_init_vector (stmt, def, vector_type, NULL);
1405       }
1406 
1407     /* Case 3: operand is defined inside the loop.  */
1408     case vect_internal_def:
1409       {
1410 	if (scalar_def)
1411 	  *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1412 
1413         /* Get the def from the vectorized stmt.  */
1414         def_stmt_info = vinfo_for_stmt (def_stmt);
1415 
1416         vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1417         /* Get vectorized pattern statement.  */
1418         if (!vec_stmt
1419             && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1420             && !STMT_VINFO_RELEVANT (def_stmt_info))
1421           vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1422                        STMT_VINFO_RELATED_STMT (def_stmt_info)));
1423         gcc_assert (vec_stmt);
1424 	if (gimple_code (vec_stmt) == GIMPLE_PHI)
1425 	  vec_oprnd = PHI_RESULT (vec_stmt);
1426 	else if (is_gimple_call (vec_stmt))
1427 	  vec_oprnd = gimple_call_lhs (vec_stmt);
1428 	else
1429 	  vec_oprnd = gimple_assign_lhs (vec_stmt);
1430         return vec_oprnd;
1431       }
1432 
1433     /* Case 4: operand is defined by a loop header phi - reduction  */
1434     case vect_reduction_def:
1435     case vect_double_reduction_def:
1436     case vect_nested_cycle:
1437       {
1438 	struct loop *loop;
1439 
1440 	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1441 	loop = (gimple_bb (def_stmt))->loop_father;
1442 
1443         /* Get the def before the loop  */
1444         op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1445         return get_initial_def_for_reduction (stmt, op, scalar_def);
1446      }
1447 
1448     /* Case 5: operand is defined by loop-header phi - induction.  */
1449     case vect_induction_def:
1450       {
1451 	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1452 
1453         /* Get the def from the vectorized stmt.  */
1454         def_stmt_info = vinfo_for_stmt (def_stmt);
1455         vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1456 	if (gimple_code (vec_stmt) == GIMPLE_PHI)
1457 	  vec_oprnd = PHI_RESULT (vec_stmt);
1458 	else
1459 	  vec_oprnd = gimple_get_lhs (vec_stmt);
1460         return vec_oprnd;
1461       }
1462 
1463     default:
1464       gcc_unreachable ();
1465     }
1466 }
1467 
1468 
1469 /* Function vect_get_vec_def_for_stmt_copy
1470 
1471    Return a vector-def for an operand.  This function is used when the
1472    vectorized stmt to be created (by the caller to this function) is a "copy"
1473    created in case the vectorized result cannot fit in one vector, and several
1474    copies of the vector-stmt are required.  In this case the vector-def is
1475    retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1476    of the stmt that defines VEC_OPRND.
1477    DT is the type of the vector def VEC_OPRND.
1478 
1479    Context:
1480         In case the vectorization factor (VF) is bigger than the number
1481    of elements that can fit in a vectype (nunits), we have to generate
1482    more than one vector stmt to vectorize the scalar stmt.  This situation
1483    arises when there are multiple data-types operated upon in the loop; the
1484    smallest data-type determines the VF, and as a result, when vectorizing
1485    stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1486    vector stmt (each computing a vector of 'nunits' results, and together
1487    computing 'VF' results in each iteration).  This function is called when
1488    vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1489    which VF=16 and nunits=4, so the number of copies required is 4):
1490 
1491    scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT
1492 
1493    S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
1494                         VS1.1:  vx.1 = memref1      VS1.2
1495                         VS1.2:  vx.2 = memref2      VS1.3
1496                         VS1.3:  vx.3 = memref3
1497 
1498    S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
1499                         VSnew.1:  vz1 = vx.1 + ...  VSnew.2
1500                         VSnew.2:  vz2 = vx.2 + ...  VSnew.3
1501                         VSnew.3:  vz3 = vx.3 + ...
1502 
1503    The vectorization of S1 is explained in vectorizable_load.
1504    The vectorization of S2:
1505         To create the first vector-stmt out of the 4 copies - VSnew.0 -
1506    the function 'vect_get_vec_def_for_operand' is called to
1507    get the relevant vector-def for each operand of S2.  For operand x it
1508    returns  the vector-def 'vx.0'.
1509 
1510         To create the remaining copies of the vector-stmt (VSnew.j), this
1511    function is called to get the relevant vector-def for each operand.  It is
1512    obtained from the respective VS1.j stmt, which is recorded in the
1513    STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1514 
1515         For example, to obtain the vector-def 'vx.1' in order to create the
1516    vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1517    Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1518    STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1519    and return its def ('vx.1').
1520    Overall, to create the above sequence this function will be called 3 times:
1521         vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1522         vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1523         vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
1524 
1525 tree
1526 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1527 {
1528   gimple vec_stmt_for_operand;
1529   stmt_vec_info def_stmt_info;
1530 
1531   /* Do nothing; we can reuse the same def.  */
1532   if (dt == vect_external_def || dt == vect_constant_def)
1533     return vec_oprnd;
1534 
1535   vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1536   def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1537   gcc_assert (def_stmt_info);
1538   vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1539   gcc_assert (vec_stmt_for_operand);
1541   if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1542     vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1543   else
1544     vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1545   return vec_oprnd;
1546 }
1547 
1548 
1549 /* Get vectorized definitions for the operands to create a copy of an original
1550    stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */
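/* For illustration only (a hypothetical sketch, not actual pass output): if
   VEC_OPRNDS0 currently ends with the def 'vx.0' and VEC_OPRNDS1 with 'vy.0',
   then after the call their last elements are replaced by 'vx.1' and 'vy.1'
   respectively - the defs of the next copies of the stmts that defined them
   (see the VS1.j chain in the comment above).  */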
1551 
1552 static void
1553 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1554 				 vec<tree> *vec_oprnds0,
1555 				 vec<tree> *vec_oprnds1)
1556 {
1557   tree vec_oprnd = vec_oprnds0->pop ();
1558 
1559   vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1560   vec_oprnds0->quick_push (vec_oprnd);
1561 
1562   if (vec_oprnds1 && vec_oprnds1->length ())
1563     {
1564       vec_oprnd = vec_oprnds1->pop ();
1565       vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1566       vec_oprnds1->quick_push (vec_oprnd);
1567     }
1568 }
1569 
1570 
1571 /* Get vectorized definitions for OP0 and OP1.
1572    REDUC_INDEX is the index of reduction operand in case of reduction,
1573    and -1 otherwise.  */
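/* A minimal sketch of a typical non-SLP use, with hypothetical local names:
   for a binary stmt 'z = x + y' with OP0 = x and OP1 = y,

       vec<tree> vx = vNULL, vy = vNULL;
       vect_get_vec_defs (x, y, stmt, &vx, &vy, NULL, -1);

   leaves one vector def in each of vx and vy, obtained via
   vect_get_vec_def_for_operand; with a non-NULL SLP_NODE the defs come from
   vect_get_slp_defs instead.  */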
1574 
1575 void
1576 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1577 		   vec<tree> *vec_oprnds0,
1578 		   vec<tree> *vec_oprnds1,
1579 		   slp_tree slp_node, int reduc_index)
1580 {
1581   if (slp_node)
1582     {
1583       int nops = (op1 == NULL_TREE) ? 1 : 2;
1584       vec<tree> ops;
1585       ops.create (nops);
1586       vec<vec<tree> > vec_defs;
1587       vec_defs.create (nops);
1588 
1589       ops.quick_push (op0);
1590       if (op1)
1591         ops.quick_push (op1);
1592 
1593       vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1594 
1595       *vec_oprnds0 = vec_defs[0];
1596       if (op1)
1597 	*vec_oprnds1 = vec_defs[1];
1598 
1599       ops.release ();
1600       vec_defs.release ();
1601     }
1602   else
1603     {
1604       tree vec_oprnd;
1605 
1606       vec_oprnds0->create (1);
1607       vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1608       vec_oprnds0->quick_push (vec_oprnd);
1609 
1610       if (op1)
1611 	{
1612 	  vec_oprnds1->create (1);
1613 	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1614 	  vec_oprnds1->quick_push (vec_oprnd);
1615 	}
1616     }
1617 }
1618 
1619 
1620 /* Function vect_finish_stmt_generation.
1621 
1622    Insert the new vector stmt VEC_STMT before GSI and record it for STMT.  */
1623 
1624 void
1625 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1626 			     gimple_stmt_iterator *gsi)
1627 {
1628   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1629   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1630   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1631 
1632   gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1633 
1634   if (!gsi_end_p (*gsi)
1635       && gimple_has_mem_ops (vec_stmt))
1636     {
1637       gimple at_stmt = gsi_stmt (*gsi);
1638       tree vuse = gimple_vuse (at_stmt);
1639       if (vuse && TREE_CODE (vuse) == SSA_NAME)
1640 	{
1641 	  tree vdef = gimple_vdef (at_stmt);
1642 	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1643 	  /* If we have an SSA vuse and insert a store, update virtual
1644 	     SSA form to avoid triggering the renamer.  Do so only
1645 	     if we can easily see all uses - which is what almost always
1646 	     happens with the way vectorized stmts are inserted.  */
1647 	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1648 	      && ((is_gimple_assign (vec_stmt)
1649 		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1650 		  || (is_gimple_call (vec_stmt)
1651 		      && !(gimple_call_flags (vec_stmt)
1652 			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1653 	    {
1654 	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1655 	      gimple_set_vdef (vec_stmt, new_vdef);
1656 	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1657 	    }
1658 	}
1659     }
1660   gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1661 
1662   set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1663                                                    bb_vinfo));
1664 
1665   if (dump_enabled_p ())
1666     {
1667       dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1668       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1669     }
1670 
1671   gimple_set_location (vec_stmt, gimple_location (stmt));
1672 }
1673 
1674 /* Checks if CALL can be vectorized with input vector type VECTYPE_IN and
1675    output vector type VECTYPE_OUT.  Returns a function declaration if the
1676    target has a vectorized version of the function, or NULL_TREE otherwise.  */
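/* For example, on targets that provide such a builtin, a call to a const
   math builtin (e.g. sqrt) can be mapped by
   targetm.vectorize.builtin_vectorized_function to a target-specific vector
   builtin taking VECTYPE_IN arguments and returning VECTYPE_OUT; calls that
   read or clobber memory are rejected up front.  */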
1677 
1678 tree
1679 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1680 {
1681   tree fndecl = gimple_call_fndecl (call);
1682 
1683   /* We only handle functions that do not read or clobber memory -- i.e.
1684      const or novops ones.  */
1685   if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1686     return NULL_TREE;
1687 
1688   if (!fndecl
1689       || TREE_CODE (fndecl) != FUNCTION_DECL
1690       || !DECL_BUILT_IN (fndecl))
1691     return NULL_TREE;
1692 
1693   return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1694 						        vectype_in);
1695 }
1696 
1697 /* Function vectorizable_call.
1698 
1699    Check if STMT performs a function call that can be vectorized.
1700    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1701    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1702    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1703 
1704 static bool
1705 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1706 		   slp_tree slp_node)
1707 {
1708   tree vec_dest;
1709   tree scalar_dest;
1710   tree op, type;
1711   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1712   stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1713   tree vectype_out, vectype_in;
1714   int nunits_in;
1715   int nunits_out;
1716   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1717   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1718   tree fndecl, new_temp, def, rhs_type;
1719   gimple def_stmt;
1720   enum vect_def_type dt[3]
1721     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1722   gimple new_stmt = NULL;
1723   int ncopies, j;
1724   vec<tree> vargs = vNULL;
1725   enum { NARROW, NONE, WIDEN } modifier;
1726   size_t i, nargs;
1727   tree lhs;
1728 
1729   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1730     return false;
1731 
1732   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1733     return false;
1734 
1735   /* Is STMT a vectorizable call?   */
1736   if (!is_gimple_call (stmt))
1737     return false;
1738 
1739   if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1740     return false;
1741 
1742   if (stmt_can_throw_internal (stmt))
1743     return false;
1744 
1745   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1746 
1747   /* Process function arguments.  */
1748   rhs_type = NULL_TREE;
1749   vectype_in = NULL_TREE;
1750   nargs = gimple_call_num_args (stmt);
1751 
1752   /* Bail out if the function has more than three arguments; apart from fma
1753      we do not have interesting builtin functions to vectorize with more than
1754      two arguments.  A call with no arguments is not vectorizable either.  */
1755   if (nargs == 0 || nargs > 3)
1756     return false;
1757 
1758   for (i = 0; i < nargs; i++)
1759     {
1760       tree opvectype;
1761 
1762       op = gimple_call_arg (stmt, i);
1763 
1764       /* We can only handle calls with arguments of the same type.  */
1765       if (rhs_type
1766 	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1767 	{
1768 	  if (dump_enabled_p ())
1769 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1770                              "argument types differ.");
1771 	  return false;
1772 	}
1773       if (!rhs_type)
1774 	rhs_type = TREE_TYPE (op);
1775 
1776       if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
1777 				 &def_stmt, &def, &dt[i], &opvectype))
1778 	{
1779 	  if (dump_enabled_p ())
1780 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1781                              "use not simple.");
1782 	  return false;
1783 	}
1784 
1785       if (!vectype_in)
1786 	vectype_in = opvectype;
1787       else if (opvectype
1788 	       && opvectype != vectype_in)
1789 	{
1790 	  if (dump_enabled_p ())
1791 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1792                              "argument vector types differ.");
1793 	  return false;
1794 	}
1795     }
1796   /* If all arguments are external or constant defs, use a vector type with
1797      the same size as the output vector type.  */
1798   if (!vectype_in)
1799     vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1800   if (vec_stmt)
1801     gcc_assert (vectype_in);
1802   if (!vectype_in)
1803     {
1804       if (dump_enabled_p ())
1805         {
1806           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1807                            "no vectype for scalar type ");
1808           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
1809         }
1810 
1811       return false;
1812     }
1813 
1814   /* FORNOW */
1815   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1816   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1817   if (nunits_in == nunits_out / 2)
1818     modifier = NARROW;
1819   else if (nunits_out == nunits_in)
1820     modifier = NONE;
1821   else if (nunits_out == nunits_in / 2)
1822     modifier = WIDEN;
1823   else
1824     return false;
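  /* For example (a sketch): with 4-element input vectors and an 8-element
     output vector (nunits_in == 4, nunits_out == 8) the call is a NARROW
     one, and the transform below feeds two input vectors per argument into
     each vector call; equal counts give NONE, and nunits_out == nunits_in / 2
     would be WIDEN, which no current target implements.  */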
1825 
1826   /* For now, we only vectorize functions if a target specific builtin
1827      is available.  TODO -- in some cases, it might be profitable to
1828      insert the calls for pieces of the vector, in order to be able
1829      to vectorize other operations in the loop.  */
1830   fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1831   if (fndecl == NULL_TREE)
1832     {
1833       if (dump_enabled_p ())
1834 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1835                          "function is not vectorizable.");
1836 
1837       return false;
1838     }
1839 
1840   gcc_assert (!gimple_vuse (stmt));
1841 
1842   if (slp_node || PURE_SLP_STMT (stmt_info))
1843     ncopies = 1;
1844   else if (modifier == NARROW)
1845     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1846   else
1847     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1848 
1849   /* Sanity check: make sure that at least one copy of the vectorized stmt
1850      needs to be generated.  */
1851   gcc_assert (ncopies >= 1);
1852 
1853   if (!vec_stmt) /* transformation not required.  */
1854     {
1855       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1856       if (dump_enabled_p ())
1857         dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ===");
1858       vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
1859       return true;
1860     }
1861 
1862   /** Transform.  **/
1863 
1864   if (dump_enabled_p ())
1865     dump_printf_loc (MSG_NOTE, vect_location, "transform call.");
1866 
1867   /* Handle def.  */
1868   scalar_dest = gimple_call_lhs (stmt);
1869   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1870 
1871   prev_stmt_info = NULL;
1872   switch (modifier)
1873     {
1874     case NONE:
1875       for (j = 0; j < ncopies; ++j)
1876 	{
1877 	  /* Build argument list for the vectorized call.  */
1878 	  if (j == 0)
1879 	    vargs.create (nargs);
1880 	  else
1881 	    vargs.truncate (0);
1882 
1883 	  if (slp_node)
1884 	    {
1885 	      vec<vec<tree> > vec_defs;
1886 	      vec_defs.create (nargs);
1887 	      vec<tree> vec_oprnds0;
1888 
1889 	      for (i = 0; i < nargs; i++)
1890 		vargs.quick_push (gimple_call_arg (stmt, i));
1891 	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1892 	      vec_oprnds0 = vec_defs[0];
1893 
1894 	      /* Arguments are ready.  Create the new vector stmt.  */
1895 	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
1896 		{
1897 		  size_t k;
1898 		  for (k = 0; k < nargs; k++)
1899 		    {
1900 		      vec<tree> vec_oprndsk = vec_defs[k];
1901 		      vargs[k] = vec_oprndsk[i];
1902 		    }
1903 		  new_stmt = gimple_build_call_vec (fndecl, vargs);
1904 		  new_temp = make_ssa_name (vec_dest, new_stmt);
1905 		  gimple_call_set_lhs (new_stmt, new_temp);
1906 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
1907 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
1908 		}
1909 
1910 	      for (i = 0; i < nargs; i++)
1911 		{
1912 		  vec<tree> vec_oprndsi = vec_defs[i];
1913 		  vec_oprndsi.release ();
1914 		}
1915 	      vec_defs.release ();
1916 	      continue;
1917 	    }
1918 
1919 	  for (i = 0; i < nargs; i++)
1920 	    {
1921 	      op = gimple_call_arg (stmt, i);
1922 	      if (j == 0)
1923 		vec_oprnd0
1924 		  = vect_get_vec_def_for_operand (op, stmt, NULL);
1925 	      else
1926 		{
1927 		  vec_oprnd0 = gimple_call_arg (new_stmt, i);
1928 		  vec_oprnd0
1929                     = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1930 		}
1931 
1932 	      vargs.quick_push (vec_oprnd0);
1933 	    }
1934 
1935 	  new_stmt = gimple_build_call_vec (fndecl, vargs);
1936 	  new_temp = make_ssa_name (vec_dest, new_stmt);
1937 	  gimple_call_set_lhs (new_stmt, new_temp);
1938 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
1939 
1940 	  if (j == 0)
1941 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1942 	  else
1943 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1944 
1945 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
1946 	}
1947 
1948       break;
1949 
1950     case NARROW:
1951       for (j = 0; j < ncopies; ++j)
1952 	{
1953 	  /* Build argument list for the vectorized call.  */
1954 	  if (j == 0)
1955 	    vargs.create (nargs * 2);
1956 	  else
1957 	    vargs.truncate (0);
1958 
1959 	  if (slp_node)
1960 	    {
1961 	      vec<vec<tree> > vec_defs;
1962 	      vec_defs.create (nargs);
1963 	      vec<tree> vec_oprnds0;
1964 
1965 	      for (i = 0; i < nargs; i++)
1966 		vargs.quick_push (gimple_call_arg (stmt, i));
1967 	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1968 	      vec_oprnds0 = vec_defs[0];
1969 
1970 	      /* Arguments are ready.  Create the new vector stmt.  */
1971 	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
1972 		{
1973 		  size_t k;
1974 		  vargs.truncate (0);
1975 		  for (k = 0; k < nargs; k++)
1976 		    {
1977 		      vec<tree> vec_oprndsk = vec_defs[k];
1978 		      vargs.quick_push (vec_oprndsk[i]);
1979 		      vargs.quick_push (vec_oprndsk[i + 1]);
1980 		    }
1981 		  new_stmt = gimple_build_call_vec (fndecl, vargs);
1982 		  new_temp = make_ssa_name (vec_dest, new_stmt);
1983 		  gimple_call_set_lhs (new_stmt, new_temp);
1984 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
1985 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
1986 		}
1987 
1988 	      for (i = 0; i < nargs; i++)
1989 		{
1990 		  vec<tree> vec_oprndsi = vec_defs[i];
1991 		  vec_oprndsi.release ();
1992 		}
1993 	      vec_defs.release ();
1994 	      continue;
1995 	    }
1996 
1997 	  for (i = 0; i < nargs; i++)
1998 	    {
1999 	      op = gimple_call_arg (stmt, i);
2000 	      if (j == 0)
2001 		{
2002 		  vec_oprnd0
2003 		    = vect_get_vec_def_for_operand (op, stmt, NULL);
2004 		  vec_oprnd1
2005 		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2006 		}
2007 	      else
2008 		{
2009 		  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2010 		  vec_oprnd0
2011 		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2012 		  vec_oprnd1
2013 		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2014 		}
2015 
2016 	      vargs.quick_push (vec_oprnd0);
2017 	      vargs.quick_push (vec_oprnd1);
2018 	    }
2019 
2020 	  new_stmt = gimple_build_call_vec (fndecl, vargs);
2021 	  new_temp = make_ssa_name (vec_dest, new_stmt);
2022 	  gimple_call_set_lhs (new_stmt, new_temp);
2023 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2024 
2025 	  if (j == 0)
2026 	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2027 	  else
2028 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2029 
2030 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
2031 	}
2032 
2033       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2034 
2035       break;
2036 
2037     case WIDEN:
2038       /* No current target implements this case.  */
2039       return false;
2040     }
2041 
2042   vargs.release ();
2043 
2044   /* Update the exception handling table with the vector stmt if necessary.  */
2045   if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
2046     gimple_purge_dead_eh_edges (gimple_bb (stmt));
2047 
2048   /* The call in STMT might prevent it from being removed in DCE.
2049      We however cannot remove it here, due to the way the SSA name
2050      it defines is mapped to the new definition.  So just replace
2051      the rhs of the statement with something harmless.  */
2052 
2053   if (slp_node)
2054     return true;
2055 
2056   type = TREE_TYPE (scalar_dest);
2057   if (is_pattern_stmt_p (stmt_info))
2058     lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2059   else
2060     lhs = gimple_call_lhs (stmt);
2061   new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2062   set_vinfo_for_stmt (new_stmt, stmt_info);
2063   set_vinfo_for_stmt (stmt, NULL);
2064   STMT_VINFO_STMT (stmt_info) = new_stmt;
2065   gsi_replace (gsi, new_stmt, false);
2066   SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
2067 
2068   return true;
2069 }
2070 
2071 
2072 /* Function vect_gen_widened_results_half
2073 
2074    Create a vector stmt whose code, number of operands, and result variable
2075    are CODE, OP_TYPE, and VEC_DEST respectively, and whose operands are
2076    VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
2077    In the case that CODE is a CALL_EXPR, this means that a call to DECL
2078    needs to be created (DECL is a function-decl of a target-builtin).
2079    STMT is the original scalar stmt that we are vectorizing.  */
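/* For illustration (a sketch): when promoting V8HI operands to V4SI results,
   vect_create_vectorized_promotion_stmts below calls this function twice per
   input vector - once with the "low" CODE/DECL and once with the "high" one -
   so that the two resulting V4SI vectors together cover all eight input
   elements.  */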
2080 
2081 static gimple
2082 vect_gen_widened_results_half (enum tree_code code,
2083 			       tree decl,
2084                                tree vec_oprnd0, tree vec_oprnd1, int op_type,
2085 			       tree vec_dest, gimple_stmt_iterator *gsi,
2086 			       gimple stmt)
2087 {
2088   gimple new_stmt;
2089   tree new_temp;
2090 
2091   /* Generate half of the widened result:  */
2092   if (code == CALL_EXPR)
2093     {
2094       /* Target specific support  */
2095       if (op_type == binary_op)
2096 	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
2097       else
2098 	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
2099       new_temp = make_ssa_name (vec_dest, new_stmt);
2100       gimple_call_set_lhs (new_stmt, new_temp);
2101     }
2102   else
2103     {
2104       /* Generic support */
2105       gcc_assert (op_type == TREE_CODE_LENGTH (code));
2106       if (op_type != binary_op)
2107 	vec_oprnd1 = NULL;
2108       new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
2109 					       vec_oprnd1);
2110       new_temp = make_ssa_name (vec_dest, new_stmt);
2111       gimple_assign_set_lhs (new_stmt, new_temp);
2112     }
2113   vect_finish_stmt_generation (stmt, new_stmt, gsi);
2114 
2115   return new_stmt;
2116 }
2117 
2118 
2119 /* Get vectorized definitions for loop-based vectorization.  For the first
2120    operand we call vect_get_vec_def_for_operand() (with OPRND containing
2121    scalar operand), and for the rest we get a copy with
2122    vect_get_vec_def_for_stmt_copy() using the previous vector definition
2123    (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2124    The vectors are collected into VEC_OPRNDS.  */
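/* For example (a sketch): when called with MULTI_STEP_CVT == 1 the recursion
   collects four vector defs - two at each level - which is exactly the number
   of input vectors a two-step narrowing conversion consumes to produce one
   result vector.  */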
2125 
2126 static void
2127 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2128 			  vec<tree> *vec_oprnds, int multi_step_cvt)
2129 {
2130   tree vec_oprnd;
2131 
2132   /* Get first vector operand.  */
2133   /* All the vector operands except the very first one (which is the scalar
2134      operand) are stmt copies.  */
2135   if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2136     vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2137   else
2138     vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2139 
2140   vec_oprnds->quick_push (vec_oprnd);
2141 
2142   /* Get second vector operand.  */
2143   vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2144   vec_oprnds->quick_push (vec_oprnd);
2145 
2146   *oprnd = vec_oprnd;
2147 
2148   /* For conversion in multiple steps, continue to get operands
2149      recursively.  */
2150   if (multi_step_cvt)
2151     vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds,  multi_step_cvt - 1);
2152 }
2153 
2154 
2155 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2156    For multi-step conversions store the resulting vectors and call the function
2157    recursively.  */
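/* For illustration (a sketch): demoting four V4SI vectors to one V16QI vector
   takes two levels - the first level packs the four inputs into two V8HI
   vectors, and the recursive call packs those into the final V16QI vector
   using VEC_PACK_TRUNC_EXPR.  */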
2158 
2159 static void
2160 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
2161 				       int multi_step_cvt, gimple stmt,
2162 				       vec<tree> vec_dsts,
2163 				       gimple_stmt_iterator *gsi,
2164 				       slp_tree slp_node, enum tree_code code,
2165 				       stmt_vec_info *prev_stmt_info)
2166 {
2167   unsigned int i;
2168   tree vop0, vop1, new_tmp, vec_dest;
2169   gimple new_stmt;
2170   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2171 
2172   vec_dest = vec_dsts.pop ();
2173 
2174   for (i = 0; i < vec_oprnds->length (); i += 2)
2175     {
2176       /* Create demotion operation.  */
2177       vop0 = (*vec_oprnds)[i];
2178       vop1 = (*vec_oprnds)[i + 1];
2179       new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2180       new_tmp = make_ssa_name (vec_dest, new_stmt);
2181       gimple_assign_set_lhs (new_stmt, new_tmp);
2182       vect_finish_stmt_generation (stmt, new_stmt, gsi);
2183 
2184       if (multi_step_cvt)
2185 	/* Store the resulting vector for next recursive call.  */
2186 	(*vec_oprnds)[i/2] = new_tmp;
2187       else
2188 	{
2189 	  /* This is the last step of the conversion sequence. Store the
2190 	     vectors in SLP_NODE or in vector info of the scalar statement
2191 	     (or in STMT_VINFO_RELATED_STMT chain).  */
2192 	  if (slp_node)
2193 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2194 	  else
2195 	    {
2196 	      if (!*prev_stmt_info)
2197 		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2198 	      else
2199 		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2200 
2201 	      *prev_stmt_info = vinfo_for_stmt (new_stmt);
2202 	    }
2203 	}
2204     }
2205 
2206   /* For multi-step demotion operations we first generate demotion operations
2207      from the source type to the intermediate types, and then combine the
2208      results (stored in VEC_OPRNDS) with a demotion operation to the
2209      destination type.  */
2210   if (multi_step_cvt)
2211     {
2212       /* At each level of recursion we have half of the operands we had at the
2213 	 previous level.  */
2214       vec_oprnds->truncate ((i+1)/2);
2215       vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2216 					     stmt, vec_dsts, gsi, slp_node,
2217 					     VEC_PACK_TRUNC_EXPR,
2218 					     prev_stmt_info);
2219     }
2220 
2221   vec_dsts.quick_push (vec_dest);
2222 }
2223 
2224 
2225 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2226    and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
2227    the resulting vectors and call the function recursively.  */
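/* For illustration (a sketch): promoting V8HI operands to V4SI results
   doubles the number of vectors - each input vector in VEC_OPRNDS0 (and in
   VEC_OPRNDS1 for binary operations) yields a "low" and a "high" result
   vector, and VEC_OPRNDS0 is replaced by the list of these results so that a
   further promotion step can consume them directly.  */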
2228 
2229 static void
2230 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
2231 					vec<tree> *vec_oprnds1,
2232 					gimple stmt, tree vec_dest,
2233 					gimple_stmt_iterator *gsi,
2234 					enum tree_code code1,
2235 					enum tree_code code2, tree decl1,
2236 					tree decl2, int op_type)
2237 {
2238   int i;
2239   tree vop0, vop1, new_tmp1, new_tmp2;
2240   gimple new_stmt1, new_stmt2;
2241   vec<tree> vec_tmp = vNULL;
2242 
2243   vec_tmp.create (vec_oprnds0->length () * 2);
2244   FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
2245     {
2246       if (op_type == binary_op)
2247 	vop1 = (*vec_oprnds1)[i];
2248       else
2249 	vop1 = NULL_TREE;
2250 
2251       /* Generate the two halves of promotion operation.  */
2252       new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2253 						 op_type, vec_dest, gsi, stmt);
2254       new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2255 						 op_type, vec_dest, gsi, stmt);
2256       if (is_gimple_call (new_stmt1))
2257 	{
2258 	  new_tmp1 = gimple_call_lhs (new_stmt1);
2259 	  new_tmp2 = gimple_call_lhs (new_stmt2);
2260 	}
2261       else
2262 	{
2263 	  new_tmp1 = gimple_assign_lhs (new_stmt1);
2264 	  new_tmp2 = gimple_assign_lhs (new_stmt2);
2265 	}
2266 
2267       /* Store the results for the next step.  */
2268       vec_tmp.quick_push (new_tmp1);
2269       vec_tmp.quick_push (new_tmp2);
2270     }
2271 
2272   vec_oprnds0->release ();
2273   *vec_oprnds0 = vec_tmp;
2274 }
2275 
2276 
2277 /* Check if STMT performs a conversion operation, that can be vectorized.
2278    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2279    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2280    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
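/* For example (a sketch): a conversion such as 'double_d = (double) short_s'
   that the target cannot widen directly may still be vectorized in two steps:
   first widen the short to an intermediate integer type (CVT_TYPE, e.g. int)
   with a NOP conversion, then convert that to double with FLOAT_EXPR.  The
   NARROW path below handles the symmetric FIX_TRUNC case.  */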
2281 
2282 static bool
2283 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2284 			 gimple *vec_stmt, slp_tree slp_node)
2285 {
2286   tree vec_dest;
2287   tree scalar_dest;
2288   tree op0, op1 = NULL_TREE;
2289   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2290   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2291   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2292   enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2293   enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2294   tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2295   tree new_temp;
2296   tree def;
2297   gimple def_stmt;
2298   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2299   gimple new_stmt = NULL;
2300   stmt_vec_info prev_stmt_info;
2301   int nunits_in;
2302   int nunits_out;
2303   tree vectype_out, vectype_in;
2304   int ncopies, i, j;
2305   tree lhs_type, rhs_type;
2306   enum { NARROW, NONE, WIDEN } modifier;
2307   vec<tree> vec_oprnds0 = vNULL;
2308   vec<tree> vec_oprnds1 = vNULL;
2309   tree vop0;
2310   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2311   int multi_step_cvt = 0;
2312   vec<tree> vec_dsts = vNULL;
2313   vec<tree> interm_types = vNULL;
2314   tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2315   int op_type;
2316   enum machine_mode rhs_mode;
2317   unsigned short fltsz;
2318 
2319   /* Is STMT a vectorizable conversion?   */
2320 
2321   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2322     return false;
2323 
2324   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2325     return false;
2326 
2327   if (!is_gimple_assign (stmt))
2328     return false;
2329 
2330   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2331     return false;
2332 
2333   code = gimple_assign_rhs_code (stmt);
2334   if (!CONVERT_EXPR_CODE_P (code)
2335       && code != FIX_TRUNC_EXPR
2336       && code != FLOAT_EXPR
2337       && code != WIDEN_MULT_EXPR
2338       && code != WIDEN_LSHIFT_EXPR)
2339     return false;
2340 
2341   op_type = TREE_CODE_LENGTH (code);
2342 
2343   /* Check types of lhs and rhs.  */
2344   scalar_dest = gimple_assign_lhs (stmt);
2345   lhs_type = TREE_TYPE (scalar_dest);
2346   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2347 
2348   op0 = gimple_assign_rhs1 (stmt);
2349   rhs_type = TREE_TYPE (op0);
2350 
2351   if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2352       && !((INTEGRAL_TYPE_P (lhs_type)
2353 	    && INTEGRAL_TYPE_P (rhs_type))
2354 	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
2355 	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
2356     return false;
2357 
2358   if ((INTEGRAL_TYPE_P (lhs_type)
2359        && (TYPE_PRECISION (lhs_type)
2360 	   != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2361       || (INTEGRAL_TYPE_P (rhs_type)
2362 	  && (TYPE_PRECISION (rhs_type)
2363 	      != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2364     {
2365       if (dump_enabled_p ())
2366 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2367                          "type conversion to/from bit-precision unsupported.");
2368       return false;
2369     }
2370 
2371   /* Check the operands of the operation.  */
2372   if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
2373 			     &def_stmt, &def, &dt[0], &vectype_in))
2374     {
2375       if (dump_enabled_p ())
2376 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2377                          "use not simple.");
2378       return false;
2379     }
2380   if (op_type == binary_op)
2381     {
2382       bool ok;
2383 
2384       op1 = gimple_assign_rhs2 (stmt);
2385       gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2386       /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2387 	 OP1.  */
2388       if (CONSTANT_CLASS_P (op0))
2389 	ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
2390 				   &def_stmt, &def, &dt[1], &vectype_in);
2391       else
2392 	ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
2393 				 &def, &dt[1]);
2394 
2395       if (!ok)
2396 	{
2397           if (dump_enabled_p ())
2398             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2399                              "use not simple.");
2400 	  return false;
2401 	}
2402     }
2403 
2404   /* If op0 is an external or constant def, use a vector type of
2405      the same size as the output vector type.  */
2406   if (!vectype_in)
2407     vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2408   if (vec_stmt)
2409     gcc_assert (vectype_in);
2410   if (!vectype_in)
2411     {
2412       if (dump_enabled_p ())
2413 	{
2414 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2415                            "no vectype for scalar type ");
2416 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2417 	}
2418 
2419       return false;
2420     }
2421 
2422   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2423   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2424   if (nunits_in < nunits_out)
2425     modifier = NARROW;
2426   else if (nunits_out == nunits_in)
2427     modifier = NONE;
2428   else
2429     modifier = WIDEN;
2430 
2431   /* Multiple types in SLP are handled by creating the appropriate number of
2432      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
2433      case of SLP.  */
2434   if (slp_node || PURE_SLP_STMT (stmt_info))
2435     ncopies = 1;
2436   else if (modifier == NARROW)
2437     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2438   else
2439     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2440 
2441   /* Sanity check: make sure that at least one copy of the vectorized stmt
2442      needs to be generated.  */
2443   gcc_assert (ncopies >= 1);
2444 
2445   /* Supportable by target?  */
2446   switch (modifier)
2447     {
2448     case NONE:
2449       if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2450 	return false;
2451       if (supportable_convert_operation (code, vectype_out, vectype_in,
2452 					 &decl1, &code1))
2453 	break;
2454       /* FALLTHRU */
2455     unsupported:
2456       if (dump_enabled_p ())
2457 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2458                          "conversion not supported by target.");
2459       return false;
2460 
2461     case WIDEN:
2462       if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2463 					  &code1, &code2, &multi_step_cvt,
2464 					  &interm_types))
2465 	{
2466 	  /* Binary widening operation can only be supported directly by the
2467 	     architecture.  */
2468 	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
2469 	  break;
2470 	}
2471 
2472       if (code != FLOAT_EXPR
2473 	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2474 	      <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2475 	goto unsupported;
2476 
2477       rhs_mode = TYPE_MODE (rhs_type);
2478       fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2479       for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2480 	   rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2481 	   rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2482 	{
2483 	  cvt_type
2484 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2485 	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2486 	  if (cvt_type == NULL_TREE)
2487 	    goto unsupported;
2488 
2489 	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
2490 	    {
2491 	      if (!supportable_convert_operation (code, vectype_out,
2492 						  cvt_type, &decl1, &codecvt1))
2493 		goto unsupported;
2494 	    }
2495 	  else if (!supportable_widening_operation (code, stmt, vectype_out,
2496 						    cvt_type, &codecvt1,
2497 						    &codecvt2, &multi_step_cvt,
2498 						    &interm_types))
2499 	    continue;
2500 	  else
2501 	    gcc_assert (multi_step_cvt == 0);
2502 
2503 	  if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2504 					      vectype_in, &code1, &code2,
2505 					      &multi_step_cvt, &interm_types))
2506 	    break;
2507 	}
2508 
2509       if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2510 	goto unsupported;
2511 
2512       if (GET_MODE_SIZE (rhs_mode) == fltsz)
2513 	codecvt2 = ERROR_MARK;
2514       else
2515 	{
2516 	  multi_step_cvt++;
2517 	  interm_types.safe_push (cvt_type);
2518 	  cvt_type = NULL_TREE;
2519 	}
2520       break;
2521 
2522     case NARROW:
2523       gcc_assert (op_type == unary_op);
2524       if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2525 					   &code1, &multi_step_cvt,
2526 					   &interm_types))
2527 	break;
2528 
2529       if (code != FIX_TRUNC_EXPR
2530 	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2531 	      >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2532 	goto unsupported;
2533 
2534       rhs_mode = TYPE_MODE (rhs_type);
2535       cvt_type
2536 	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2537       cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2538       if (cvt_type == NULL_TREE)
2539 	goto unsupported;
2540       if (!supportable_convert_operation (code, cvt_type, vectype_in,
2541 					  &decl1, &codecvt1))
2542 	goto unsupported;
2543       if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2544 					   &code1, &multi_step_cvt,
2545 					   &interm_types))
2546 	break;
2547       goto unsupported;
2548 
2549     default:
2550       gcc_unreachable ();
2551     }
2552 
2553   if (!vec_stmt)		/* transformation not required.  */
2554     {
2555       if (dump_enabled_p ())
2556 	dump_printf_loc (MSG_NOTE, vect_location,
2557                          "=== vectorizable_conversion ===");
2558       if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2559         {
2560 	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2561 	  vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2562 	}
2563       else if (modifier == NARROW)
2564 	{
2565 	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2566 	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2567 	}
2568       else
2569 	{
2570 	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2571 	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2572 	}
2573       interm_types.release ();
2574       return true;
2575     }
2576 
2577   /** Transform.  **/
2578   if (dump_enabled_p ())
2579     dump_printf_loc (MSG_NOTE, vect_location,
2580                      "transform conversion. ncopies = %d.", ncopies);
2581 
2582   if (op_type == binary_op)
2583     {
2584       if (CONSTANT_CLASS_P (op0))
2585 	op0 = fold_convert (TREE_TYPE (op1), op0);
2586       else if (CONSTANT_CLASS_P (op1))
2587 	op1 = fold_convert (TREE_TYPE (op0), op1);
2588     }
2589 
2590   /* In case of multi-step conversion, we first generate conversion operations
2591      to the intermediate types, and then from those types to the final one.
2592      We create vector destinations for the intermediate types (INTERM_TYPES)
2593      received from supportable_*_operation, and store them in the correct
2594      order for future use in vect_create_vectorized_*_stmts ().  */
2595   vec_dsts.create (multi_step_cvt + 1);
2596   vec_dest = vect_create_destination_var (scalar_dest,
2597 					  (cvt_type && modifier == WIDEN)
2598 					  ? cvt_type : vectype_out);
2599   vec_dsts.quick_push (vec_dest);
2600 
2601   if (multi_step_cvt)
2602     {
2603       for (i = interm_types.length () - 1;
2604 	   interm_types.iterate (i, &intermediate_type); i--)
2605 	{
2606 	  vec_dest = vect_create_destination_var (scalar_dest,
2607 						  intermediate_type);
2608 	  vec_dsts.quick_push (vec_dest);
2609 	}
2610     }
2611 
2612   if (cvt_type)
2613     vec_dest = vect_create_destination_var (scalar_dest,
2614 					    modifier == WIDEN
2615 					    ? vectype_out : cvt_type);
2616 
2617   if (!slp_node)
2618     {
2619       if (modifier == WIDEN)
2620 	{
2621 	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
2622 	  if (op_type == binary_op)
2623 	    vec_oprnds1.create (1);
2624 	}
2625       else if (modifier == NARROW)
2626 	vec_oprnds0.create (
2627 		   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
2628     }
2629   else if (code == WIDEN_LSHIFT_EXPR)
2630     vec_oprnds1.create (slp_node->vec_stmts_size);
2631 
2632   last_oprnd = op0;
2633   prev_stmt_info = NULL;
2634   switch (modifier)
2635     {
2636     case NONE:
2637       for (j = 0; j < ncopies; j++)
2638 	{
2639 	  if (j == 0)
2640 	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2641 			       -1);
2642 	  else
2643 	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2644 
2645 	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2646 	    {
2647 	      /* Arguments are ready, create the new vector stmt.  */
2648 	      if (code1 == CALL_EXPR)
2649 		{
2650 		  new_stmt = gimple_build_call (decl1, 1, vop0);
2651 		  new_temp = make_ssa_name (vec_dest, new_stmt);
2652 		  gimple_call_set_lhs (new_stmt, new_temp);
2653 		}
2654 	      else
2655 		{
2656 		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2657 		  new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2658 							   vop0, NULL);
2659 		  new_temp = make_ssa_name (vec_dest, new_stmt);
2660 		  gimple_assign_set_lhs (new_stmt, new_temp);
2661 		}
2662 
2663 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
2664 	      if (slp_node)
2665 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2666 	    }
2667 
2668 	  if (j == 0)
2669 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2670 	  else
2671 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2672 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
2673 	}
2674       break;
2675 
2676     case WIDEN:
2677       /* In case the vectorization factor (VF) is bigger than the number
2678 	 of elements that we can fit in a vectype (nunits), we have to
2679 	 generate more than one vector stmt - i.e., we need to "unroll"
2680 	 the vector stmt by a factor VF/nunits.  */
2681       for (j = 0; j < ncopies; j++)
2682 	{
2683 	  /* Handle uses.  */
2684 	  if (j == 0)
2685 	    {
2686 	      if (slp_node)
2687 		{
2688 		  if (code == WIDEN_LSHIFT_EXPR)
2689 		    {
2690 		      unsigned int k;
2691 
2692 		      vec_oprnd1 = op1;
2693 		      /* Store vec_oprnd1 for every vector stmt to be created
2694 			 for SLP_NODE.  We check during the analysis that all
2695 			 the shift arguments are the same.  */
2696 		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2697 			vec_oprnds1.quick_push (vec_oprnd1);
2698 
2699 		      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2700 					 slp_node, -1);
2701 		    }
2702 		  else
2703 		    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2704 				       &vec_oprnds1, slp_node, -1);
2705 		}
2706 	      else
2707 		{
2708 		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2709 		  vec_oprnds0.quick_push (vec_oprnd0);
2710 		  if (op_type == binary_op)
2711 		    {
2712 		      if (code == WIDEN_LSHIFT_EXPR)
2713 			vec_oprnd1 = op1;
2714 		      else
2715 			vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2716 								   NULL);
2717 		      vec_oprnds1.quick_push (vec_oprnd1);
2718 		    }
2719 		}
2720 	    }
2721 	  else
2722 	    {
2723 	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2724 	      vec_oprnds0.truncate (0);
2725 	      vec_oprnds0.quick_push (vec_oprnd0);
2726 	      if (op_type == binary_op)
2727 		{
2728 		  if (code == WIDEN_LSHIFT_EXPR)
2729 		    vec_oprnd1 = op1;
2730 		  else
2731 		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2732 								 vec_oprnd1);
2733 		  vec_oprnds1.truncate (0);
2734 		  vec_oprnds1.quick_push (vec_oprnd1);
2735 		}
2736 	    }
2737 
2738 	  /* Arguments are ready.  Create the new vector stmts.  */
2739 	  for (i = multi_step_cvt; i >= 0; i--)
2740 	    {
2741 	      tree this_dest = vec_dsts[i];
2742 	      enum tree_code c1 = code1, c2 = code2;
2743 	      if (i == 0 && codecvt2 != ERROR_MARK)
2744 		{
2745 		  c1 = codecvt1;
2746 		  c2 = codecvt2;
2747 		}
2748 	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2749 						      &vec_oprnds1,
2750 						      stmt, this_dest, gsi,
2751 						      c1, c2, decl1, decl2,
2752 						      op_type);
2753 	    }
2754 
2755 	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2756 	    {
2757 	      if (cvt_type)
2758 		{
2759 		  if (codecvt1 == CALL_EXPR)
2760 		    {
2761 		      new_stmt = gimple_build_call (decl1, 1, vop0);
2762 		      new_temp = make_ssa_name (vec_dest, new_stmt);
2763 		      gimple_call_set_lhs (new_stmt, new_temp);
2764 		    }
2765 		  else
2766 		    {
2767 		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2768 		      new_temp = make_ssa_name (vec_dest, NULL);
2769 		      new_stmt = gimple_build_assign_with_ops (codecvt1,
2770 							       new_temp,
2771 							       vop0, NULL);
2772 		    }
2773 
2774 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2775 		}
2776 	      else
2777 		new_stmt = SSA_NAME_DEF_STMT (vop0);
2778 
2779 	      if (slp_node)
2780 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2781 	      else
2782 		{
2783 		  if (!prev_stmt_info)
2784 		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2785 		  else
2786 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2787 		  prev_stmt_info = vinfo_for_stmt (new_stmt);
2788 		}
2789 	    }
2790 	}
2791 
2792       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2793       break;
2794 
2795     case NARROW:
2796       /* In case the vectorization factor (VF) is bigger than the number
2797 	 of elements that we can fit in a vectype (nunits), we have to
2798 	 generate more than one vector stmt - i.e., we need to "unroll"
2799 	 the vector stmt by a factor VF/nunits.  */
2800       for (j = 0; j < ncopies; j++)
2801 	{
2802 	  /* Handle uses.  */
2803 	  if (slp_node)
2804 	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2805 			       slp_node, -1);
2806 	  else
2807 	    {
2808 	      vec_oprnds0.truncate (0);
2809 	      vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2810 					vect_pow2 (multi_step_cvt) - 1);
2811 	    }
2812 
2813 	  /* Arguments are ready.  Create the new vector stmts.  */
2814 	  if (cvt_type)
2815 	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2816 	      {
2817 		if (codecvt1 == CALL_EXPR)
2818 		  {
2819 		    new_stmt = gimple_build_call (decl1, 1, vop0);
2820 		    new_temp = make_ssa_name (vec_dest, new_stmt);
2821 		    gimple_call_set_lhs (new_stmt, new_temp);
2822 		  }
2823 		else
2824 		  {
2825 		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2826 		    new_temp = make_ssa_name (vec_dest, NULL);
2827 		    new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2828 							     vop0, NULL);
2829 		  }
2830 
2831 		vect_finish_stmt_generation (stmt, new_stmt, gsi);
2832 		vec_oprnds0[i] = new_temp;
2833 	      }
2834 
2835 	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2836 						 stmt, vec_dsts, gsi,
2837 						 slp_node, code1,
2838 						 &prev_stmt_info);
2839 	}
2840 
2841       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2842       break;
2843     }
2844 
2845   vec_oprnds0.release ();
2846   vec_oprnds1.release ();
2847   vec_dsts.release ();
2848   interm_types.release ();
2849 
2850   return true;
2851 }
2852 
2853 
2854 /* Function vectorizable_assignment.
2855 
2856    Check if STMT performs an assignment (copy) that can be vectorized.
2857    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2858    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2859    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
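/* For example (a sketch): a plain SSA copy 'a_1 = b_2', a PAREN_EXPR, or a
   conversion that changes neither the number of vector elements nor the
   vector size (e.g. a signedness change) can be handled here; for the
   conversion cases the vector operand is simply wrapped in a
   VIEW_CONVERT_EXPR to the destination vector type.  */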
2860 
2861 static bool
2862 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2863 			 gimple *vec_stmt, slp_tree slp_node)
2864 {
2865   tree vec_dest;
2866   tree scalar_dest;
2867   tree op;
2868   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2869   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2870   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2871   tree new_temp;
2872   tree def;
2873   gimple def_stmt;
2874   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2875   unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2876   int ncopies;
2877   int i, j;
2878   vec<tree> vec_oprnds = vNULL;
2879   tree vop;
2880   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2881   gimple new_stmt = NULL;
2882   stmt_vec_info prev_stmt_info = NULL;
2883   enum tree_code code;
2884   tree vectype_in;
2885 
2886   /* Multiple types in SLP are handled by creating the appropriate number of
2887      vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2888      case of SLP.  */
2889   if (slp_node || PURE_SLP_STMT (stmt_info))
2890     ncopies = 1;
2891   else
2892     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2893 
2894   gcc_assert (ncopies >= 1);
2895 
2896   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2897     return false;
2898 
2899   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2900     return false;
2901 
2902   /* Is vectorizable assignment?  */
2903   if (!is_gimple_assign (stmt))
2904     return false;
2905 
2906   scalar_dest = gimple_assign_lhs (stmt);
2907   if (TREE_CODE (scalar_dest) != SSA_NAME)
2908     return false;
2909 
2910   code = gimple_assign_rhs_code (stmt);
2911   if (gimple_assign_single_p (stmt)
2912       || code == PAREN_EXPR
2913       || CONVERT_EXPR_CODE_P (code))
2914     op = gimple_assign_rhs1 (stmt);
2915   else
2916     return false;
2917 
2918   if (code == VIEW_CONVERT_EXPR)
2919     op = TREE_OPERAND (op, 0);
2920 
2921   if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2922 			     &def_stmt, &def, &dt[0], &vectype_in))
2923     {
2924       if (dump_enabled_p ())
2925         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2926                          "use not simple.");
2927       return false;
2928     }
2929 
2930   /* We can handle NOP_EXPR conversions that do not change the number
2931      of elements or the vector size.  */
2932   if ((CONVERT_EXPR_CODE_P (code)
2933        || code == VIEW_CONVERT_EXPR)
2934       && (!vectype_in
2935 	  || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2936 	  || (GET_MODE_SIZE (TYPE_MODE (vectype))
2937 	      != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2938     return false;
2939 
2940   /* We do not handle bit-precision changes.  */
2941   if ((CONVERT_EXPR_CODE_P (code)
2942        || code == VIEW_CONVERT_EXPR)
2943       && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2944       && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2945 	   != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2946 	  || ((TYPE_PRECISION (TREE_TYPE (op))
2947 	       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2948       /* But a conversion that does not change the bit-pattern is ok.  */
2949       && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2950 	    > TYPE_PRECISION (TREE_TYPE (op)))
2951 	   && TYPE_UNSIGNED (TREE_TYPE (op))))
2952     {
2953       if (dump_enabled_p ())
2954         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2955                          "type conversion to/from bit-precision "
2956                          "unsupported.");
2957       return false;
2958     }
2959 
2960   if (!vec_stmt) /* transformation not required.  */
2961     {
2962       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2963       if (dump_enabled_p ())
2964         dump_printf_loc (MSG_NOTE, vect_location,
2965                          "=== vectorizable_assignment ===");
2966       vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2967       return true;
2968     }
2969 
2970   /** Transform.  **/
2971   if (dump_enabled_p ())
2972     dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.");
2973 
2974   /* Handle def.  */
2975   vec_dest = vect_create_destination_var (scalar_dest, vectype);
2976 
2977   /* Handle use.  */
2978   for (j = 0; j < ncopies; j++)
2979     {
2980       /* Handle uses.  */
2981       if (j == 0)
2982         vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2983       else
2984         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2985 
2986       /* Arguments are ready.  Create the new vector stmt.  */
2987       FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2988        {
2989 	 if (CONVERT_EXPR_CODE_P (code)
2990 	     || code == VIEW_CONVERT_EXPR)
2991 	   vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2992          new_stmt = gimple_build_assign (vec_dest, vop);
2993          new_temp = make_ssa_name (vec_dest, new_stmt);
2994          gimple_assign_set_lhs (new_stmt, new_temp);
2995          vect_finish_stmt_generation (stmt, new_stmt, gsi);
2996          if (slp_node)
2997            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2998        }
2999 
3000       if (slp_node)
3001         continue;
3002 
3003       if (j == 0)
3004         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3005       else
3006         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3007 
3008       prev_stmt_info = vinfo_for_stmt (new_stmt);
3009     }
3010 
3011   vec_oprnds.release ();
3012   return true;
3013 }
3014 
3015 
3016 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
3017    either as shift by a scalar or by a vector.  */
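/* A minimal usage sketch (OPRND is a hypothetical scalar operand):

     if (vect_supportable_shift (LSHIFT_EXPR, TREE_TYPE (oprnd)))
       ... a pattern may then rely on a vector left shift of this type ...

   The check succeeds if the target provides either the vector/scalar or the
   vector/vector optab for the shift on the corresponding vector type.  */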
3018 
3019 bool
3020 vect_supportable_shift (enum tree_code code, tree scalar_type)
3021 {
3023   enum machine_mode vec_mode;
3024   optab optab;
3025   int icode;
3026   tree vectype;
3027 
3028   vectype = get_vectype_for_scalar_type (scalar_type);
3029   if (!vectype)
3030     return false;
3031 
3032   optab = optab_for_tree_code (code, vectype, optab_scalar);
3033   if (!optab
3034       || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
3035     {
3036       optab = optab_for_tree_code (code, vectype, optab_vector);
3037       if (!optab
3038           || (optab_handler (optab, TYPE_MODE (vectype))
3039                       == CODE_FOR_nothing))
3040         return false;
3041     }
3042 
3043   vec_mode = TYPE_MODE (vectype);
3044   icode = (int) optab_handler (optab, vec_mode);
3045   if (icode == CODE_FOR_nothing)
3046     return false;
3047 
3048   return true;
3049 }
3050 
3051 
3052 /* Function vectorizable_shift.
3053 
3054    Check if STMT performs a shift operation that can be vectorized.
3055    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3056    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3057    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
3058 
3059 static bool
3060 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
3061                     gimple *vec_stmt, slp_tree slp_node)
3062 {
3063   tree vec_dest;
3064   tree scalar_dest;
3065   tree op0, op1 = NULL;
3066   tree vec_oprnd1 = NULL_TREE;
3067   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3068   tree vectype;
3069   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3070   enum tree_code code;
3071   enum machine_mode vec_mode;
3072   tree new_temp;
3073   optab optab;
3074   int icode;
3075   enum machine_mode optab_op2_mode;
3076   tree def;
3077   gimple def_stmt;
3078   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3079   gimple new_stmt = NULL;
3080   stmt_vec_info prev_stmt_info;
3081   int nunits_in;
3082   int nunits_out;
3083   tree vectype_out;
3084   tree op1_vectype;
3085   int ncopies;
3086   int j, i;
3087   vec<tree> vec_oprnds0 = vNULL;
3088   vec<tree> vec_oprnds1 = vNULL;
3089   tree vop0, vop1;
3090   unsigned int k;
3091   bool scalar_shift_arg = true;
3092   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3093   int vf;
3094 
3095   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3096     return false;
3097 
3098   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3099     return false;
3100 
3101   /* Is STMT a vectorizable binary/unary operation?   */
3102   if (!is_gimple_assign (stmt))
3103     return false;
3104 
3105   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3106     return false;
3107 
3108   code = gimple_assign_rhs_code (stmt);
3109 
3110   if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3111       || code == RROTATE_EXPR))
3112     return false;
3113 
3114   scalar_dest = gimple_assign_lhs (stmt);
3115   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3116   if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3117       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3118     {
3119       if (dump_enabled_p ())
3120         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3121                          "bit-precision shifts not supported.");
3122       return false;
3123     }
3124 
3125   op0 = gimple_assign_rhs1 (stmt);
3126   if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3127                              &def_stmt, &def, &dt[0], &vectype))
3128     {
3129       if (dump_enabled_p ())
3130         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3131                          "use not simple.");
3132       return false;
3133     }
3134   /* If op0 is an external or constant def, use a vector type with
3135      the same size as the output vector type.  */
3136   if (!vectype)
3137     vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3138   if (vec_stmt)
3139     gcc_assert (vectype);
3140   if (!vectype)
3141     {
3142       if (dump_enabled_p ())
3143         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3144                          "no vectype for scalar type ");
3145       return false;
3146     }
3147 
3148   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3149   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3150   if (nunits_out != nunits_in)
3151     return false;
3152 
3153   op1 = gimple_assign_rhs2 (stmt);
3154   if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3155 			     &def, &dt[1], &op1_vectype))
3156     {
3157       if (dump_enabled_p ())
3158         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3159                          "use not simple.");
3160       return false;
3161     }
3162 
3163   if (loop_vinfo)
3164     vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3165   else
3166     vf = 1;
3167 
3168   /* Multiple types in SLP are handled by creating the appropriate number of
3169      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
3170      case of SLP.  */
3171   if (slp_node || PURE_SLP_STMT (stmt_info))
3172     ncopies = 1;
3173   else
3174     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3175 
3176   gcc_assert (ncopies >= 1);
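  /* A worked example of the ncopies computation above (numbers are
     illustrative only): with a vectorization factor of 8 and a vector type
     holding nunits_in == 4 elements, ncopies == 8 / 4 == 2, i.e. each
     scalar shift stmt is expanded into two vector shift stmts per
     vectorized loop iteration.  In the SLP case the unrolling is instead
     expressed by the number of vector stmts attached to the SLP node,
     which is why ncopies stays 1 there.  */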
3177 
3178   /* Determine whether the shift amount is a vector, or scalar.  If the
3179      shift/rotate amount is a vector, use the vector/vector shift optabs.  */
3180 
3181   if (dt[1] == vect_internal_def && !slp_node)
3182     scalar_shift_arg = false;
3183   else if (dt[1] == vect_constant_def
3184 	   || dt[1] == vect_external_def
3185 	   || dt[1] == vect_internal_def)
3186     {
3187       /* In SLP, we need to check whether the shift count is the same
3188 	 for all statements; in loops, if it is a constant or invariant,
3189 	 it is always a scalar shift.  */
3190       if (slp_node)
3191 	{
3192 	  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3193 	  gimple slpstmt;
3194 
3195 	  FOR_EACH_VEC_ELT (stmts, k, slpstmt)
3196 	    if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3197 	      scalar_shift_arg = false;
3198 	}
3199     }
3200   else
3201     {
3202       if (dump_enabled_p ())
3203         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3204                          "operand mode requires invariant argument.");
3205       return false;
3206     }
3207 
3208   /* Vector shifted by vector.  */
3209   if (!scalar_shift_arg)
3210     {
3211       optab = optab_for_tree_code (code, vectype, optab_vector);
3212       if (dump_enabled_p ())
3213         dump_printf_loc (MSG_NOTE, vect_location,
3214                          "vector/vector shift/rotate found.");
3215 
3216       if (!op1_vectype)
3217 	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3218       if (op1_vectype == NULL_TREE
3219 	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3220 	{
3221 	  if (dump_enabled_p ())
3222 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3223                              "unusable type for last operand in"
3224                              " vector/vector shift/rotate.");
3225 	  return false;
3226 	}
3227     }
3228   /* See if the machine has a vector shifted by scalar insn and if not
3229      then see if it has a vector shifted by vector insn.  */
3230   else
3231     {
3232       optab = optab_for_tree_code (code, vectype, optab_scalar);
3233       if (optab
3234           && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3235         {
3236           if (dump_enabled_p ())
3237             dump_printf_loc (MSG_NOTE, vect_location,
3238                              "vector/scalar shift/rotate found.");
3239         }
3240       else
3241         {
3242           optab = optab_for_tree_code (code, vectype, optab_vector);
3243           if (optab
3244                && (optab_handler (optab, TYPE_MODE (vectype))
3245                       != CODE_FOR_nothing))
3246             {
3247 	      scalar_shift_arg = false;
3248 
3249               if (dump_enabled_p ())
3250                 dump_printf_loc (MSG_NOTE, vect_location,
3251                                  "vector/vector shift/rotate found.");
3252 
3253               /* Unlike the other binary operators, shifts/rotates take
3254                  an int rhs rather than one of the same type as the lhs,
3255                  so make sure the scalar shift amount has the right type
3256 		 when we are dealing with vectors of long long/long/short/char.  */
3257               if (dt[1] == vect_constant_def)
3258                 op1 = fold_convert (TREE_TYPE (vectype), op1);
3259 	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3260 						   TREE_TYPE (op1)))
3261 		{
3262 		  if (slp_node
3263 		      && TYPE_MODE (TREE_TYPE (vectype))
3264 			 != TYPE_MODE (TREE_TYPE (op1)))
3265 		    {
3266                       if (dump_enabled_p ())
3267                         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3268                                          "unusable type for last operand in"
3269                                          " vector/vector shift/rotate.");
3270 			return false;
3271 		    }
3272 		  if (vec_stmt && !slp_node)
3273 		    {
3274 		      op1 = fold_convert (TREE_TYPE (vectype), op1);
3275 		      op1 = vect_init_vector (stmt, op1,
3276 					      TREE_TYPE (vectype), NULL);
3277 		    }
3278 		}
3279             }
3280         }
3281     }
3282 
3283   /* Supportable by target?  */
3284   if (!optab)
3285     {
3286       if (dump_enabled_p ())
3287         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3288                          "no optab.");
3289       return false;
3290     }
3291   vec_mode = TYPE_MODE (vectype);
3292   icode = (int) optab_handler (optab, vec_mode);
3293   if (icode == CODE_FOR_nothing)
3294     {
3295       if (dump_enabled_p ())
3296         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3297                          "op not supported by target.");
3298       /* Check only during analysis.  */
3299       if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3300           || (vf < vect_min_worthwhile_factor (code)
3301               && !vec_stmt))
3302         return false;
3303       if (dump_enabled_p ())
3304         dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.");
3305     }
3306 
3307   /* Worthwhile without SIMD support?  Check only during analysis.  */
3308   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3309       && vf < vect_min_worthwhile_factor (code)
3310       && !vec_stmt)
3311     {
3312       if (dump_enabled_p ())
3313         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3314                          "not worthwhile without SIMD support.");
3315       return false;
3316     }
3317 
3318   if (!vec_stmt) /* transformation not required.  */
3319     {
3320       STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3321       if (dump_enabled_p ())
3322         dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_shift ===");
3323       vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3324       return true;
3325     }
3326 
3327   /** Transform.  **/
3328 
3329   if (dump_enabled_p ())
3330     dump_printf_loc (MSG_NOTE, vect_location,
3331                      "transform binary/unary operation.");
3332 
3333   /* Handle def.  */
3334   vec_dest = vect_create_destination_var (scalar_dest, vectype);
3335 
3336   prev_stmt_info = NULL;
3337   for (j = 0; j < ncopies; j++)
3338     {
3339       /* Handle uses.  */
3340       if (j == 0)
3341         {
3342           if (scalar_shift_arg)
3343             {
3344               /* Vector shl and shr insn patterns can be defined with scalar
3345                  operand 2 (shift operand).  In this case, use constant or loop
3346                  invariant op1 directly, without extending it to vector mode
3347                  first.  */
3348               optab_op2_mode = insn_data[icode].operand[2].mode;
3349               if (!VECTOR_MODE_P (optab_op2_mode))
3350                 {
3351                   if (dump_enabled_p ())
3352                     dump_printf_loc (MSG_NOTE, vect_location,
3353                                      "operand 1 using scalar mode.");
3354                   vec_oprnd1 = op1;
3355                   vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
3356                   vec_oprnds1.quick_push (vec_oprnd1);
3357                   if (slp_node)
3358                     {
3359                       /* Store vec_oprnd1 for every vector stmt to be created
3360                          for SLP_NODE.  We check during the analysis that all
3361                          the shift arguments are the same.
3362                          TODO: Allow different constants for different vector
3363                          stmts generated for an SLP instance.  */
3364                       for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3365                         vec_oprnds1.quick_push (vec_oprnd1);
3366                     }
3367                 }
3368             }
3369 
3370           /* vec_oprnd1 is available if operand 1 should be of a scalar type
3371              (a special case for certain kinds of vector shifts); otherwise,
3372              operand 1 should be of a vector type (the usual case).  */
3373           if (vec_oprnd1)
3374             vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3375                                slp_node, -1);
3376           else
3377             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3378                                slp_node, -1);
3379         }
3380       else
3381         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3382 
3383       /* Arguments are ready.  Create the new vector stmt.  */
3384       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3385         {
3386           vop1 = vec_oprnds1[i];
3387           new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3388           new_temp = make_ssa_name (vec_dest, new_stmt);
3389           gimple_assign_set_lhs (new_stmt, new_temp);
3390           vect_finish_stmt_generation (stmt, new_stmt, gsi);
3391           if (slp_node)
3392             SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3393         }
3394 
3395       if (slp_node)
3396         continue;
3397 
3398       if (j == 0)
3399         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3400       else
3401         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3402       prev_stmt_info = vinfo_for_stmt (new_stmt);
3403     }
3404 
3405   vec_oprnds0.release ();
3406   vec_oprnds1.release ();
3407 
3408   return true;
3409 }
3410 
3411 
3412 static tree permute_vec_elements (tree, tree, tree, gimple,
3413 				  gimple_stmt_iterator *);
3414 
3415 
3416 /* Function vectorizable_operation.
3417 
3418    Check if STMT performs a binary, unary or ternary operation that can
3419    be vectorized.
3420    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3421    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3422    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
3423 
3424 static bool
3425 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3426 			gimple *vec_stmt, slp_tree slp_node)
3427 {
3428   tree vec_dest;
3429   tree scalar_dest;
3430   tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3431   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3432   tree vectype;
3433   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3434   enum tree_code code;
3435   enum machine_mode vec_mode;
3436   tree new_temp;
3437   int op_type;
3438   optab optab;
3439   int icode;
3440   tree def;
3441   gimple def_stmt;
3442   enum vect_def_type dt[3]
3443     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3444   gimple new_stmt = NULL;
3445   stmt_vec_info prev_stmt_info;
3446   int nunits_in;
3447   int nunits_out;
3448   tree vectype_out;
3449   int ncopies;
3450   int j, i;
3451   vec<tree> vec_oprnds0 = vNULL;
3452   vec<tree> vec_oprnds1 = vNULL;
3453   vec<tree> vec_oprnds2 = vNULL;
3454   tree vop0, vop1, vop2;
3455   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3456   int vf;
3457 
3458   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3459     return false;
3460 
3461   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3462     return false;
3463 
3464   /* Is STMT a vectorizable binary/unary operation?   */
3465   if (!is_gimple_assign (stmt))
3466     return false;
3467 
3468   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3469     return false;
3470 
3471   code = gimple_assign_rhs_code (stmt);
3472 
3473   /* For pointer addition, we should use the normal plus for
3474      the vector addition.  */
3475   if (code == POINTER_PLUS_EXPR)
3476     code = PLUS_EXPR;
3477 
3478   /* Support only unary, binary and ternary operations.  */
3479   op_type = TREE_CODE_LENGTH (code);
3480   if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3481     {
3482       if (dump_enabled_p ())
3483         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3484                          "num. args = %d (not unary/binary/ternary op).",
3485                          op_type);
3486       return false;
3487     }
3488 
3489   scalar_dest = gimple_assign_lhs (stmt);
3490   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3491 
3492   /* Most operations cannot handle bit-precision types without extra
3493      truncations.  */
3494   if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3495        != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3496       /* Exception are bitwise binary operations.  */
3497       && code != BIT_IOR_EXPR
3498       && code != BIT_XOR_EXPR
3499       && code != BIT_AND_EXPR)
3500     {
3501       if (dump_enabled_p ())
3502         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3503                          "bit-precision arithmetic not supported.");
3504       return false;
3505     }
3506 
3507   op0 = gimple_assign_rhs1 (stmt);
3508   if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3509 			     &def_stmt, &def, &dt[0], &vectype))
3510     {
3511       if (dump_enabled_p ())
3512         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3513                          "use not simple.");
3514       return false;
3515     }
3516   /* If op0 is an external or constant def, use a vector type with
3517      the same size as the output vector type.  */
3518   if (!vectype)
3519     vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3520   if (vec_stmt)
3521     gcc_assert (vectype);
3522   if (!vectype)
3523     {
3524       if (dump_enabled_p ())
3525         {
3526           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3527                            "no vectype for scalar type ");
3528           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3529                              TREE_TYPE (op0));
3530         }
3531 
3532       return false;
3533     }
3534 
3535   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3536   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3537   if (nunits_out != nunits_in)
3538     return false;
3539 
3540   if (op_type == binary_op || op_type == ternary_op)
3541     {
3542       op1 = gimple_assign_rhs2 (stmt);
3543       if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3544 			       &def, &dt[1]))
3545 	{
3546 	  if (dump_enabled_p ())
3547 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3548                              "use not simple.");
3549 	  return false;
3550 	}
3551     }
3552   if (op_type == ternary_op)
3553     {
3554       op2 = gimple_assign_rhs3 (stmt);
3555       if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3556 			       &def, &dt[2]))
3557 	{
3558 	  if (dump_enabled_p ())
3559 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3560                              "use not simple.");
3561 	  return false;
3562 	}
3563     }
3564 
3565   if (loop_vinfo)
3566     vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3567   else
3568     vf = 1;
3569 
3570   /* Multiple types in SLP are handled by creating the appropriate number of
3571      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
3572      case of SLP.  */
3573   if (slp_node || PURE_SLP_STMT (stmt_info))
3574     ncopies = 1;
3575   else
3576     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3577 
3578   gcc_assert (ncopies >= 1);
3579 
3580   /* Shifts are handled in vectorizable_shift ().  */
3581   if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3582       || code == RROTATE_EXPR)
3583    return false;
3584 
3585   /* Supportable by target?  */
3586 
3587   vec_mode = TYPE_MODE (vectype);
3588   if (code == MULT_HIGHPART_EXPR)
3589     {
3590       if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
3591 	icode = LAST_INSN_CODE;
3592       else
3593 	icode = CODE_FOR_nothing;
3594     }
3595   else
3596     {
3597       optab = optab_for_tree_code (code, vectype, optab_default);
3598       if (!optab)
3599 	{
3600           if (dump_enabled_p ())
3601             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3602                              "no optab.");
3603 	  return false;
3604 	}
3605       icode = (int) optab_handler (optab, vec_mode);
3606     }
3607 
3608   if (icode == CODE_FOR_nothing)
3609     {
3610       if (dump_enabled_p ())
3611 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3612                          "op not supported by target.");
3613       /* Check only during analysis.  */
3614       if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3615 	  || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
3616         return false;
3617       if (dump_enabled_p ())
3618 	dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.");
3619     }
3620 
3621   /* Worthwhile without SIMD support?  Check only during analysis.  */
3622   if (!VECTOR_MODE_P (vec_mode)
3623       && !vec_stmt
3624       && vf < vect_min_worthwhile_factor (code))
3625     {
3626       if (dump_enabled_p ())
3627         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3628                          "not worthwhile without SIMD support.");
3629       return false;
3630     }
3631 
3632   if (!vec_stmt) /* transformation not required.  */
3633     {
3634       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3635       if (dump_enabled_p ())
3636         dump_printf_loc (MSG_NOTE, vect_location,
3637                          "=== vectorizable_operation ===");
3638       vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3639       return true;
3640     }
3641 
3642   /** Transform.  **/
3643 
3644   if (dump_enabled_p ())
3645     dump_printf_loc (MSG_NOTE, vect_location,
3646                      "transform binary/unary operation.");
3647 
3648   /* Handle def.  */
3649   vec_dest = vect_create_destination_var (scalar_dest, vectype);
3650 
3651   /* In case the vectorization factor (VF) is bigger than the number
3652      of elements that we can fit in a vectype (nunits), we have to generate
3653      more than one vector stmt - i.e - we need to "unroll" the
3654      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
3655      from one copy of the vector stmt to the next, in the field
3656      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
3657      stages to find the correct vector defs to be used when vectorizing
3658      stmts that use the defs of the current stmt.  The example below
3659      illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3660      we need to create 4 vectorized stmts):
3661 
3662      before vectorization:
3663                                 RELATED_STMT    VEC_STMT
3664         S1:     x = memref      -               -
3665         S2:     z = x + 1       -               -
3666 
3667      step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3668              there):
3669                                 RELATED_STMT    VEC_STMT
3670         VS1_0:  vx0 = memref0   VS1_1           -
3671         VS1_1:  vx1 = memref1   VS1_2           -
3672         VS1_2:  vx2 = memref2   VS1_3           -
3673         VS1_3:  vx3 = memref3   -               -
3674         S1:     x = load        -               VS1_0
3675         S2:     z = x + 1       -               -
3676 
3677      step2: vectorize stmt S2 (done here):
3678         To vectorize stmt S2 we first need to find the relevant vector
3679         def for the first operand 'x'.  This is, as usual, obtained from
3680         the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3681         that defines 'x' (S1).  This way we find the stmt VS1_0, and the
3682         relevant vector def 'vx0'.  Having found 'vx0' we can generate
3683         the vector stmt VS2_0, and as usual, record it in the
3684         STMT_VINFO_VEC_STMT of stmt S2.
3685         When creating the second copy (VS2_1), we obtain the relevant vector
3686         def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3687         stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
3688         vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
3689         pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3690         Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
3691         chain of stmts and pointers:
3692                                 RELATED_STMT    VEC_STMT
3693         VS1_0:  vx0 = memref0   VS1_1           -
3694         VS1_1:  vx1 = memref1   VS1_2           -
3695         VS1_2:  vx2 = memref2   VS1_3           -
3696         VS1_3:  vx3 = memref3   -               -
3697         S1:     x = load        -               VS1_0
3698         VS2_0:  vz0 = vx0 + v1  VS2_1           -
3699         VS2_1:  vz1 = vx1 + v1  VS2_2           -
3700         VS2_2:  vz2 = vx2 + v1  VS2_3           -
3701         VS2_3:  vz3 = vx3 + v1  -               -
3702         S2:     z = x + 1       -               VS2_0  */
3703 
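  /* A source-level sketch of the same situation (VF == 16, nunits == 4),
     assuming a simple int loop:

       for (i = 0; i < n; i++)
         z[i] = x[i] + 1;

     One vectorized iteration covers 16 scalar iterations and therefore
     needs ncopies == 4 vector additions vz0 ... vz3, each adding the
     constant vector {1, 1, 1, 1} to one of the four loaded vectors
     vx0 ... vx3 shown above.  */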
3704   prev_stmt_info = NULL;
3705   for (j = 0; j < ncopies; j++)
3706     {
3707       /* Handle uses.  */
3708       if (j == 0)
3709 	{
3710 	  if (op_type == binary_op || op_type == ternary_op)
3711 	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3712 			       slp_node, -1);
3713 	  else
3714 	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3715 			       slp_node, -1);
3716 	  if (op_type == ternary_op)
3717 	    {
3718 	      vec_oprnds2.create (1);
3719 	      vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
3720 		                                                    stmt,
3721 								    NULL));
3722 	    }
3723 	}
3724       else
3725 	{
3726 	  vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3727 	  if (op_type == ternary_op)
3728 	    {
3729 	      tree vec_oprnd = vec_oprnds2.pop ();
3730 	      vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
3731 							           vec_oprnd));
3732 	    }
3733 	}
3734 
3735       /* Arguments are ready.  Create the new vector stmt.  */
3736       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3737         {
3738 	  vop1 = ((op_type == binary_op || op_type == ternary_op)
3739 		  ? vec_oprnds1[i] : NULL_TREE);
3740 	  vop2 = ((op_type == ternary_op)
3741 		  ? vec_oprnds2[i] : NULL_TREE);
3742 	  new_stmt = gimple_build_assign_with_ops (code, vec_dest,
3743 						   vop0, vop1, vop2);
3744 	  new_temp = make_ssa_name (vec_dest, new_stmt);
3745 	  gimple_assign_set_lhs (new_stmt, new_temp);
3746 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3747           if (slp_node)
3748 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3749         }
3750 
3751       if (slp_node)
3752         continue;
3753 
3754       if (j == 0)
3755 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3756       else
3757 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3758       prev_stmt_info = vinfo_for_stmt (new_stmt);
3759     }
3760 
3761   vec_oprnds0.release ();
3762   vec_oprnds1.release ();
3763   vec_oprnds2.release ();
3764 
3765   return true;
3766 }
3767 
3768 
3769 /* Function vectorizable_store.
3770 
3771    Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3772    can be vectorized.
3773    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3774    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3775    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
3776 
3777 static bool
3778 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3779 		    slp_tree slp_node)
3780 {
3781   tree scalar_dest;
3782   tree data_ref;
3783   tree op;
3784   tree vec_oprnd = NULL_TREE;
3785   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3786   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3787   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3788   tree elem_type;
3789   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3790   struct loop *loop = NULL;
3791   enum machine_mode vec_mode;
3792   tree dummy;
3793   enum dr_alignment_support alignment_support_scheme;
3794   tree def;
3795   gimple def_stmt;
3796   enum vect_def_type dt;
3797   stmt_vec_info prev_stmt_info = NULL;
3798   tree dataref_ptr = NULL_TREE;
3799   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3800   int ncopies;
3801   int j;
3802   gimple next_stmt, first_stmt = NULL;
3803   bool grouped_store = false;
3804   bool store_lanes_p = false;
3805   unsigned int group_size, i;
3806   vec<tree> dr_chain = vNULL;
3807   vec<tree> oprnds = vNULL;
3808   vec<tree> result_chain = vNULL;
3809   bool inv_p;
3810   vec<tree> vec_oprnds = vNULL;
3811   bool slp = (slp_node != NULL);
3812   unsigned int vec_num;
3813   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3814   tree aggr_type;
3815 
3816   if (loop_vinfo)
3817     loop = LOOP_VINFO_LOOP (loop_vinfo);
3818 
3819   /* Multiple types in SLP are handled by creating the appropriate number of
3820      vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3821      case of SLP.  */
3822   if (slp || PURE_SLP_STMT (stmt_info))
3823     ncopies = 1;
3824   else
3825     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3826 
3827   gcc_assert (ncopies >= 1);
3828 
3829   /* FORNOW. This restriction should be relaxed.  */
3830   if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3831     {
3832       if (dump_enabled_p ())
3833         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3834                          "multiple types in nested loop.");
3835       return false;
3836     }
3837 
3838   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3839     return false;
3840 
3841   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3842     return false;
3843 
3844   /* Is vectorizable store? */
3845 
3846   if (!is_gimple_assign (stmt))
3847     return false;
3848 
3849   scalar_dest = gimple_assign_lhs (stmt);
3850   if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3851       && is_pattern_stmt_p (stmt_info))
3852     scalar_dest = TREE_OPERAND (scalar_dest, 0);
3853   if (TREE_CODE (scalar_dest) != ARRAY_REF
3854       && TREE_CODE (scalar_dest) != INDIRECT_REF
3855       && TREE_CODE (scalar_dest) != COMPONENT_REF
3856       && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3857       && TREE_CODE (scalar_dest) != REALPART_EXPR
3858       && TREE_CODE (scalar_dest) != MEM_REF)
3859     return false;
3860 
3861   gcc_assert (gimple_assign_single_p (stmt));
3862   op = gimple_assign_rhs1 (stmt);
3863   if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3864 			   &def, &dt))
3865     {
3866       if (dump_enabled_p ())
3867         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3868                          "use not simple.");
3869       return false;
3870     }
3871 
3872   elem_type = TREE_TYPE (vectype);
3873   vec_mode = TYPE_MODE (vectype);
3874 
3875   /* FORNOW.  In some cases we can vectorize even if the data type is not
3876      supported (e.g. array initialization with 0).  */
3877   if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3878     return false;
3879 
3880   if (!STMT_VINFO_DATA_REF (stmt_info))
3881     return false;
3882 
3883   if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3884 			    ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3885 			    size_zero_node) < 0)
3886     {
3887       if (dump_enabled_p ())
3888         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3889                          "negative step for store.");
3890       return false;
3891     }
3892 
3893   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
3894     {
3895       grouped_store = true;
3896       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3897       if (!slp && !PURE_SLP_STMT (stmt_info))
3898 	{
3899 	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3900 	  if (vect_store_lanes_supported (vectype, group_size))
3901 	    store_lanes_p = true;
3902 	  else if (!vect_grouped_store_supported (vectype, group_size))
3903 	    return false;
3904 	}
3905 
3906       if (first_stmt == stmt)
3907 	{
3908           /* STMT is the leader of the group. Check the operands of all the
3909              stmts of the group.  */
3910           next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3911           while (next_stmt)
3912             {
3913 	      gcc_assert (gimple_assign_single_p (next_stmt));
3914 	      op = gimple_assign_rhs1 (next_stmt);
3915               if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3916 				       &def_stmt, &def, &dt))
3917                 {
3918                   if (dump_enabled_p ())
3919                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3920                                      "use not simple.");
3921                   return false;
3922                 }
3923               next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3924             }
3925         }
3926     }
3927 
3928   if (!vec_stmt) /* transformation not required.  */
3929     {
3930       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3931       vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
3932 			     NULL, NULL, NULL);
3933       return true;
3934     }
3935 
3936   /** Transform.  **/
3937 
3938   if (grouped_store)
3939     {
3940       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3941       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3942 
3943       GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3944 
3945       /* FORNOW */
3946       gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3947 
3948       /* We vectorize all the stmts of the interleaving group when we
3949 	 reach the last stmt in the group.  */
3950       if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3951 	  < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3952 	  && !slp)
3953 	{
3954 	  *vec_stmt = NULL;
3955 	  return true;
3956 	}
3957 
3958       if (slp)
3959         {
3960           grouped_store = false;
3961           /* VEC_NUM is the number of vect stmts to be created for this
3962              group.  */
3963           vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3964           first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3965           first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3966 	  op = gimple_assign_rhs1 (first_stmt);
3967         }
3968       else
3969         /* VEC_NUM is the number of vect stmts to be created for this
3970            group.  */
3971 	vec_num = group_size;
3972     }
3973   else
3974     {
3975       first_stmt = stmt;
3976       first_dr = dr;
3977       group_size = vec_num = 1;
3978     }
3979 
3980   if (dump_enabled_p ())
3981     dump_printf_loc (MSG_NOTE, vect_location,
3982                      "transform store. ncopies = %d", ncopies);
3983 
3984   dr_chain.create (group_size);
3985   oprnds.create (group_size);
3986 
3987   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3988   gcc_assert (alignment_support_scheme);
3989   /* Targets with store-lane instructions must not require explicit
3990      realignment.  */
3991   gcc_assert (!store_lanes_p
3992 	      || alignment_support_scheme == dr_aligned
3993 	      || alignment_support_scheme == dr_unaligned_supported);
3994 
3995   if (store_lanes_p)
3996     aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3997   else
3998     aggr_type = vectype;
3999 
4000   /* In case the vectorization factor (VF) is bigger than the number
4001      of elements that we can fit in a vectype (nunits), we have to generate
4002      more than one vector stmt - i.e - we need to "unroll" the
4003      vector stmt by a factor VF/nunits.  For more details see documentation in
4004      vect_get_vec_def_for_copy_stmt.  */
4005 
4006   /* In case of interleaving (non-unit grouped access):
4007 
4008         S1:  &base + 2 = x2
4009         S2:  &base = x0
4010         S3:  &base + 1 = x1
4011         S4:  &base + 3 = x3
4012 
4013      We create vectorized stores starting from the base address (the access of
4014      the first stmt in the chain, S2 in the above example) when the last store
4015      stmt of the chain (S4) is reached:
4016 
4017         VS1: &base = vx2
4018 	VS2: &base + vec_size*1 = vx0
4019 	VS3: &base + vec_size*2 = vx1
4020 	VS4: &base + vec_size*3 = vx3
4021 
4022      Then permutation statements are generated:
4023 
4024 	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
4025 	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
4026 	...
4027 
4028      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4029      (the order of the data-refs in the output of vect_permute_store_chain
4030      corresponds to the order of scalar stmts in the interleaving chain - see
4031      the documentation of vect_permute_store_chain()).
4032 
4033      In case of both multiple types and interleaving, above vector stores and
4034      permutation stmts are created for every copy.  The result vector stmts are
4035      put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4036      STMT_VINFO_RELATED_STMT for the next copies.
4037   */
4038 
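  /* A source-level sketch of such an interleaved store group (group size 4,
     assuming int elements):

       for (i = 0; i < n; i++)
         {
           base[4*i + 2] = x2;
           base[4*i]     = x0;
           base[4*i + 1] = x1;
           base[4*i + 3] = x3;
         }

     The four scalar stores form one chain; only when the last of them in
     program order (S4 above) is reached are the vector stores and the
     interleaving VEC_PERM_EXPRs emitted.  */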
4039   prev_stmt_info = NULL;
4040   for (j = 0; j < ncopies; j++)
4041     {
4042       gimple new_stmt;
4043       gimple ptr_incr;
4044 
4045       if (j == 0)
4046 	{
4047           if (slp)
4048             {
4049 	      /* Get vectorized arguments for SLP_NODE.  */
4050               vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
4051                                  NULL, slp_node, -1);
4052 
4053               vec_oprnd = vec_oprnds[0];
4054             }
4055           else
4056             {
4057 	      /* For interleaved stores we collect vectorized defs for all the
4058 		 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4059 		 used as an input to vect_permute_store_chain(), and OPRNDS as
4060 		 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4061 
4062 		 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4063 		 OPRNDS are of size 1.  */
4064 	      next_stmt = first_stmt;
4065 	      for (i = 0; i < group_size; i++)
4066 		{
4067 		  /* Since gaps are not supported for interleaved stores,
4068 		     GROUP_SIZE is the exact number of stmts in the chain.
4069 		     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
4070 		     there is no interleaving, GROUP_SIZE is 1, and only one
4071 		     iteration of the loop will be executed.  */
4072 		  gcc_assert (next_stmt
4073 			      && gimple_assign_single_p (next_stmt));
4074 		  op = gimple_assign_rhs1 (next_stmt);
4075 
4076 		  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4077 							    NULL);
4078 		  dr_chain.quick_push (vec_oprnd);
4079 		  oprnds.quick_push (vec_oprnd);
4080 		  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4081 		}
4082 	    }
4083 
4084 	  /* We should have caught mismatched types earlier.  */
4085 	  gcc_assert (useless_type_conversion_p (vectype,
4086 						 TREE_TYPE (vec_oprnd)));
4087 	  dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
4088 						  NULL_TREE, &dummy, gsi,
4089 						  &ptr_incr, false, &inv_p);
4090 	  gcc_assert (bb_vinfo || !inv_p);
4091 	}
4092       else
4093 	{
4094 	  /* For interleaved stores we created vectorized defs for all the
4095 	     defs stored in OPRNDS in the previous iteration (previous copy).
4096 	     DR_CHAIN is then used as an input to vect_permute_store_chain(),
4097 	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4098 	     next copy.
4099 	     If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4100 	     OPRNDS are of size 1.  */
4101 	  for (i = 0; i < group_size; i++)
4102 	    {
4103 	      op = oprnds[i];
4104 	      vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
4105 				  &def, &dt);
4106 	      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
4107 	      dr_chain[i] = vec_oprnd;
4108 	      oprnds[i] = vec_oprnd;
4109 	    }
4110 	  dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4111 					 TYPE_SIZE_UNIT (aggr_type));
4112 	}
4113 
4114       if (store_lanes_p)
4115 	{
4116 	  tree vec_array;
4117 
4118 	  /* Combine all the vectors into an array.  */
4119 	  vec_array = create_vector_array (vectype, vec_num);
4120 	  for (i = 0; i < vec_num; i++)
4121 	    {
4122 	      vec_oprnd = dr_chain[i];
4123 	      write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
4124 	    }
4125 
4126 	  /* Emit:
4127 	       MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
4128 	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4129 	  new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4130 	  gimple_call_set_lhs (new_stmt, data_ref);
4131 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4132 	}
4133       else
4134 	{
4135 	  new_stmt = NULL;
4136 	  if (grouped_store)
4137 	    {
4138 	      if (j == 0)
4139 		result_chain.create (group_size);
4140 	      /* Permute.  */
4141 	      vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4142 					&result_chain);
4143 	    }
4144 
4145 	  next_stmt = first_stmt;
4146 	  for (i = 0; i < vec_num; i++)
4147 	    {
4148 	      unsigned align, misalign;
4149 
4150 	      if (i > 0)
4151 		/* Bump the vector pointer.  */
4152 		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4153 					       stmt, NULL_TREE);
4154 
4155 	      if (slp)
4156 		vec_oprnd = vec_oprnds[i];
4157 	      else if (grouped_store)
4158 		/* For grouped stores vectorized defs are interleaved in
4159 		   vect_permute_store_chain().  */
4160 		vec_oprnd = result_chain[i];
4161 
4162 	      data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4163 				 build_int_cst (reference_alias_ptr_type
4164 						(DR_REF (first_dr)), 0));
4165 	      align = TYPE_ALIGN_UNIT (vectype);
4166 	      if (aligned_access_p (first_dr))
4167 		misalign = 0;
4168 	      else if (DR_MISALIGNMENT (first_dr) == -1)
4169 		{
4170 		  TREE_TYPE (data_ref)
4171 		    = build_aligned_type (TREE_TYPE (data_ref),
4172 					  TYPE_ALIGN (elem_type));
4173 		  align = TYPE_ALIGN_UNIT (elem_type);
4174 		  misalign = 0;
4175 		}
4176 	      else
4177 		{
4178 		  TREE_TYPE (data_ref)
4179 		    = build_aligned_type (TREE_TYPE (data_ref),
4180 					  TYPE_ALIGN (elem_type));
4181 		  misalign = DR_MISALIGNMENT (first_dr);
4182 		}
4183 	      set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
4184 				      misalign);
4185 
4186 	      /* Arguments are ready.  Create the new vector stmt.  */
4187 	      new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4188 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
4189 
4190 	      if (slp)
4191 		continue;
4192 
4193 	      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4194 	      if (!next_stmt)
4195 		break;
4196 	    }
4197 	}
4198       if (!slp)
4199 	{
4200 	  if (j == 0)
4201 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4202 	  else
4203 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4204 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
4205 	}
4206     }
4207 
4208   dr_chain.release ();
4209   oprnds.release ();
4210   result_chain.release ();
4211   vec_oprnds.release ();
4212 
4213   return true;
4214 }
4215 
4216 /* Given a vector type VECTYPE and permutation SEL returns
4217    the VECTOR_CST mask that implements the permutation of the
4218    vector elements.  If that is impossible to do, returns NULL.  */
4219 
4220 tree
4221 vect_gen_perm_mask (tree vectype, unsigned char *sel)
4222 {
4223   tree mask_elt_type, mask_type, mask_vec, *mask_elts;
4224   int i, nunits;
4225 
4226   nunits = TYPE_VECTOR_SUBPARTS (vectype);
4227 
4228   if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4229     return NULL;
4230 
4231   mask_elt_type = lang_hooks.types.type_for_mode
4232 		    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
4233   mask_type = get_vectype_for_scalar_type (mask_elt_type);
4234 
4235   mask_elts = XALLOCAVEC (tree, nunits);
4236   for (i = nunits - 1; i >= 0; i--)
4237     mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
4238   mask_vec = build_vector (mask_type, mask_elts);
4239 
4240   return mask_vec;
4241 }
4242 
4243 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4244    reversal of the vector elements.  If that is impossible to do,
4245    returns NULL.  */
4246 
4247 static tree
4248 perm_mask_for_reverse (tree vectype)
4249 {
4250   int i, nunits;
4251   unsigned char *sel;
4252 
4253   nunits = TYPE_VECTOR_SUBPARTS (vectype);
4254   sel = XALLOCAVEC (unsigned char, nunits);
4255 
4256   for (i = 0; i < nunits; ++i)
4257     sel[i] = nunits - 1 - i;
4258 
4259   return vect_gen_perm_mask (vectype, sel);
4260 }
4261 
4262 /* Given vector variables X and Y that were generated for the scalar
4263    STMT, generate instructions to permute the vector elements of X and Y
4264    using the permutation mask MASK_VEC, insert them at *GSI and return
4265    the permuted vector variable.  */
4266 
4267 static tree
4268 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4269 		      gimple_stmt_iterator *gsi)
4270 {
4271   tree vectype = TREE_TYPE (x);
4272   tree perm_dest, data_ref;
4273   gimple perm_stmt;
4274 
4275   perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4276   data_ref = make_ssa_name (perm_dest, NULL);
4277 
4278   /* Generate the permute statement.  */
4279   perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4280 					    x, y, mask_vec);
4281   vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4282 
4283   return data_ref;
4284 }
4285 
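/* As a concrete illustration of the three helpers above (a sketch for a
   4-element vector type): perm_mask_for_reverse builds the selector
   {3, 2, 1, 0}; vect_gen_perm_mask turns it into a VECTOR_CST of the
   matching integer vector type, provided can_vec_perm_p accepts it; and
   permute_vec_elements then emits

     vx' = VEC_PERM_EXPR <vx, vx, {3, 2, 1, 0}>;

   yielding an element-reversed copy of vx, as used for negative-step
   loads below.  */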
4286 /* vectorizable_load.
4287 
4288    Check if STMT reads a non scalar data-ref (array/pointer/structure) that
4289    can be vectorized.
4290    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4291    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4292    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4293 
4294 static bool
4295 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4296 		   slp_tree slp_node, slp_instance slp_node_instance)
4297 {
4298   tree scalar_dest;
4299   tree vec_dest = NULL;
4300   tree data_ref = NULL;
4301   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4302   stmt_vec_info prev_stmt_info;
4303   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4304   struct loop *loop = NULL;
4305   struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4306   bool nested_in_vect_loop = false;
4307   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4308   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4309   tree elem_type;
4310   tree new_temp;
4311   enum machine_mode mode;
4312   gimple new_stmt = NULL;
4313   tree dummy;
4314   enum dr_alignment_support alignment_support_scheme;
4315   tree dataref_ptr = NULL_TREE;
4316   gimple ptr_incr;
4317   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4318   int ncopies;
4319   int i, j, group_size;
4320   tree msq = NULL_TREE, lsq;
4321   tree offset = NULL_TREE;
4322   tree realignment_token = NULL_TREE;
4323   gimple phi = NULL;
4324   vec<tree> dr_chain = vNULL;
4325   bool grouped_load = false;
4326   bool load_lanes_p = false;
4327   gimple first_stmt;
4328   bool inv_p;
4329   bool negative = false;
4330   bool compute_in_loop = false;
4331   struct loop *at_loop;
4332   int vec_num;
4333   bool slp = (slp_node != NULL);
4334   bool slp_perm = false;
4335   enum tree_code code;
4336   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4337   int vf;
4338   tree aggr_type;
4339   tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4340   tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4341   int gather_scale = 1;
4342   enum vect_def_type gather_dt = vect_unknown_def_type;
4343 
4344   if (loop_vinfo)
4345     {
4346       loop = LOOP_VINFO_LOOP (loop_vinfo);
4347       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4348       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4349     }
4350   else
4351     vf = 1;
4352 
4353   /* Multiple types in SLP are handled by creating the appropriate number of
4354      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
4355      case of SLP.  */
4356   if (slp || PURE_SLP_STMT (stmt_info))
4357     ncopies = 1;
4358   else
4359     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4360 
4361   gcc_assert (ncopies >= 1);
4362 
4363   /* FORNOW. This restriction should be relaxed.  */
4364   if (nested_in_vect_loop && ncopies > 1)
4365     {
4366       if (dump_enabled_p ())
4367         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4368                          "multiple types in nested loop.");
4369       return false;
4370     }
4371 
4372   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4373     return false;
4374 
4375   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4376     return false;
4377 
4378   /* Is vectorizable load? */
4379   if (!is_gimple_assign (stmt))
4380     return false;
4381 
4382   scalar_dest = gimple_assign_lhs (stmt);
4383   if (TREE_CODE (scalar_dest) != SSA_NAME)
4384     return false;
4385 
4386   code = gimple_assign_rhs_code (stmt);
4387   if (code != ARRAY_REF
4388       && code != INDIRECT_REF
4389       && code != COMPONENT_REF
4390       && code != IMAGPART_EXPR
4391       && code != REALPART_EXPR
4392       && code != MEM_REF
4393       && TREE_CODE_CLASS (code) != tcc_declaration)
4394     return false;
4395 
4396   if (!STMT_VINFO_DATA_REF (stmt_info))
4397     return false;
4398 
4399   elem_type = TREE_TYPE (vectype);
4400   mode = TYPE_MODE (vectype);
4401 
4402   /* FORNOW.  In some cases we can vectorize even if the data type is not
4403     supported (e.g. data copies).  */
4404   if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4405     {
4406       if (dump_enabled_p ())
4407         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4408                          "Aligned load, but unsupported type.");
4409       return false;
4410     }
4411 
4412   /* Check if the load is a part of an interleaving chain.  */
4413   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
4414     {
4415       grouped_load = true;
4416       /* FORNOW */
4417       gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4418 
4419       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4420       if (!slp && !PURE_SLP_STMT (stmt_info))
4421 	{
4422 	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4423 	  if (vect_load_lanes_supported (vectype, group_size))
4424 	    load_lanes_p = true;
4425 	  else if (!vect_grouped_load_supported (vectype, group_size))
4426 	    return false;
4427 	}
4428     }
4429 
4430 
4431   if (STMT_VINFO_GATHER_P (stmt_info))
4432     {
4433       gimple def_stmt;
4434       tree def;
4435       gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4436 				       &gather_off, &gather_scale);
4437       gcc_assert (gather_decl);
4438       if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
4439 				 &def_stmt, &def, &gather_dt,
4440 				 &gather_off_vectype))
4441 	{
4442 	  if (dump_enabled_p ())
4443 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4444                              "gather index use not simple.");
4445 	  return false;
4446 	}
4447     }
4448   else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4449     ;
4450   else
4451     {
4452       negative = tree_int_cst_compare (nested_in_vect_loop
4453 				       ? STMT_VINFO_DR_STEP (stmt_info)
4454 				       : DR_STEP (dr),
4455 				       size_zero_node) < 0;
4456       if (negative && ncopies > 1)
4457 	{
4458 	  if (dump_enabled_p ())
4459 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4460                              "multiple types with negative step.");
4461 	  return false;
4462 	}
4463 
4464       if (negative)
4465 	{
4466 	  gcc_assert (!grouped_load);
4467 	  alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4468 	  if (alignment_support_scheme != dr_aligned
4469 	      && alignment_support_scheme != dr_unaligned_supported)
4470 	    {
4471               if (dump_enabled_p ())
4472                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4473                                  "negative step but alignment required.");
4474 	      return false;
4475 	    }
4476 	  if (!perm_mask_for_reverse (vectype))
4477 	    {
4478               if (dump_enabled_p ())
4479                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4480                                  "negative step and reversing not supported.");
4481 	      return false;
4482 	    }
4483 	}
4484     }
4485 
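  /* A sketch of the negative-step case checked above, assuming int
     elements:

       for (i = n - 1; i >= 0; i--)
         ... = a[i];

     has DR_STEP < 0.  Each vector load then reads a block of consecutive
     elements and the result is element-reversed with the mask from
     perm_mask_for_reverse before use, which is why both a suitable
     alignment scheme and a supported reverse permutation are required.  */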
4486   if (!vec_stmt) /* transformation not required.  */
4487     {
4488       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4489       vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
4490       return true;
4491     }
4492 
4493   if (dump_enabled_p ())
4494     dump_printf_loc (MSG_NOTE, vect_location,
4495                      "transform load. ncopies = %d", ncopies);
4496 
4497   /** Transform.  **/
4498 
4499   if (STMT_VINFO_GATHER_P (stmt_info))
4500     {
4501       tree vec_oprnd0 = NULL_TREE, op;
4502       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4503       tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4504       tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4505       edge pe = loop_preheader_edge (loop);
4506       gimple_seq seq;
4507       basic_block new_bb;
4508       enum { NARROW, NONE, WIDEN } modifier;
4509       int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4510 
4511       if (nunits == gather_off_nunits)
4512 	modifier = NONE;
4513       else if (nunits == gather_off_nunits / 2)
4514 	{
4515 	  unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4516 	  modifier = WIDEN;
4517 
4518 	  for (i = 0; i < gather_off_nunits; ++i)
4519 	    sel[i] = i | nunits;
4520 
4521 	  perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4522 	  gcc_assert (perm_mask != NULL_TREE);
4523 	}
4524       else if (nunits == gather_off_nunits * 2)
4525 	{
4526 	  unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4527 	  modifier = NARROW;
4528 
4529 	  for (i = 0; i < nunits; ++i)
4530 	    sel[i] = i < gather_off_nunits
4531 		     ? i : i + nunits - gather_off_nunits;
4532 
4533 	  perm_mask = vect_gen_perm_mask (vectype, sel);
4534 	  gcc_assert (perm_mask != NULL_TREE);
4535 	  ncopies *= 2;
4536 	}
4537       else
4538 	gcc_unreachable ();
4539 
4540       rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4541       srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4542       ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4543       idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4544       masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4545       scaletype = TREE_VALUE (arglist);
4546       gcc_checking_assert (types_compatible_p (srctype, rettype)
4547 			   && types_compatible_p (srctype, masktype));
4548 
4549       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4550 
4551       ptr = fold_convert (ptrtype, gather_base);
4552       if (!is_gimple_min_invariant (ptr))
4553 	{
4554 	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4555 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4556 	  gcc_assert (!new_bb);
4557 	}
4558 
4559       /* Currently we support only unconditional gather loads,
4560 	 so mask should be all ones.  */
4561       if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4562 	mask = build_int_cst (TREE_TYPE (masktype), -1);
4563       else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4564 	{
4565 	  REAL_VALUE_TYPE r;
4566 	  long tmp[6];
4567 	  for (j = 0; j < 6; ++j)
4568 	    tmp[j] = -1;
4569 	  real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4570 	  mask = build_real (TREE_TYPE (masktype), r);
4571 	}
4572       else
4573 	gcc_unreachable ();
4574       mask = build_vector_from_val (masktype, mask);
4575       mask = vect_init_vector (stmt, mask, masktype, NULL);
4576 
4577       scale = build_int_cst (scaletype, gather_scale);
4578 
4579       prev_stmt_info = NULL;
4580       for (j = 0; j < ncopies; ++j)
4581 	{
4582 	  if (modifier == WIDEN && (j & 1))
4583 	    op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4584 				       perm_mask, stmt, gsi);
4585 	  else if (j == 0)
4586 	    op = vec_oprnd0
4587 	      = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4588 	  else
4589 	    op = vec_oprnd0
4590 	      = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4591 
4592 	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4593 	    {
4594 	      gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4595 			  == TYPE_VECTOR_SUBPARTS (idxtype));
4596 	      var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4597 	      var = make_ssa_name (var, NULL);
4598 	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4599 	      new_stmt
4600 		= gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4601 						op, NULL_TREE);
4602 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
4603 	      op = var;
4604 	    }
4605 
4606 	  new_stmt
4607 	    = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4608 
4609 	  if (!useless_type_conversion_p (vectype, rettype))
4610 	    {
4611 	      gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4612 			  == TYPE_VECTOR_SUBPARTS (rettype));
4613 	      var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4614 	      op = make_ssa_name (var, new_stmt);
4615 	      gimple_call_set_lhs (new_stmt, op);
4616 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
4617 	      var = make_ssa_name (vec_dest, NULL);
4618 	      op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4619 	      new_stmt
4620 		= gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4621 						NULL_TREE);
4622 	    }
4623 	  else
4624 	    {
4625 	      var = make_ssa_name (vec_dest, new_stmt);
4626 	      gimple_call_set_lhs (new_stmt, var);
4627 	    }
4628 
4629 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4630 
4631 	  if (modifier == NARROW)
4632 	    {
4633 	      if ((j & 1) == 0)
4634 		{
4635 		  prev_res = var;
4636 		  continue;
4637 		}
4638 	      var = permute_vec_elements (prev_res, var,
4639 					  perm_mask, stmt, gsi);
4640 	      new_stmt = SSA_NAME_DEF_STMT (var);
4641 	    }
4642 
4643 	  if (prev_stmt_info == NULL)
4644 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4645 	  else
4646 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4647 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
4648 	}
4649       return true;
4650     }
4651   else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4652     {
4653       gimple_stmt_iterator incr_gsi;
4654       bool insert_after;
4655       gimple incr;
4656       tree offvar;
4657       tree ivstep;
4658       tree running_off;
4659       vec<constructor_elt, va_gc> *v = NULL;
4660       gimple_seq stmts = NULL;
4661       tree stride_base, stride_step, alias_off;
4662 
4663       gcc_assert (!nested_in_vect_loop);
4664 
4665       stride_base
4666 	= fold_build_pointer_plus
4667 	    (unshare_expr (DR_BASE_ADDRESS (dr)),
4668 	     size_binop (PLUS_EXPR,
4669 			 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
4670 			 convert_to_ptrofftype (DR_INIT(dr))));
4671       stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
4672 
4673       /* For a load with loop-invariant (but other than power-of-2)
4674          stride (i.e. not a grouped access) like so:
4675 
4676 	   for (i = 0; i < n; i += stride)
4677 	     ... = array[i];
4678 
4679 	 we generate a new induction variable and new accesses to
4680 	 form a new vector (or vectors, depending on ncopies):
4681 
4682 	   for (j = 0; ; j += VF*stride)
4683 	     tmp1 = array[j];
4684 	     tmp2 = array[j + stride];
4685 	     ...
4686 	     vectemp = {tmp1, tmp2, ...}
4687          */
4688 
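      /* Illustratively, with four elements per vector the code below loads
         array[j], array[j + stride], array[j + 2*stride] and
         array[j + 3*stride] through a bumped pointer and packs them into
         one vector with a CONSTRUCTOR; the induction variable itself
         advances by VF times the step per loop iteration (IVSTEP below).  */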
4689       ivstep = stride_step;
4690       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4691 			    build_int_cst (TREE_TYPE (ivstep), vf));
4692 
4693       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4694 
4695       create_iv (stride_base, ivstep, NULL,
4696 		 loop, &incr_gsi, insert_after,
4697 		 &offvar, NULL);
4698       incr = gsi_stmt (incr_gsi);
4699       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4700 
4701       stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4702       if (stmts)
4703 	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4704 
4705       prev_stmt_info = NULL;
4706       running_off = offvar;
4707       alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
4708       for (j = 0; j < ncopies; j++)
4709 	{
4710 	  tree vec_inv;
4711 
4712 	  vec_alloc (v, nunits);
4713 	  for (i = 0; i < nunits; i++)
4714 	    {
4715 	      tree newref, newoff;
4716 	      gimple incr;
4717 	      newref = build2 (MEM_REF, TREE_TYPE (vectype),
4718 			       running_off, alias_off);
4719 
4720 	      newref = force_gimple_operand_gsi (gsi, newref, true,
4721 						 NULL_TREE, true,
4722 						 GSI_SAME_STMT);
4723 	      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
4724 	      newoff = copy_ssa_name (running_off, NULL);
4725 	      incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4726 						   running_off, stride_step);
4727 	      vect_finish_stmt_generation (stmt, incr, gsi);
4728 
4729 	      running_off = newoff;
4730 	    }
4731 
4732 	  vec_inv = build_constructor (vectype, v);
4733 	  new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4734 	  new_stmt = SSA_NAME_DEF_STMT (new_temp);
4735 
4736 	  if (j == 0)
4737 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4738 	  else
4739 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4740 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
4741 	}
4742       return true;
4743     }
4744 
4745   if (grouped_load)
4746     {
4747       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4748       if (slp
4749           && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance).exists ()
4750 	  && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
4751         first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4752 
4753       /* Check if the chain of loads is already vectorized.  */
4754       if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4755 	{
4756 	  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4757 	  return true;
4758 	}
4759       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4760       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4761 
4762       /* VEC_NUM is the number of vect stmts to be created for this group.  */
4763       if (slp)
4764 	{
4765 	  grouped_load = false;
4766 	  vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4767           if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance).exists ())
4768             slp_perm = true;
4769     	}
4770       else
4771 	vec_num = group_size;
4772     }
4773   else
4774     {
4775       first_stmt = stmt;
4776       first_dr = dr;
4777       group_size = vec_num = 1;
4778     }
4779 
4780   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4781   gcc_assert (alignment_support_scheme);
4782   /* Targets with load-lane instructions must not require explicit
4783      realignment.  */
4784   gcc_assert (!load_lanes_p
4785 	      || alignment_support_scheme == dr_aligned
4786 	      || alignment_support_scheme == dr_unaligned_supported);
4787 
4788   /* In case the vectorization factor (VF) is bigger than the number
4789      of elements that we can fit in a vectype (nunits), we have to generate
4790      more than one vector stmt - i.e - we need to "unroll" the
4791      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
4792      from one copy of the vector stmt to the next, in the field
4793      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
4794      stages to find the correct vector defs to be used when vectorizing
4795      stmts that use the defs of the current stmt.  The example below
4796      illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4797      need to create 4 vectorized stmts):
4798 
4799      before vectorization:
4800                                 RELATED_STMT    VEC_STMT
4801         S1:     x = memref      -               -
4802         S2:     z = x + 1       -               -
4803 
4804      step 1: vectorize stmt S1:
4805         We first create the vector stmt VS1_0, and, as usual, record a
4806         pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4807         Next, we create the vector stmt VS1_1, and record a pointer to
4808         it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4809         Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
4810         stmts and pointers:
4811                                 RELATED_STMT    VEC_STMT
4812         VS1_0:  vx0 = memref0   VS1_1           -
4813         VS1_1:  vx1 = memref1   VS1_2           -
4814         VS1_2:  vx2 = memref2   VS1_3           -
4815         VS1_3:  vx3 = memref3   -               -
4816         S1:     x = load        -               VS1_0
4817         S2:     z = x + 1       -               -
4818 
4819      See the documentation of vect_get_vec_def_for_stmt_copy for how the
4820      information recorded in the RELATED_STMT field is used to vectorize
4821      stmt S2.  */
4822 
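  /* (Illustrative continuation of the example above:) when S2 is then
     vectorized, each of its copies obtains the corresponding vector def of
     x by following that RELATED_STMT chain, e.g.
        VS2_0:  vz0 = vx0 + v1
        VS2_1:  vz1 = vx1 + v1
        VS2_2:  vz2 = vx2 + v1
        VS2_3:  vz3 = vx3 + v1  */
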
4823   /* In case of interleaving (non-unit grouped access):
4824 
4825      S1:  x2 = &base + 2
4826      S2:  x0 = &base
4827      S3:  x1 = &base + 1
4828      S4:  x3 = &base + 3
4829 
4830      Vectorized loads are created in the order of memory accesses
4831      starting from the access of the first stmt of the chain:
4832 
4833      VS1: vx0 = &base
4834      VS2: vx1 = &base + vec_size*1
4835      VS3: vx2 = &base + vec_size*2
4836      VS4: vx3 = &base + vec_size*3
4837 
4838      Then permutation statements are generated:
4839 
4840      VS5: vx4 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4841      VS6: vx5 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4842        ...
4843 
4844      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4845      (the order of the data-refs in the output of vect_permute_load_chain
4846      corresponds to the order of scalar stmts in the interleaving chain - see
4847      the documentation of vect_permute_load_chain()).
4848      The generation of permutation stmts and recording them in
4849      STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4850 
4851      In case of both multiple types and interleaving, the vector loads and
4852      permutation stmts above are created for every copy.  The result vector
4853      stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4854      corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
4855 
4856   /* If the data reference is aligned (dr_aligned) or potentially unaligned
4857      on a target that supports unaligned accesses (dr_unaligned_supported)
4858      we generate the following code:
4859          p = initial_addr;
4860          indx = 0;
4861          loop {
4862 	   p = p + indx * vectype_size;
4863            vec_dest = *(p);
4864            indx = indx + 1;
4865          }
4866 
4867      Otherwise, the data reference is potentially unaligned on a target that
4868      does not support unaligned accesses (dr_explicit_realign_optimized) -
4869      then generate the following code, in which the data in each iteration is
4870      obtained by two vector loads, one from the previous iteration, and one
4871      from the current iteration:
4872          p1 = initial_addr;
4873          msq_init = *(floor(p1))
4874          p2 = initial_addr + VS - 1;
4875          realignment_token = call target_builtin;
4876          indx = 0;
4877          loop {
4878            p2 = p2 + indx * vectype_size
4879            lsq = *(floor(p2))
4880            vec_dest = realign_load (msq, lsq, realignment_token)
4881            indx = indx + 1;
4882            msq = lsq;
4883          }   */
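
  /* In the scheme above, floor(p) denotes P rounded down to a
     vector-alignment boundary, so MSQ and LSQ are the two aligned vectors
     surrounding the (possibly misaligned) data, and REALIGN_LOAD combines
     the required elements from them using REALIGNMENT_TOKEN.  */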
4884 
4885   /* If the misalignment remains the same throughout the execution of the
4886      loop, we can create the init_addr and permutation mask at the loop
4887      preheader.  Otherwise, it needs to be created inside the loop.
4888      This can only occur when vectorizing memory accesses in the inner-loop
4889      nested within an outer-loop that is being vectorized.  */
4890 
4891   if (nested_in_vect_loop
4892       && (TREE_INT_CST_LOW (DR_STEP (dr))
4893 	  % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4894     {
4895       gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4896       compute_in_loop = true;
4897     }
4898 
4899   if ((alignment_support_scheme == dr_explicit_realign_optimized
4900        || alignment_support_scheme == dr_explicit_realign)
4901       && !compute_in_loop)
4902     {
4903       msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4904 				    alignment_support_scheme, NULL_TREE,
4905 				    &at_loop);
4906       if (alignment_support_scheme == dr_explicit_realign_optimized)
4907 	{
4908 	  phi = SSA_NAME_DEF_STMT (msq);
4909 	  offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4910 	}
4911     }
4912   else
4913     at_loop = loop;
4914 
4915   if (negative)
4916     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4917 
4918   if (load_lanes_p)
4919     aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4920   else
4921     aggr_type = vectype;
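
  /* For example (illustrative), with load-lanes, VEC_NUM == 2 and four
     elements per vector, AGGR_TYPE is an eight-element array type: one
     IFN_LOAD_LANES call below loads all eight elements at once, and the
     two vectors are then extracted from its result.  */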
4922 
4923   prev_stmt_info = NULL;
4924   for (j = 0; j < ncopies; j++)
4925     {
4926       /* 1. Create the vector or array pointer update chain.  */
4927       if (j == 0)
4928         dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4929 						offset, &dummy, gsi,
4930 						&ptr_incr, false, &inv_p);
4931       else
4932         dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4933 				       TYPE_SIZE_UNIT (aggr_type));
4934 
4935       if (grouped_load || slp_perm)
4936 	dr_chain.create (vec_num);
4937 
4938       if (load_lanes_p)
4939 	{
4940 	  tree vec_array;
4941 
4942 	  vec_array = create_vector_array (vectype, vec_num);
4943 
4944 	  /* Emit:
4945 	       VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
4946 	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4947 	  new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4948 	  gimple_call_set_lhs (new_stmt, vec_array);
4949 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4950 
4951 	  /* Extract each vector into an SSA_NAME.  */
4952 	  for (i = 0; i < vec_num; i++)
4953 	    {
4954 	      new_temp = read_vector_array (stmt, gsi, scalar_dest,
4955 					    vec_array, i);
4956 	      dr_chain.quick_push (new_temp);
4957 	    }
4958 
4959 	  /* Record the mapping between SSA_NAMEs and statements.  */
4960 	  vect_record_grouped_load_vectors (stmt, dr_chain);
4961 	}
4962       else
4963 	{
4964 	  for (i = 0; i < vec_num; i++)
4965 	    {
4966 	      if (i > 0)
4967 		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4968 					       stmt, NULL_TREE);
4969 
4970 	      /* 2. Create the vector-load in the loop.  */
4971 	      switch (alignment_support_scheme)
4972 		{
4973 		case dr_aligned:
4974 		case dr_unaligned_supported:
4975 		  {
4976 		    unsigned int align, misalign;
4977 
4978 		    data_ref
4979 		      = build2 (MEM_REF, vectype, dataref_ptr,
4980 				build_int_cst (reference_alias_ptr_type
4981 					       (DR_REF (first_dr)), 0));
4982 		    align = TYPE_ALIGN_UNIT (vectype);
4983 		    if (alignment_support_scheme == dr_aligned)
4984 		      {
4985 			gcc_assert (aligned_access_p (first_dr));
4986 			misalign = 0;
4987 		      }
4988 		    else if (DR_MISALIGNMENT (first_dr) == -1)
4989 		      {
4990 			TREE_TYPE (data_ref)
4991 			  = build_aligned_type (TREE_TYPE (data_ref),
4992 						TYPE_ALIGN (elem_type));
4993 			align = TYPE_ALIGN_UNIT (elem_type);
4994 			misalign = 0;
4995 		      }
4996 		    else
4997 		      {
4998 			TREE_TYPE (data_ref)
4999 			  = build_aligned_type (TREE_TYPE (data_ref),
5000 						TYPE_ALIGN (elem_type));
5001 			misalign = DR_MISALIGNMENT (first_dr);
5002 		      }
5003 		    set_ptr_info_alignment (get_ptr_info (dataref_ptr),
5004 					    align, misalign);
5005 		    break;
5006 		  }
5007 		case dr_explicit_realign:
5008 		  {
5009 		    tree ptr, bump;
5010 		    tree vs_minus_1;
5011 
5012 		    vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5013 
5014 		    if (compute_in_loop)
5015 		      msq = vect_setup_realignment (first_stmt, gsi,
5016 						    &realignment_token,
5017 						    dr_explicit_realign,
5018 						    dataref_ptr, NULL);
5019 
5020 		    ptr = copy_ssa_name (dataref_ptr, NULL);
5021 		    new_stmt = gimple_build_assign_with_ops
5022 				 (BIT_AND_EXPR, ptr, dataref_ptr,
5023 				  build_int_cst
5024 				  (TREE_TYPE (dataref_ptr),
5025 				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5026 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
5027 		    data_ref
5028 		      = build2 (MEM_REF, vectype, ptr,
5029 				build_int_cst (reference_alias_ptr_type
5030 						 (DR_REF (first_dr)), 0));
5031 		    vec_dest = vect_create_destination_var (scalar_dest,
5032 							    vectype);
5033 		    new_stmt = gimple_build_assign (vec_dest, data_ref);
5034 		    new_temp = make_ssa_name (vec_dest, new_stmt);
5035 		    gimple_assign_set_lhs (new_stmt, new_temp);
5036 		    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
5037 		    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
5038 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
5039 		    msq = new_temp;
5040 
5041 		    bump = size_binop (MULT_EXPR, vs_minus_1,
5042 				       TYPE_SIZE_UNIT (elem_type));
5043 		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
5044 		    new_stmt = gimple_build_assign_with_ops
5045 				 (BIT_AND_EXPR, NULL_TREE, ptr,
5046 				  build_int_cst
5047 				  (TREE_TYPE (ptr),
5048 				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5049 		    ptr = copy_ssa_name (dataref_ptr, new_stmt);
5050 		    gimple_assign_set_lhs (new_stmt, ptr);
5051 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
5052 		    data_ref
5053 		      = build2 (MEM_REF, vectype, ptr,
5054 				build_int_cst (reference_alias_ptr_type
5055 						 (DR_REF (first_dr)), 0));
5056 		    break;
5057 		  }
5058 		case dr_explicit_realign_optimized:
5059 		  new_temp = copy_ssa_name (dataref_ptr, NULL);
5060 		  new_stmt = gimple_build_assign_with_ops
5061 			       (BIT_AND_EXPR, new_temp, dataref_ptr,
5062 				build_int_cst
5063 				  (TREE_TYPE (dataref_ptr),
5064 				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5065 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
5066 		  data_ref
5067 		    = build2 (MEM_REF, vectype, new_temp,
5068 			      build_int_cst (reference_alias_ptr_type
5069 					       (DR_REF (first_dr)), 0));
5070 		  break;
5071 		default:
5072 		  gcc_unreachable ();
5073 		}
5074 	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
5075 	      new_stmt = gimple_build_assign (vec_dest, data_ref);
5076 	      new_temp = make_ssa_name (vec_dest, new_stmt);
5077 	      gimple_assign_set_lhs (new_stmt, new_temp);
5078 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
5079 
5080 	      /* 3. Handle explicit realignment if necessary/supported.
5081 		 Create in loop:
5082 		   vec_dest = realign_load (msq, lsq, realignment_token)  */
5083 	      if (alignment_support_scheme == dr_explicit_realign_optimized
5084 		  || alignment_support_scheme == dr_explicit_realign)
5085 		{
5086 		  lsq = gimple_assign_lhs (new_stmt);
5087 		  if (!realignment_token)
5088 		    realignment_token = dataref_ptr;
5089 		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
5090 		  new_stmt
5091 		    = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
5092 						    vec_dest, msq, lsq,
5093 						    realignment_token);
5094 		  new_temp = make_ssa_name (vec_dest, new_stmt);
5095 		  gimple_assign_set_lhs (new_stmt, new_temp);
5096 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
5097 
5098 		  if (alignment_support_scheme == dr_explicit_realign_optimized)
5099 		    {
5100 		      gcc_assert (phi);
5101 		      if (i == vec_num - 1 && j == ncopies - 1)
5102 			add_phi_arg (phi, lsq,
5103 				     loop_latch_edge (containing_loop),
5104 				     UNKNOWN_LOCATION);
5105 		      msq = lsq;
5106 		    }
5107 		}
5108 
5109 	      /* 4. Handle invariant-load.  */
5110 	      if (inv_p && !bb_vinfo)
5111 		{
5112 		  gimple_stmt_iterator gsi2 = *gsi;
5113 		  gcc_assert (!grouped_load);
5114 		  gsi_next (&gsi2);
5115 		  new_temp = vect_init_vector (stmt, scalar_dest,
5116 					       vectype, &gsi2);
5117 		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
5118 		}
5119 
5120 	      if (negative)
5121 		{
5122 		  tree perm_mask = perm_mask_for_reverse (vectype);
5123 		  new_temp = permute_vec_elements (new_temp, new_temp,
5124 						   perm_mask, stmt, gsi);
5125 		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
5126 		}
5127 
5128 	      /* Collect vector loads and later create their permutation in
5129 		 vect_transform_grouped_load ().  */
5130 	      if (grouped_load || slp_perm)
5131 		dr_chain.quick_push (new_temp);
5132 
5133 	      /* Store vector loads in the corresponding SLP_NODE.  */
5134 	      if (slp && !slp_perm)
5135 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5136 	    }
5137 	}
5138 
5139       if (slp && !slp_perm)
5140 	continue;
5141 
5142       if (slp_perm)
5143         {
5144           if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
5145                                              slp_node_instance, false))
5146             {
5147               dr_chain.release ();
5148               return false;
5149             }
5150         }
5151       else
5152         {
5153           if (grouped_load)
5154   	    {
5155 	      if (!load_lanes_p)
5156 		vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
5157 	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5158 	    }
5159           else
5160 	    {
5161 	      if (j == 0)
5162 	        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5163 	      else
5164 	        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5165 	      prev_stmt_info = vinfo_for_stmt (new_stmt);
5166 	    }
5167         }
5168       dr_chain.release ();
5169     }
5170 
5171   return true;
5172 }
5173 
5174 /* Function vect_is_simple_cond.
5175 
5176    Input:
5177    LOOP - the loop that is being vectorized.
5178    COND - Condition that is checked for simple use.
5179 
5180    Output:
5181    *COMP_VECTYPE - the vector type for the comparison.
5182 
5183    Returns whether a COND can be vectorized.  Checks whether the
5184    condition operands are supportable using vect_is_simple_use.  */
5185 
5186 static bool
5187 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5188 		     bb_vec_info bb_vinfo, tree *comp_vectype)
5189 {
5190   tree lhs, rhs;
5191   tree def;
5192   enum vect_def_type dt;
5193   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
5194 
5195   if (!COMPARISON_CLASS_P (cond))
5196     return false;
5197 
5198   lhs = TREE_OPERAND (cond, 0);
5199   rhs = TREE_OPERAND (cond, 1);
5200 
5201   if (TREE_CODE (lhs) == SSA_NAME)
5202     {
5203       gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5204       if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5205 				 &lhs_def_stmt, &def, &dt, &vectype1))
5206 	return false;
5207     }
5208   else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5209 	   && TREE_CODE (lhs) != FIXED_CST)
5210     return false;
5211 
5212   if (TREE_CODE (rhs) == SSA_NAME)
5213     {
5214       gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5215       if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5216 				 &rhs_def_stmt, &def, &dt, &vectype2))
5217 	return false;
5218     }
5219   else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5220 	   && TREE_CODE (rhs) != FIXED_CST)
5221     return false;
5222 
5223   *comp_vectype = vectype1 ? vectype1 : vectype2;
5224   return true;
5225 }
5226 
5227 /* vectorizable_condition.
5228 
5229    Check if STMT is a conditional modify expression that can be vectorized.
5230    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5231    stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
5232    at GSI.
5233 
5234    When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
5235    to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
5236    the else clause if it is 2).
5237 
5238    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5239 
5240 bool
5241 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5242 			gimple *vec_stmt, tree reduc_def, int reduc_index,
5243 			slp_tree slp_node)
5244 {
5245   tree scalar_dest = NULL_TREE;
5246   tree vec_dest = NULL_TREE;
5247   tree cond_expr, then_clause, else_clause;
5248   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5249   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5250   tree comp_vectype = NULL_TREE;
5251   tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5252   tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5253   tree vec_compare, vec_cond_expr;
5254   tree new_temp;
5255   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5256   tree def;
5257   enum vect_def_type dt, dts[4];
5258   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5259   int ncopies;
5260   enum tree_code code;
5261   stmt_vec_info prev_stmt_info = NULL;
5262   int i, j;
5263   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5264   vec<tree> vec_oprnds0 = vNULL;
5265   vec<tree> vec_oprnds1 = vNULL;
5266   vec<tree> vec_oprnds2 = vNULL;
5267   vec<tree> vec_oprnds3 = vNULL;
5268   tree vec_cmp_type = vectype;
5269 
5270   if (slp_node || PURE_SLP_STMT (stmt_info))
5271     ncopies = 1;
5272   else
5273     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
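
  /* E.g. (illustrative) with a vectorization factor of 8 and four elements
     per vector, NCOPIES is 2 and the scalar COND_EXPR is replaced by two
     VEC_COND_EXPRs.  */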
5274 
5275   gcc_assert (ncopies >= 1);
5276   if (reduc_index && ncopies > 1)
5277     return false; /* FORNOW */
5278 
5279   if (reduc_index && STMT_SLP_TYPE (stmt_info))
5280     return false;
5281 
5282   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5283     return false;
5284 
5285   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5286       && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5287            && reduc_def))
5288     return false;
5289 
5290   /* FORNOW: not yet supported.  */
5291   if (STMT_VINFO_LIVE_P (stmt_info))
5292     {
5293       if (dump_enabled_p ())
5294         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5295                          "value used after loop.");
5296       return false;
5297     }
5298 
5299   /* Is this a vectorizable conditional operation?  */
5300   if (!is_gimple_assign (stmt))
5301     return false;
5302 
5303   code = gimple_assign_rhs_code (stmt);
5304 
5305   if (code != COND_EXPR)
5306     return false;
5307 
5308   cond_expr = gimple_assign_rhs1 (stmt);
5309   then_clause = gimple_assign_rhs2 (stmt);
5310   else_clause = gimple_assign_rhs3 (stmt);
5311 
5312   if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5313 			    &comp_vectype)
5314       || !comp_vectype)
5315     return false;
5316 
5317   if (TREE_CODE (then_clause) == SSA_NAME)
5318     {
5319       gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5320       if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5321 			       &then_def_stmt, &def, &dt))
5322 	return false;
5323     }
5324   else if (TREE_CODE (then_clause) != INTEGER_CST
5325 	   && TREE_CODE (then_clause) != REAL_CST
5326 	   && TREE_CODE (then_clause) != FIXED_CST)
5327     return false;
5328 
5329   if (TREE_CODE (else_clause) == SSA_NAME)
5330     {
5331       gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5332       if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5333 			       &else_def_stmt, &def, &dt))
5334 	return false;
5335     }
5336   else if (TREE_CODE (else_clause) != INTEGER_CST
5337 	   && TREE_CODE (else_clause) != REAL_CST
5338 	   && TREE_CODE (else_clause) != FIXED_CST)
5339     return false;
5340 
5341   if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype)))
5342     {
5343       unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
5344       tree cmp_type = build_nonstandard_integer_type (prec, 1);
5345       vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
5346       if (vec_cmp_type == NULL_TREE)
5347 	return false;
5348     }
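
  /* E.g. (illustrative) for a V4SF VECTYPE the code above picks a 32-bit
     unsigned integer element, so VEC_CMP_TYPE becomes the corresponding
     four-element integer vector type: the comparison result must be an
     integer vector with the same size and element count as VECTYPE.  */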
5349 
5350   if (!vec_stmt)
5351     {
5352       STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5353       return expand_vec_cond_expr_p (vectype, comp_vectype);
5354     }
5355 
5356   /* Transform.  */
5357 
5358   if (!slp_node)
5359     {
5360       vec_oprnds0.create (1);
5361       vec_oprnds1.create (1);
5362       vec_oprnds2.create (1);
5363       vec_oprnds3.create (1);
5364     }
5365 
5366   /* Handle def.  */
5367   scalar_dest = gimple_assign_lhs (stmt);
5368   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5369 
5370   /* Handle cond expr.  */
5371   for (j = 0; j < ncopies; j++)
5372     {
5373       gimple new_stmt = NULL;
5374       if (j == 0)
5375 	{
5376           if (slp_node)
5377             {
5378               vec<tree> ops;
5379 	      ops.create (4);
5380 	      vec<vec<tree> > vec_defs;
5381 
5382 	      vec_defs.create (4);
5383               ops.safe_push (TREE_OPERAND (cond_expr, 0));
5384               ops.safe_push (TREE_OPERAND (cond_expr, 1));
5385               ops.safe_push (then_clause);
5386               ops.safe_push (else_clause);
5387               vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5388 	      vec_oprnds3 = vec_defs.pop ();
5389 	      vec_oprnds2 = vec_defs.pop ();
5390 	      vec_oprnds1 = vec_defs.pop ();
5391 	      vec_oprnds0 = vec_defs.pop ();
5392 
5393               ops.release ();
5394               vec_defs.release ();
5395             }
5396           else
5397             {
5398 	      gimple gtemp;
5399 	      vec_cond_lhs =
5400 	      vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5401 					    stmt, NULL);
5402 	      vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5403 				  loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5404 
5405 	      vec_cond_rhs =
5406 		vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5407 						stmt, NULL);
5408 	      vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5409 				  loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5410 	      if (reduc_index == 1)
5411 		vec_then_clause = reduc_def;
5412 	      else
5413 		{
5414 		  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5415 		 		  			      stmt, NULL);
5416 	          vect_is_simple_use (then_clause, stmt, loop_vinfo,
5417 					  NULL, &gtemp, &def, &dts[2]);
5418 		}
5419 	      if (reduc_index == 2)
5420 		vec_else_clause = reduc_def;
5421 	      else
5422 		{
5423 		  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5424 							      stmt, NULL);
5425 		  vect_is_simple_use (else_clause, stmt, loop_vinfo,
5426 				  NULL, &gtemp, &def, &dts[3]);
5427 		}
5428 	    }
5429 	}
5430       else
5431 	{
5432 	  vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5433 							 vec_oprnds0.pop ());
5434 	  vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5435 							 vec_oprnds1.pop ());
5436 	  vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5437 							    vec_oprnds2.pop ());
5438 	  vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5439 							    vec_oprnds3.pop ());
5440 	}
5441 
5442       if (!slp_node)
5443         {
5444 	  vec_oprnds0.quick_push (vec_cond_lhs);
5445 	  vec_oprnds1.quick_push (vec_cond_rhs);
5446 	  vec_oprnds2.quick_push (vec_then_clause);
5447 	  vec_oprnds3.quick_push (vec_else_clause);
5448 	}
5449 
5450       /* Arguments are ready.  Create the new vector stmt.  */
5451       FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
5452         {
5453           vec_cond_rhs = vec_oprnds1[i];
5454           vec_then_clause = vec_oprnds2[i];
5455           vec_else_clause = vec_oprnds3[i];
5456 
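          /* Build the elementwise select: VEC_COND_EXPR <lhs CODE rhs,
             then, else> takes each lane from VEC_THEN_CLAUSE where the
             comparison holds and from VEC_ELSE_CLAUSE otherwise.  */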
5457 	  vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
5458 				vec_cond_lhs, vec_cond_rhs);
5459           vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5460  		         vec_compare, vec_then_clause, vec_else_clause);
5461 
5462           new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5463           new_temp = make_ssa_name (vec_dest, new_stmt);
5464           gimple_assign_set_lhs (new_stmt, new_temp);
5465           vect_finish_stmt_generation (stmt, new_stmt, gsi);
5466           if (slp_node)
5467             SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5468         }
5469 
5470         if (slp_node)
5471           continue;
5472 
5473         if (j == 0)
5474           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5475         else
5476           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5477 
5478         prev_stmt_info = vinfo_for_stmt (new_stmt);
5479     }
5480 
5481   vec_oprnds0.release ();
5482   vec_oprnds1.release ();
5483   vec_oprnds2.release ();
5484   vec_oprnds3.release ();
5485 
5486   return true;
5487 }
5488 
5489 
5490 /* Make sure the statement is vectorizable.  */
5491 
5492 bool
5493 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5494 {
5495   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5496   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5497   enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5498   bool ok;
5499   tree scalar_type, vectype;
5500   gimple pattern_stmt;
5501   gimple_seq pattern_def_seq;
5502 
5503   if (dump_enabled_p ())
5504     {
5505       dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
5506       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5507     }
5508 
5509   if (gimple_has_volatile_ops (stmt))
5510     {
5511       if (dump_enabled_p ())
5512         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5513                          "not vectorized: stmt has volatile operands");
5514 
5515       return false;
5516     }
5517 
5518   /* Skip stmts that do not need to be vectorized. In loops this is expected
5519      to include:
5520      - the COND_EXPR which is the loop exit condition
5521      - any LABEL_EXPRs in the loop
5522      - computations that are used only for array indexing or loop control.
5523      In basic blocks we only analyze statements that are a part of some SLP
5524      instance; therefore, all the statements are relevant.
5525 
5526      The pattern statement needs to be analyzed instead of the original
5527      statement if the original statement is not relevant.  Otherwise, we
5528      analyze both statements.  In basic blocks we are called from some SLP
5529      instance traversal, so we do not analyze pattern stmts instead of the
5530      original ones; the pattern stmts are already part of the SLP instance.  */
5531 
5532   pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5533   if (!STMT_VINFO_RELEVANT_P (stmt_info)
5534       && !STMT_VINFO_LIVE_P (stmt_info))
5535     {
5536       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5537           && pattern_stmt
5538           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5539               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5540         {
5541           /* Analyze PATTERN_STMT instead of the original stmt.  */
5542           stmt = pattern_stmt;
5543           stmt_info = vinfo_for_stmt (pattern_stmt);
5544           if (dump_enabled_p ())
5545             {
5546               dump_printf_loc (MSG_NOTE, vect_location,
5547                                "==> examining pattern statement: ");
5548               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5549             }
5550         }
5551       else
5552         {
5553           if (dump_enabled_p ())
5554             dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.");
5555 
5556           return true;
5557         }
5558     }
5559   else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5560 	   && node == NULL
5561            && pattern_stmt
5562            && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5563                || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5564     {
5565       /* Analyze PATTERN_STMT too.  */
5566       if (dump_enabled_p ())
5567         {
5568           dump_printf_loc (MSG_NOTE, vect_location,
5569                            "==> examining pattern statement: ");
5570           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5571         }
5572 
5573       if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5574         return false;
5575    }
5576 
5577   if (is_pattern_stmt_p (stmt_info)
5578       && node == NULL
5579       && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
5580     {
5581       gimple_stmt_iterator si;
5582 
5583       for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5584 	{
5585 	  gimple pattern_def_stmt = gsi_stmt (si);
5586 	  if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5587 	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5588 	    {
5589 	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
5590 	      if (dump_enabled_p ())
5591 		{
5592 		  dump_printf_loc (MSG_NOTE, vect_location,
5593                                    "==> examining pattern def statement: ");
5594 		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
5595 		}
5596 
5597 	      if (!vect_analyze_stmt (pattern_def_stmt,
5598 				      need_to_vectorize, node))
5599 		return false;
5600 	    }
5601 	}
5602     }
5603 
5604   switch (STMT_VINFO_DEF_TYPE (stmt_info))
5605     {
5606       case vect_internal_def:
5607         break;
5608 
5609       case vect_reduction_def:
5610       case vect_nested_cycle:
5611          gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5612                      || relevance == vect_used_in_outer_by_reduction
5613                      || relevance == vect_unused_in_scope));
5614          break;
5615 
5616       case vect_induction_def:
5617       case vect_constant_def:
5618       case vect_external_def:
5619       case vect_unknown_def_type:
5620       default:
5621         gcc_unreachable ();
5622     }
5623 
5624   if (bb_vinfo)
5625     {
5626       gcc_assert (PURE_SLP_STMT (stmt_info));
5627 
5628       scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5629       if (dump_enabled_p ())
5630         {
5631           dump_printf_loc (MSG_NOTE, vect_location,
5632                            "get vectype for scalar type:  ");
5633           dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
5634         }
5635 
5636       vectype = get_vectype_for_scalar_type (scalar_type);
5637       if (!vectype)
5638         {
5639           if (dump_enabled_p ())
5640             {
5641                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5642                                 "not SLPed: unsupported data-type ");
5643                dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5644                                   scalar_type);
5645             }
5646           return false;
5647         }
5648 
5649       if (dump_enabled_p ())
5650         {
5651           dump_printf_loc (MSG_NOTE, vect_location, "vectype:  ");
5652           dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
5653         }
5654 
5655       STMT_VINFO_VECTYPE (stmt_info) = vectype;
5656    }
5657 
5658   if (STMT_VINFO_RELEVANT_P (stmt_info))
5659     {
5660       gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5661       gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5662       *need_to_vectorize = true;
5663     }
5664 
5665    ok = true;
5666    if (!bb_vinfo
5667        && (STMT_VINFO_RELEVANT_P (stmt_info)
5668            || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5669       ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5670             || vectorizable_shift (stmt, NULL, NULL, NULL)
5671             || vectorizable_operation (stmt, NULL, NULL, NULL)
5672             || vectorizable_assignment (stmt, NULL, NULL, NULL)
5673             || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5674 	    || vectorizable_call (stmt, NULL, NULL, NULL)
5675             || vectorizable_store (stmt, NULL, NULL, NULL)
5676             || vectorizable_reduction (stmt, NULL, NULL, NULL)
5677             || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5678     else
5679       {
5680         if (bb_vinfo)
5681 	  ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5682 		|| vectorizable_shift (stmt, NULL, NULL, node)
5683                 || vectorizable_operation (stmt, NULL, NULL, node)
5684                 || vectorizable_assignment (stmt, NULL, NULL, node)
5685                 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5686 		|| vectorizable_call (stmt, NULL, NULL, node)
5687                 || vectorizable_store (stmt, NULL, NULL, node)
5688                 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5689       }
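
  /* Note that each vectorizable_* routine above is called with a NULL
     VEC_STMT argument, i.e. in analysis-only mode; the actual transformation
     is done later by the same routines when invoked from
     vect_transform_stmt.  */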
5690 
5691   if (!ok)
5692     {
5693       if (dump_enabled_p ())
5694         {
5695           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5696                            "not vectorized: relevant stmt not ");
5697           dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5698           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
5699         }
5700 
5701       return false;
5702     }
5703 
5704   if (bb_vinfo)
5705     return true;
5706 
5707   /* Stmts that are (also) "live" (i.e., that are used outside the loop)
5708      need extra handling, except for vectorizable reductions.  */
5709   if (STMT_VINFO_LIVE_P (stmt_info)
5710       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5711     ok = vectorizable_live_operation (stmt, NULL, NULL);
5712 
5713   if (!ok)
5714     {
5715       if (dump_enabled_p ())
5716         {
5717           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5718                            "not vectorized: live stmt not ");
5719           dump_printf (MSG_MISSED_OPTIMIZATION,  "supported: ");
5720           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
5721         }
5722 
5723        return false;
5724     }
5725 
5726   return true;
5727 }
5728 
5729 
5730 /* Function vect_transform_stmt.
5731 
5732    Create a vectorized stmt to replace STMT, and insert it at BSI.  */
5733 
5734 bool
5735 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5736 		     bool *grouped_store, slp_tree slp_node,
5737                      slp_instance slp_node_instance)
5738 {
5739   bool is_store = false;
5740   gimple vec_stmt = NULL;
5741   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5742   bool done;
5743 
5744   switch (STMT_VINFO_TYPE (stmt_info))
5745     {
5746     case type_demotion_vec_info_type:
5747     case type_promotion_vec_info_type:
5748     case type_conversion_vec_info_type:
5749       done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5750       gcc_assert (done);
5751       break;
5752 
5753     case induc_vec_info_type:
5754       gcc_assert (!slp_node);
5755       done = vectorizable_induction (stmt, gsi, &vec_stmt);
5756       gcc_assert (done);
5757       break;
5758 
5759     case shift_vec_info_type:
5760       done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5761       gcc_assert (done);
5762       break;
5763 
5764     case op_vec_info_type:
5765       done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5766       gcc_assert (done);
5767       break;
5768 
5769     case assignment_vec_info_type:
5770       done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5771       gcc_assert (done);
5772       break;
5773 
5774     case load_vec_info_type:
5775       done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5776                                 slp_node_instance);
5777       gcc_assert (done);
5778       break;
5779 
5780     case store_vec_info_type:
5781       done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5782       gcc_assert (done);
5783       if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
5784 	{
5785 	  /* In case of interleaving, the whole chain is vectorized when the
5786 	     last store in the chain is reached.  Store stmts before the last
5787 	     one are skipped, and their stmt_vec_info must not be freed
5788 	     meanwhile.  */
5789 	  *grouped_store = true;
5790 	  if (STMT_VINFO_VEC_STMT (stmt_info))
5791 	    is_store = true;
5792 	  }
5793       else
5794 	is_store = true;
5795       break;
5796 
5797     case condition_vec_info_type:
5798       done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5799       gcc_assert (done);
5800       break;
5801 
5802     case call_vec_info_type:
5803       done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5804       stmt = gsi_stmt (*gsi);
5805       break;
5806 
5807     case reduc_vec_info_type:
5808       done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5809       gcc_assert (done);
5810       break;
5811 
5812     default:
5813       if (!STMT_VINFO_LIVE_P (stmt_info))
5814 	{
5815 	  if (dump_enabled_p ())
5816 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5817                              "stmt not supported.");
5818 	  gcc_unreachable ();
5819 	}
5820     }
5821 
5822   /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5823      is being vectorized, but outside the immediately enclosing loop.  */
5824   if (vec_stmt
5825       && STMT_VINFO_LOOP_VINFO (stmt_info)
5826       && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5827                                 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5828       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5829       && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5830           || STMT_VINFO_RELEVANT (stmt_info) ==
5831                                            vect_used_in_outer_by_reduction))
5832     {
5833       struct loop *innerloop = LOOP_VINFO_LOOP (
5834                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5835       imm_use_iterator imm_iter;
5836       use_operand_p use_p;
5837       tree scalar_dest;
5838       gimple exit_phi;
5839 
5840       if (dump_enabled_p ())
5841         dump_printf_loc (MSG_NOTE, vect_location,
5842                          "Record the vdef for outer-loop vectorization.");
5843 
5844       /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5845         (to be used when vectorizing outer-loop stmts that use the DEF of
5846         STMT).  */
5847       if (gimple_code (stmt) == GIMPLE_PHI)
5848         scalar_dest = PHI_RESULT (stmt);
5849       else
5850         scalar_dest = gimple_assign_lhs (stmt);
5851 
5852       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5853        {
5854          if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5855            {
5856              exit_phi = USE_STMT (use_p);
5857              STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5858            }
5859        }
5860     }
5861 
5862   /* Handle stmts whose DEF is used outside the loop-nest that is
5863      being vectorized.  */
5864   if (STMT_VINFO_LIVE_P (stmt_info)
5865       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5866     {
5867       done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5868       gcc_assert (done);
5869     }
5870 
5871   if (vec_stmt)
5872     STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5873 
5874   return is_store;
5875 }
5876 
5877 
5878 /* Remove a group of stores (for SLP or interleaving), free their
5879    stmt_vec_info.  */
5880 
5881 void
5882 vect_remove_stores (gimple first_stmt)
5883 {
5884   gimple next = first_stmt;
5885   gimple tmp;
5886   gimple_stmt_iterator next_si;
5887 
5888   while (next)
5889     {
5890       stmt_vec_info stmt_info = vinfo_for_stmt (next);
5891 
5892       tmp = GROUP_NEXT_ELEMENT (stmt_info);
5893       if (is_pattern_stmt_p (stmt_info))
5894 	next = STMT_VINFO_RELATED_STMT (stmt_info);
5895       /* Free the attached stmt_vec_info and remove the stmt.  */
5896       next_si = gsi_for_stmt (next);
5897       unlink_stmt_vdef (next);
5898       gsi_remove (&next_si, true);
5899       release_defs (next);
5900       free_stmt_vec_info (next);
5901       next = tmp;
5902     }
5903 }
5904 
5905 
5906 /* Function new_stmt_vec_info.
5907 
5908    Create and initialize a new stmt_vec_info struct for STMT.  */
5909 
5910 stmt_vec_info
5911 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5912                    bb_vec_info bb_vinfo)
5913 {
5914   stmt_vec_info res;
5915   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5916 
5917   STMT_VINFO_TYPE (res) = undef_vec_info_type;
5918   STMT_VINFO_STMT (res) = stmt;
5919   STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5920   STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5921   STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5922   STMT_VINFO_LIVE_P (res) = false;
5923   STMT_VINFO_VECTYPE (res) = NULL;
5924   STMT_VINFO_VEC_STMT (res) = NULL;
5925   STMT_VINFO_VECTORIZABLE (res) = true;
5926   STMT_VINFO_IN_PATTERN_P (res) = false;
5927   STMT_VINFO_RELATED_STMT (res) = NULL;
5928   STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
5929   STMT_VINFO_DATA_REF (res) = NULL;
5930 
5931   STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5932   STMT_VINFO_DR_OFFSET (res) = NULL;
5933   STMT_VINFO_DR_INIT (res) = NULL;
5934   STMT_VINFO_DR_STEP (res) = NULL;
5935   STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5936 
5937   if (gimple_code (stmt) == GIMPLE_PHI
5938       && is_loop_header_bb_p (gimple_bb (stmt)))
5939     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5940   else
5941     STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5942 
5943   STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
5944   STMT_SLP_TYPE (res) = loop_vect;
5945   GROUP_FIRST_ELEMENT (res) = NULL;
5946   GROUP_NEXT_ELEMENT (res) = NULL;
5947   GROUP_SIZE (res) = 0;
5948   GROUP_STORE_COUNT (res) = 0;
5949   GROUP_GAP (res) = 0;
5950   GROUP_SAME_DR_STMT (res) = NULL;
5951   GROUP_READ_WRITE_DEPENDENCE (res) = false;
5952 
5953   return res;
5954 }
5955 
5956 
5957 /* Create the global vector that holds stmt_vec_info structs.  */
5958 
5959 void
5960 init_stmt_vec_info_vec (void)
5961 {
5962   gcc_assert (!stmt_vec_info_vec.exists ());
5963   stmt_vec_info_vec.create (50);
5964 }
5965 
5966 
5967 /* Free the global vector of stmt_vec_info structs.  */
5968 
5969 void
5970 free_stmt_vec_info_vec (void)
5971 {
5972   unsigned int i;
5973   vec_void_p info;
5974   FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
5975     if (info != NULL)
5976       free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
5977   gcc_assert (stmt_vec_info_vec.exists ());
5978   stmt_vec_info_vec.release ();
5979 }
5980 
5981 
5982 /* Free stmt vectorization related info.  */
5983 
5984 void
5985 free_stmt_vec_info (gimple stmt)
5986 {
5987   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5988 
5989   if (!stmt_info)
5990     return;
5991 
5992   /* Check if this statement has a related "pattern stmt"
5993      (introduced by the vectorizer during the pattern recognition
5994      pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
5995      too.  */
5996   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5997     {
5998       stmt_vec_info patt_info
5999 	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6000       if (patt_info)
6001 	{
6002 	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
6003 	  if (seq)
6004 	    {
6005 	      gimple_stmt_iterator si;
6006 	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
6007 		free_stmt_vec_info (gsi_stmt (si));
6008 	    }
6009 	  free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
6010 	}
6011     }
6012 
6013   STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6014   set_vinfo_for_stmt (stmt, NULL);
6015   free (stmt_info);
6016 }
6017 
6018 
6019 /* Function get_vectype_for_scalar_type_and_size.
6020 
6021    Returns the vector type corresponding to SCALAR_TYPE  and SIZE as supported
6022    by the target.  */
6023 
6024 static tree
6025 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
6026 {
6027   enum machine_mode inner_mode = TYPE_MODE (scalar_type);
6028   enum machine_mode simd_mode;
6029   unsigned int nbytes = GET_MODE_SIZE (inner_mode);
6030   int nunits;
6031   tree vectype;
6032 
6033   if (nbytes == 0)
6034     return NULL_TREE;
6035 
6036   if (GET_MODE_CLASS (inner_mode) != MODE_INT
6037       && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
6038     return NULL_TREE;
6039 
6040   /* For vector types of elements whose mode precision doesn't
6041      match their type's precision we use an element type of mode
6042      precision.  The vectorization routines will have to make sure
6043      they support the proper result truncation/extension.
6044      We also make sure to build vector types with INTEGER_TYPE
6045      component type only.  */
6046   if (INTEGRAL_TYPE_P (scalar_type)
6047       && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
6048 	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
6049     scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
6050 						  TYPE_UNSIGNED (scalar_type));
6051 
6052   /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6053      When the component mode passes the above test simply use a type
6054      corresponding to that mode.  The theory is that any use that
6055      would cause problems with this will disable vectorization anyway.  */
6056   else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
6057 	   && !INTEGRAL_TYPE_P (scalar_type)
6058 	   && !POINTER_TYPE_P (scalar_type))
6059     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6060 
6061   /* We can't build a vector type of elements with alignment bigger than
6062      their size.  */
6063   else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
6064     scalar_type = lang_hooks.types.type_for_mode (inner_mode,
6065 						  TYPE_UNSIGNED (scalar_type));
6066 
6067   /* If we fell back to using the mode, fail if there was
6068      no scalar type for it.  */
6069   if (scalar_type == NULL_TREE)
6070     return NULL_TREE;
6071 
6072   /* If no size was supplied, use the mode the target prefers.  Otherwise
6073      look up a vector mode of the specified size.  */
6074   if (size == 0)
6075     simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
6076   else
6077     simd_mode = mode_for_vector (inner_mode, size / nbytes);
6078   nunits = GET_MODE_SIZE (simd_mode) / nbytes;
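  /* E.g. (illustrative) a 4-byte element in a 16-byte SIMD mode yields
     NUNITS == 4.  */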
6079   if (nunits <= 1)
6080     return NULL_TREE;
6081 
6082   vectype = build_vector_type (scalar_type, nunits);
6083   if (dump_enabled_p ())
6084     {
6085       dump_printf_loc (MSG_NOTE, vect_location,
6086                        "get vectype with %d units of type ", nunits);
6087       dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
6088     }
6089 
6090   if (!vectype)
6091     return NULL_TREE;
6092 
6093   if (dump_enabled_p ())
6094     {
6095       dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
6096       dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
6097     }
6098 
6099   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6100       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
6101     {
6102       if (dump_enabled_p ())
6103         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6104                          "mode not supported by target.");
6105       return NULL_TREE;
6106     }
6107 
6108   return vectype;
6109 }
6110 
6111 unsigned int current_vector_size;
6112 
6113 /* Function get_vectype_for_scalar_type.
6114 
6115    Returns the vector type corresponding to SCALAR_TYPE as supported
6116    by the target.  */
6117 
6118 tree
6119 get_vectype_for_scalar_type (tree scalar_type)
6120 {
6121   tree vectype;
6122   vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6123 						  current_vector_size);
6124   if (vectype
6125       && current_vector_size == 0)
6126     current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6127   return vectype;
6128 }
6129 
6130 /* Function get_same_sized_vectype
6131 
6132    Returns a vector type corresponding to SCALAR_TYPE with the same
6133    size as VECTOR_TYPE, if supported by the target.  */
6134 
6135 tree
6136 get_same_sized_vectype (tree scalar_type, tree vector_type)
6137 {
6138   return get_vectype_for_scalar_type_and_size
6139 	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
6140 }
6141 
6142 /* Function vect_is_simple_use.
6143 
6144    Input:
6145    LOOP_VINFO - the vect info of the loop that is being vectorized.
6146    BB_VINFO - the vect info of the basic block that is being vectorized.
6147    OPERAND - operand of STMT in the loop or bb.
6148    DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
6149 
6150    Returns whether a stmt with OPERAND can be vectorized.
6151    For loops, supportable operands are constants, loop invariants, and operands
6152    that are defined by the current iteration of the loop.  Unsupportable
6153    operands are those that are defined by a previous iteration of the loop (as
6154    is the case in reduction/induction computations).
6155    For basic blocks, supportable operands are constants and bb invariants.
6156    For now, operands defined outside the basic block are not supported.  */
6157 
6158 bool
6159 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6160                     bb_vec_info bb_vinfo, gimple *def_stmt,
6161 		    tree *def, enum vect_def_type *dt)
6162 {
6163   basic_block bb;
6164   stmt_vec_info stmt_vinfo;
6165   struct loop *loop = NULL;
6166 
6167   if (loop_vinfo)
6168     loop = LOOP_VINFO_LOOP (loop_vinfo);
6169 
6170   *def_stmt = NULL;
6171   *def = NULL_TREE;
6172 
6173   if (dump_enabled_p ())
6174     {
6175       dump_printf_loc (MSG_NOTE, vect_location,
6176                        "vect_is_simple_use: operand ");
6177       dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
6178     }
6179 
6180   if (CONSTANT_CLASS_P (operand))
6181     {
6182       *dt = vect_constant_def;
6183       return true;
6184     }
6185 
6186   if (is_gimple_min_invariant (operand))
6187     {
6188       *def = operand;
6189       *dt = vect_external_def;
6190       return true;
6191     }
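  /* Look through a PAREN_EXPR; it only exists to prevent reassociation,
     and the operand underneath it is what matters for vectorization.  */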
6192 
6193   if (TREE_CODE (operand) == PAREN_EXPR)
6194     {
6195       if (dump_enabled_p ())
6196         dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.");
6197       operand = TREE_OPERAND (operand, 0);
6198     }
6199 
6200   if (TREE_CODE (operand) != SSA_NAME)
6201     {
6202       if (dump_enabled_p ())
6203         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6204                          "not ssa-name.");
6205       return false;
6206     }
6207 
6208   *def_stmt = SSA_NAME_DEF_STMT (operand);
6209   if (*def_stmt == NULL)
6210     {
6211       if (dump_enabled_p ())
6212         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6213                          "no def_stmt.");
6214       return false;
6215     }
6216 
6217   if (dump_enabled_p ())
6218     {
6219       dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
6220       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
6221     }
6222 
6223   /* An empty stmt is expected only in the case of a function argument
6224      (otherwise we expect a phi_node or a GIMPLE_ASSIGN).  */
6225   if (gimple_nop_p (*def_stmt))
6226     {
6227       *def = operand;
6228       *dt = vect_external_def;
6229       return true;
6230     }
6231 
6232   bb = gimple_bb (*def_stmt);
6233 
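  /* A definition outside the loop being vectorized (or, in the basic-block
     case, outside the block or by a PHI) is an external def; otherwise use
     the def type recorded in the defining stmt's stmt_vec_info.  */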
6234   if ((loop && !flow_bb_inside_loop_p (loop, bb))
6235       || (!loop && bb != BB_VINFO_BB (bb_vinfo))
6236       || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
6237     *dt = vect_external_def;
6238   else
6239     {
6240       stmt_vinfo = vinfo_for_stmt (*def_stmt);
6241       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6242     }
6243 
6244   if (*dt == vect_unknown_def_type
6245       || (stmt
6246 	  && *dt == vect_double_reduction_def
6247 	  && gimple_code (stmt) != GIMPLE_PHI))
6248     {
6249       if (dump_enabled_p ())
6250         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6251                          "Unsupported pattern.");
6252       return false;
6253     }
6254 
6255   if (dump_enabled_p ())
6256     dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.", *dt);
6257 
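  /* Finally extract the actual definition: the PHI result, the assignment
     lhs, or the call lhs (a call without a lhs is not supported).  */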
6258   switch (gimple_code (*def_stmt))
6259     {
6260     case GIMPLE_PHI:
6261       *def = gimple_phi_result (*def_stmt);
6262       break;
6263 
6264     case GIMPLE_ASSIGN:
6265       *def = gimple_assign_lhs (*def_stmt);
6266       break;
6267 
6268     case GIMPLE_CALL:
6269       *def = gimple_call_lhs (*def_stmt);
6270       if (*def != NULL)
6271 	break;
6272       /* FALLTHRU */
6273     default:
6274       if (dump_enabled_p ())
6275         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6276                          "unsupported defining stmt: ");
6277       return false;
6278     }
6279 
6280   return true;
6281 }
6282 
6283 /* Function vect_is_simple_use_1.
6284 
6285    Same as vect_is_simple_use but also determines the vector operand
6286    type of OPERAND and stores it to *VECTYPE.  If the definition of
6287    OPERAND is vect_uninitialized_def, vect_constant_def or
6288    vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
6289    is responsible for computing the best suited vector type for the
6290    scalar operand.  */
6291 
6292 bool
6293 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6294 		      bb_vec_info bb_vinfo, gimple *def_stmt,
6295 		      tree *def, enum vect_def_type *dt, tree *vectype)
6296 {
6297   if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6298 			   def, dt))
6299     return false;
6300 
6301   /* Now get a vector type if the def is internal, otherwise supply
6302      NULL_TREE and leave it up to the caller to figure out a proper
6303      type for the use stmt.  */
6304   if (*dt == vect_internal_def
6305       || *dt == vect_induction_def
6306       || *dt == vect_reduction_def
6307       || *dt == vect_double_reduction_def
6308       || *dt == vect_nested_cycle)
6309     {
6310       stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
6311 
6312       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6313           && !STMT_VINFO_RELEVANT (stmt_info)
6314           && !STMT_VINFO_LIVE_P (stmt_info))
6315 	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6316 
6317       *vectype = STMT_VINFO_VECTYPE (stmt_info);
6318       gcc_assert (*vectype != NULL_TREE);
6319     }
6320   else if (*dt == vect_uninitialized_def
6321 	   || *dt == vect_constant_def
6322 	   || *dt == vect_external_def)
6323     *vectype = NULL_TREE;
6324   else
6325     gcc_unreachable ();
6326 
6327   return true;
6328 }
6329 
6330 
6331 /* Function supportable_widening_operation
6332 
6333    Check whether an operation represented by the code CODE is a
6334    widening operation that is supported by the target platform in
6335    vector form (i.e., when operating on arguments of type VECTYPE_IN
6336    producing a result of type VECTYPE_OUT).
6337 
6338    Widening operations we currently support are NOP (CONVERT), FLOAT
6339    and WIDEN_MULT.  This function checks if these operations are supported
6340    by the target platform either directly (via vector tree-codes), or via
6341    target builtins.
6342 
6343    Output:
6344    - CODE1 and CODE2 are codes of vector operations to be used when
6345    vectorizing the operation, if available.
6346    - MULTI_STEP_CVT determines the number of required intermediate steps in
6347    case of multi-step conversion (like char->short->int - in that case
6348    MULTI_STEP_CVT will be 1).
6349    - INTERM_TYPES contains the intermediate type required to perform the
6350    widening operation (short in the above example).  */
6351 
6352 bool
6353 supportable_widening_operation (enum tree_code code, gimple stmt,
6354 				tree vectype_out, tree vectype_in,
6355                                 enum tree_code *code1, enum tree_code *code2,
6356                                 int *multi_step_cvt,
6357                                 vec<tree> *interm_types)
6358 {
6359   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6360   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6361   struct loop *vect_loop = NULL;
6362   enum machine_mode vec_mode;
6363   enum insn_code icode1, icode2;
6364   optab optab1, optab2;
6365   tree vectype = vectype_in;
6366   tree wide_vectype = vectype_out;
6367   enum tree_code c1, c2;
6368   int i;
6369   tree prev_type, intermediate_type;
6370   enum machine_mode intermediate_mode, prev_mode;
6371   optab optab3, optab4;
6372 
6373   *multi_step_cvt = 0;
6374   if (loop_info)
6375     vect_loop = LOOP_VINFO_LOOP (loop_info);
6376 
6377   switch (code)
6378     {
6379     case WIDEN_MULT_EXPR:
6380       /* The result of a vectorized widening operation usually requires
6381 	 two vectors (because the widened results do not fit into one vector).
6382 	 The vector results would normally be expected to be
6383 	 generated in the same order as in the original scalar computation,
6384 	 i.e. if 8 results are generated in each vector iteration, they are
6385 	 to be organized as follows:
6386 		vect1: [res1,res2,res3,res4],
6387 		vect2: [res5,res6,res7,res8].
6388 
6389 	 However, in the special case that the result of the widening
6390 	 operation is used in a reduction computation only, the order doesn't
6391 	 matter (because when vectorizing a reduction we change the order of
6392 	 the computation).  Some targets can take advantage of this and
6393 	 generate more efficient code.  For example, targets like Altivec,
6394 	 that support widen_mult using a sequence of {mult_even,mult_odd}
6395 	 generate the following vectors:
6396 		vect1: [res1,res3,res5,res7],
6397 		vect2: [res2,res4,res6,res8].
6398 
6399 	 When vectorizing outer-loops, we execute the inner-loop sequentially
6400 	 (each vectorized inner-loop iteration contributes to VF outer-loop
6401 	 iterations in parallel).  We therefore don't allow changing the
6402 	 order of the computation in the inner-loop during outer-loop
6403 	 vectorization.  */
6404       /* TODO: Another case in which order doesn't *really* matter is when we
6405 	 widen and then contract again, e.g. (short)((int)x * y >> 8).
6406 	 Normally, pack_trunc performs an even/odd permute, whereas the
6407 	 repack from an even/odd expansion would be an interleave, which
6408 	 would be significantly simpler for e.g. AVX2.  */
6409       /* In any case, in order to avoid duplicating the code below, recurse
6410 	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
6411 	 are properly set up for the caller.  If we fail, we'll continue with
6412 	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
6413       if (vect_loop
6414 	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6415 	  && !nested_in_vect_loop_p (vect_loop, stmt)
6416 	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
6417 					     stmt, vectype_out, vectype_in,
6418 					     code1, code2, multi_step_cvt,
6419 					     interm_types))
6420 	return true;
6421       c1 = VEC_WIDEN_MULT_LO_EXPR;
6422       c2 = VEC_WIDEN_MULT_HI_EXPR;
6423       break;
6424 
6425     case VEC_WIDEN_MULT_EVEN_EXPR:
6426       /* Support the recursion induced just above.  */
6427       c1 = VEC_WIDEN_MULT_EVEN_EXPR;
6428       c2 = VEC_WIDEN_MULT_ODD_EXPR;
6429       break;
6430 
6431     case WIDEN_LSHIFT_EXPR:
6432       c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6433       c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6434       break;
6435 
6436     CASE_CONVERT:
6437       c1 = VEC_UNPACK_LO_EXPR;
6438       c2 = VEC_UNPACK_HI_EXPR;
6439       break;
6440 
6441     case FLOAT_EXPR:
6442       c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6443       c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6444       break;
6445 
6446     case FIX_TRUNC_EXPR:
6447       /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6448 	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6449 	 computing the operation.  */
6450       return false;
6451 
6452     default:
6453       gcc_unreachable ();
6454     }
6455 
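  /* On big-endian targets the _HI/_LO variants operate on the opposite
     halves of the vector with respect to element order, so swap them;
     the even/odd variants are insensitive to element order.  */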
6456   if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6457     {
6458       enum tree_code ctmp = c1;
6459       c1 = c2;
6460       c2 = ctmp;
6461     }
6462 
6463   if (code == FIX_TRUNC_EXPR)
6464     {
6465       /* The signedness is determined from the output operand.  */
6466       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6467       optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6468     }
6469   else
6470     {
6471       optab1 = optab_for_tree_code (c1, vectype, optab_default);
6472       optab2 = optab_for_tree_code (c2, vectype, optab_default);
6473     }
6474 
6475   if (!optab1 || !optab2)
6476     return false;
6477 
6478   vec_mode = TYPE_MODE (vectype);
6479   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6480        || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6481     return false;
6482 
6483   *code1 = c1;
6484   *code2 = c2;
6485 
6486   if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6487       && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6488     return true;
6489 
6490   /* Check if it's a multi-step conversion that can be done using intermediate
6491      types.  */
6492 
6493   prev_type = vectype;
6494   prev_mode = vec_mode;
6495 
6496   if (!CONVERT_EXPR_CODE_P (code))
6497     return false;
6498 
6499   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6500      intermediate steps in the promotion sequence.  We try
6501      MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6502      not.  */
6503   interm_types->create (MAX_INTERM_CVT_STEPS);
6504   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6505     {
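      /* Each step widens PREV_MODE into INTERMEDIATE_MODE, the result mode
         of the conversion found so far; require that this step is supported
         by the LO/HI (or even/odd) optabs and that the same operation also
         exists on INTERMEDIATE_MODE, so that the next step can start from
         there.  */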
6506       intermediate_mode = insn_data[icode1].operand[0].mode;
6507       intermediate_type
6508 	= lang_hooks.types.type_for_mode (intermediate_mode,
6509 					  TYPE_UNSIGNED (prev_type));
6510       optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6511       optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6512 
6513       if (!optab3 || !optab4
6514           || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6515 	  || insn_data[icode1].operand[0].mode != intermediate_mode
6516 	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6517 	  || insn_data[icode2].operand[0].mode != intermediate_mode
6518 	  || ((icode1 = optab_handler (optab3, intermediate_mode))
6519 	      == CODE_FOR_nothing)
6520 	  || ((icode2 = optab_handler (optab4, intermediate_mode))
6521 	      == CODE_FOR_nothing))
6522 	break;
6523 
6524       interm_types->quick_push (intermediate_type);
6525       (*multi_step_cvt)++;
6526 
6527       if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6528 	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6529 	return true;
6530 
6531       prev_type = intermediate_type;
6532       prev_mode = intermediate_mode;
6533     }
6534 
6535   interm_types->release ();
6536   return false;
6537 }
6538 
6539 
6540 /* Function supportable_narrowing_operation
6541 
6542    Check whether an operation represented by the code CODE is a
6543    narrowing operation that is supported by the target platform in
6544    vector form (i.e., when operating on arguments of type VECTYPE_IN
6545    and producing a result of type VECTYPE_OUT).
6546 
6547    Narrowing operations we currently support are NOP (CONVERT) and
6548    FIX_TRUNC.  This function checks if these operations are supported by
6549    the target platform directly via vector tree-codes.
6550 
6551    Output:
6552    - CODE1 is the code of a vector operation to be used when
6553    vectorizing the operation, if available.
6554    - MULTI_STEP_CVT determines the number of required intermediate steps in
6555    case of multi-step conversion (like int->short->char - in that case
6556    MULTI_STEP_CVT will be 1).
6557    - INTERM_TYPES contains the intermediate type required to perform the
6558    narrowing operation (short in the above example).   */
6559 
6560 bool
6561 supportable_narrowing_operation (enum tree_code code,
6562 				 tree vectype_out, tree vectype_in,
6563 				 enum tree_code *code1, int *multi_step_cvt,
6564                                  vec<tree> *interm_types)
6565 {
6566   enum machine_mode vec_mode;
6567   enum insn_code icode1;
6568   optab optab1, interm_optab;
6569   tree vectype = vectype_in;
6570   tree narrow_vectype = vectype_out;
6571   enum tree_code c1;
6572   tree intermediate_type;
6573   enum machine_mode intermediate_mode, prev_mode;
6574   int i;
6575   bool uns;
6576 
6577   *multi_step_cvt = 0;
6578   switch (code)
6579     {
6580     CASE_CONVERT:
6581       c1 = VEC_PACK_TRUNC_EXPR;
6582       break;
6583 
6584     case FIX_TRUNC_EXPR:
6585       c1 = VEC_PACK_FIX_TRUNC_EXPR;
6586       break;
6587 
6588     case FLOAT_EXPR:
6589       /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6590 	 tree code and optabs used for computing the operation.  */
6591       return false;
6592 
6593     default:
6594       gcc_unreachable ();
6595     }
6596 
6597   if (code == FIX_TRUNC_EXPR)
6598     /* The signedness is determined from the output operand.  */
6599     optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6600   else
6601     optab1 = optab_for_tree_code (c1, vectype, optab_default);
6602 
6603   if (!optab1)
6604     return false;
6605 
6606   vec_mode = TYPE_MODE (vectype);
6607   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6608     return false;
6609 
6610   *code1 = c1;
6611 
6612   if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6613     return true;
6614 
6615   /* Check if it's a multi-step conversion that can be done using intermediate
6616      types.  */
6617   prev_mode = vec_mode;
6618   if (code == FIX_TRUNC_EXPR)
6619     uns = TYPE_UNSIGNED (vectype_out);
6620   else
6621     uns = TYPE_UNSIGNED (vectype);
6622 
6623   /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6624      conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6625      costly than signed.  */
6626   if (code == FIX_TRUNC_EXPR && uns)
6627     {
6628       enum insn_code icode2;
6629 
6630       intermediate_type
6631 	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6632       interm_optab
6633 	= optab_for_tree_code (c1, intermediate_type, optab_default);
6634       if (interm_optab != unknown_optab
6635 	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6636 	  && insn_data[icode1].operand[0].mode
6637 	     == insn_data[icode2].operand[0].mode)
6638 	{
6639 	  uns = false;
6640 	  optab1 = interm_optab;
6641 	  icode1 = icode2;
6642 	}
6643     }
6644 
6645   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6646      intermediate steps in the demotion sequence.  We try
6647      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
6648   interm_types->create (MAX_INTERM_CVT_STEPS);
6649   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6650     {
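      /* Each step packs two vectors of PREV_MODE into one vector of
         INTERMEDIATE_MODE; require that this step is supported and that a
         further VEC_PACK_TRUNC from INTERMEDIATE_MODE exists, so that the
         next step can start from there.  */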
6651       intermediate_mode = insn_data[icode1].operand[0].mode;
6652       intermediate_type
6653 	= lang_hooks.types.type_for_mode (intermediate_mode, uns);
6654       interm_optab
6655 	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6656 			       optab_default);
6657       if (!interm_optab
6658 	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6659 	  || insn_data[icode1].operand[0].mode != intermediate_mode
6660 	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6661 	      == CODE_FOR_nothing))
6662 	break;
6663 
6664       interm_types->quick_push (intermediate_type);
6665       (*multi_step_cvt)++;
6666 
6667       if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6668 	return true;
6669 
6670       prev_mode = intermediate_mode;
6671       optab1 = interm_optab;
6672     }
6673 
6674   interm_types->release ();
6675   return false;
6676 }
6677