/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2020 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"


/* Recursively free the memory allocated for the SLP tree rooted at NODE.
   FINAL_P is true if we have vectorized the instance or if we have
   made a final decision not to vectorize the statements in any way.  */

static void
vect_free_slp_tree (slp_tree node, bool final_p)
{
  int i;
  slp_tree child;

  if (--node->refcnt != 0)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child, final_p);

  /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
     Some statements might no longer exist, after having been
     removed by vect_transform_stmt.  Updating the remaining
     statements would be redundant.  */
  if (!final_p)
    {
      stmt_vec_info stmt_info;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
	{
	  gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
	  STMT_VINFO_NUM_SLP_USES (stmt_info)--;
	}
    }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_OPS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}

/* Free the memory allocated for the SLP instance.  FINAL_P is true if we
   have vectorized the instance or if we have made a final decision not
   to vectorize the statements in any way.  */

void
vect_free_slp_instance (slp_instance instance, bool final_p)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
{
  slp_tree node;
  stmt_vec_info stmt_info = scalar_stmts[0];
  unsigned int nops;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_SCALAR_OPS (node) = vNULL;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;
  node->refcnt = 1;
  node->max_nunits = 1;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
    STMT_VINFO_NUM_SLP_USES (stmt_info)++;

  return node;
}

/* Create an SLP node for OPS.  */

static slp_tree
vect_create_new_slp_node (vec<tree> ops)
{
  slp_tree node;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = vNULL;
  SLP_TREE_SCALAR_OPS (node) = ops;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node) = vNULL;
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_external_def;
  node->refcnt = 1;
  node->max_nunits = 1;

  return node;
}


/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
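/* For example, given the (hypothetical) two-statement group
     a0 = b0 + c0;
     a1 = b1 + c1;
   one instance describes operand 0 and collects the defs of {b0, b1},
   and another describes operand 1 and collects the defs of {c0, c1}.  */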
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<stmt_vec_info> def_stmts;
  /* Operands.  */
  vec<tree> ops;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool any_pattern;
} *slp_oprnd_info;


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->ops.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->any_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      oprnd_info->ops.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Return true if STMTS contains a pattern statement.  */

static bool
vect_contains_pattern_stmt_p (vec<stmt_vec_info> stmts)
{
  stmt_vec_info stmt_info;
  unsigned int i;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    if (is_pattern_stmt_p (stmt_info))
      return true;
  return false;
}

/* Find the place of the data-ref in STMT_INFO in the interleaving chain
   that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
   of the chain.  */
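/* For example, for a chain over accesses a[0], a[1], a[3] (a gap at
   a[2]), and assuming the convention that DR_GROUP_GAP of a non-first
   member is its distance from the previous member (1 for a[1], 2 for
   a[3]), the places computed below are 0, 1 and 3 respectively.  */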

int
vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
				      stmt_vec_info first_stmt_info)
{
  stmt_vec_info next_stmt_info = first_stmt_info;
  int result = 0;

  if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
    return -1;

  do
    {
      if (next_stmt_info == stmt_info)
	return result;
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      if (next_stmt_info)
	result += DR_GROUP_GAP (next_stmt_info);
    }
  while (next_stmt_info);

  return -1;
}

/* Check whether it is possible to load COUNT elements of type ELT_TYPE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull).  */
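/* As an illustration, for COUNT == 4 and a 4-byte ELT_TYPE on a target
   with 128-bit vectors, ELT_BYTES starts at 16 (no suitably sized vector
   of four 16-byte integers exists), is halved to 8 with NVECTORS == 2
   (still too wide), and is halved again to 4 with NVECTORS == 4, where
   a V4SI-style vector matches the size of the base vector mode and only
   the two interleaving permutes remain to be checked.  (The concrete
   modes are illustrative.)  */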

bool
can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
				tree elt_type, unsigned int *nvectors_out,
				tree *vector_type_out,
				tree *permutes)
{
  tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count);
  if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
    return false;

  machine_mode base_vector_mode = TYPE_MODE (base_vector_type);
  poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode);
  unsigned int nvectors = 1;
  for (;;)
    {
      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      if (int_mode_for_size (elt_bits, 1).exists (&int_mode))
	{
	  /* Get the natural vector type for this SLP group size.  */
	  tree int_type = build_nonstandard_integer_type
	    (GET_MODE_BITSIZE (int_mode), 1);
	  tree vector_type
	    = get_vectype_for_scalar_type (vinfo, int_type, count);
	  if (vector_type
	      && VECTOR_MODE_P (TYPE_MODE (vector_type))
	      && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
			   GET_MODE_SIZE (base_vector_mode)))
	    {
	      /* Try fusing consecutive sequences of COUNT / NVECTORS elements
		 together into elements of type INT_TYPE and using the result
		 to build NVECTORS vectors.  */
	      poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
	      vec_perm_builder sel1 (nelts, 2, 3);
	      vec_perm_builder sel2 (nelts, 2, 3);
	      poly_int64 half_nelts = exact_div (nelts, 2);
	      for (unsigned int i = 0; i < 3; ++i)
		{
		  sel1.quick_push (i);
		  sel1.quick_push (i + nelts);
		  sel2.quick_push (half_nelts + i);
		  sel2.quick_push (half_nelts + i + nelts);
		}
	      vec_perm_indices indices1 (sel1, 2, nelts);
	      vec_perm_indices indices2 (sel2, 2, nelts);
	      if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
		  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
		{
		  if (nvectors_out)
		    *nvectors_out = nvectors;
		  if (vector_type_out)
		    *vector_type_out = vector_type;
		  if (permutes)
		    {
		      permutes[0] = vect_gen_perm_mask_checked (vector_type,
								indices1);
		      permutes[1] = vect_gen_perm_mask_checked (vector_type,
								indices2);
		    }
		  return true;
		}
	    }
	}
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
	return false;
      nvectors *= 2;
    }
}

/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  A non-zero *SWAP
   indicates that a swap is required for cond_expr stmts.  Specifically,
   *SWAP is 1 if STMT is a cond_expr and the operands of its comparison
   need to be swapped; *SWAP is 2 if STMT is a cond_expr and the comparison
   code needs to be inverted.  If any operands are swapped in this function,
   *SWAP is set to a non-zero value.
   If there was a fatal error, return -1; if the error could be corrected by
   swapping the operands of this node's parent, return 1; if everything is
   OK, return 0.  */
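/* For example, for the (hypothetical) cond_expr a = x < y ? p : q,
   *SWAP == 1 reads the operands as if the stmt were a = y < x ? p : q
   (comparison operands swapped), while *SWAP == 2 reads them as if it
   were a = x < y ? q : p (arms swapped, matching an inverted comparison
   code).  */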
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
			     vec<stmt_vec_info> stmts, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
{
  stmt_vec_info stmt_info = stmts[stmt_num];
  tree oprnd;
  unsigned int i, number_of_oprnds;
  enum vect_def_type dt = vect_uninitialized_def;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  unsigned int commutative_op = -1U;
  bool first_op_cond = false;
  bool first = stmt_num == 0;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
      if (gimple_call_internal_p (stmt))
	{
	  internal_fn ifn = gimple_call_internal_fn (stmt);
	  commutative_op = first_commutative_argument (ifn);

	  /* Masked load, only look at mask.  */
	  if (ifn == IFN_MASK_LOAD)
	    {
	      number_of_oprnds = 1;
	      /* Mask operand index.  */
	      first_op_idx = 5;
	    }
	}
    }
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
	 could end up with a different comparison code from the first stmt.  */
      if (code == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
	{
	  first_op_cond = true;
	  number_of_oprnds++;
	}
      else
	commutative_op = commutative_tree_code (code) ? 0U : -1U;
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
	{
	  /* Map indicating how operands of cond_expr should be swapped.  */
	  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
	  int *map = maps[*swap];

	  if (i < 2)
	    oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
					     first_op_idx), map[i]);
	  else
	    oprnd = gimple_op (stmt_info->stmt, map[i]);
	}
      else
	oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
      if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
	oprnd = TREE_OPERAND (oprnd, 0);

      oprnd_info = (*oprnds_info)[i];

      stmt_vec_info def_stmt_info;
      if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: can't analyze def for %T\n",
			     oprnd);

	  return -1;
	}

      if (def_stmt_info && is_pattern_stmt_p (def_stmt_info))
	oprnd_info->any_pattern = true;

      if (first)
	{
	  /* For the swapping logic below force vect_reduction_def
	     for the reduction op in a SLP reduction group.  */
	  if (!STMT_VINFO_DATA_REF (stmt_info)
	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
	      && (int)i == STMT_VINFO_REDUC_IDX (stmt_info)
	      && def_stmt_info)
	    dt = vect_reduction_def;
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     end in the reduction chain.  */
	  tree type = TREE_TYPE (oprnd);
	  if ((oprnd_info->first_dt != dt
	       && !(oprnd_info->first_dt == vect_reduction_def
		    && !STMT_VINFO_DATA_REF (stmt_info)
		    && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
		    && def_stmt_info
		    && !STMT_VINFO_DATA_REF (def_stmt_info)
		    && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
			== REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
	       && !((oprnd_info->first_dt == vect_external_def
		     || oprnd_info->first_dt == vect_constant_def)
		    && (dt == vect_external_def
			|| dt == vect_constant_def)))
	      || !types_compatible_p (oprnd_info->first_op_type, type)
	      || (!STMT_VINFO_DATA_REF (stmt_info)
		  && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
		  && ((!def_stmt_info
		       || STMT_VINFO_DATA_REF (def_stmt_info)
		       || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
			   != REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
		      != (oprnd_info->first_dt != vect_reduction_def))))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == commutative_op && !swapped)
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "trying swapped operands\n");
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	  if ((dt == vect_constant_def
	       || dt == vect_external_def)
	      && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
	      && (TREE_CODE (type) == BOOLEAN_TYPE
		  || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
						      type)))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: invalid type of def "
				 "for variable-length SLP %T\n", oprnd);
	      return -1;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_external_def:
	  /* Make sure to demote the overall operand to external.  */
	  oprnd_info->first_dt = vect_external_def;
	  /* Fallthru.  */
	case vect_constant_def:
	  oprnd_info->def_stmts.quick_push (NULL);
	  oprnd_info->ops.quick_push (oprnd);
	  break;

	case vect_internal_def:
	case vect_reduction_def:
	  if (oprnd_info->first_dt == vect_reduction_def
	      && !STMT_VINFO_DATA_REF (stmt_info)
	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
	      && !STMT_VINFO_DATA_REF (def_stmt_info)
	      && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
		  == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
	    {
	      /* For a SLP reduction chain we want to duplicate the
	         reduction to each of the chain members.  That gets
		 us a sane SLP graph (still the stmts are not 100%
		 correct wrt the initial values).  */
	      gcc_assert (!first);
	      oprnd_info->def_stmts.quick_push (oprnd_info->def_stmts[0]);
	      oprnd_info->ops.quick_push (oprnd_info->ops[0]);
	      break;
	    }
	  /* Fallthru.  */
	case vect_induction_def:
	  oprnd_info->def_stmts.quick_push (def_stmt_info);
	  oprnd_info->ops.quick_push (oprnd);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: illegal type of def %T\n",
			     oprnd);

	  return -1;
	}
    }

  /* Swap operands.  */
  if (swapped)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "swapped operands to match def types in %G",
			 stmt_info->stmt);
    }

  *swap = swapped;
  return 0;
}

/* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
   Return true if we can, meaning that this choice doesn't conflict with
   existing SLP nodes that use STMT_INFO.  */
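/* For example, if another SLP instance already assigned V8HImode to a
   used member of the same load group, a request for V4SImode here is
   rejected; if no member of the group is in use yet, the whole group is
   switched to the new vector type.  (The mode names are illustrative.)  */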

static bool
vect_update_shared_vectype (stmt_vec_info stmt_info, tree vectype)
{
  tree old_vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (old_vectype && useless_type_conversion_p (vectype, old_vectype))
    return true;

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    {
      /* We maintain the invariant that if any statement in the group is
	 used, all other members of the group have the same vector type.  */
      stmt_vec_info first_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      stmt_vec_info member_info = first_info;
      for (; member_info; member_info = DR_GROUP_NEXT_ELEMENT (member_info))
	if (STMT_VINFO_NUM_SLP_USES (member_info) > 0
	    || is_pattern_stmt_p (member_info))
	  break;

      if (!member_info)
	{
	  for (member_info = first_info; member_info;
	       member_info = DR_GROUP_NEXT_ELEMENT (member_info))
	    STMT_VINFO_VECTYPE (member_info) = vectype;
	  return true;
	}
    }
  else if (STMT_VINFO_NUM_SLP_USES (stmt_info) == 0
	   && !is_pattern_stmt_p (stmt_info))
    {
      STMT_VINFO_VECTYPE (stmt_info) = vectype;
      return true;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		       "Build SLP failed: incompatible vector"
		       " types for: %G", stmt_info->stmt);
      dump_printf_loc (MSG_NOTE, vect_location,
		       "    old vector type: %T\n", old_vectype);
      dump_printf_loc (MSG_NOTE, vect_location,
		       "    new vector type: %T\n", vectype);
    }
  return false;
}

/* Try to infer and assign a vector type to all the statements in STMTS.
   Used only for BB vectorization.  */

static bool
vect_update_all_shared_vectypes (vec<stmt_vec_info> stmts)
{
  tree vectype, nunits_vectype;
  if (!vect_get_vector_types_for_stmt (stmts[0], &vectype,
				       &nunits_vectype, stmts.length ()))
    return false;

  stmt_vec_info stmt_info;
  unsigned int i;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    if (!vect_update_shared_vectype (stmt_info, vectype))
      return false;

  return true;
}

/* Return true if call statements CALL1 and CALL2 are similar enough
   to be combined into the same SLP group.  */

static bool
compatible_calls_p (gcall *call1, gcall *call2)
{
  unsigned int nargs = gimple_call_num_args (call1);
  if (nargs != gimple_call_num_args (call2))
    return false;

  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
    return false;

  if (gimple_call_internal_p (call1))
    {
      if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
			       TREE_TYPE (gimple_call_lhs (call2))))
	return false;
      for (unsigned int i = 0; i < nargs; ++i)
	if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
				 TREE_TYPE (gimple_call_arg (call2, i))))
	  return false;
    }
  else
    {
      if (!operand_equal_p (gimple_call_fn (call1),
			    gimple_call_fn (call2), 0))
	return false;

      if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
	return false;
    }
  return true;
}

/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT_INFO with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for STMT_INFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  GROUP_SIZE and MAX_NUNITS are as for
   vect_build_slp_tree.  */
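/* For example, with GROUP_SIZE == 4, a V4SImode VECTYPE is fine for
   basic-block SLP, whereas a V8HImode VECTYPE would need more scalar
   stmts than the group provides (i.e. unrolling) and is rejected.  */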

static bool
vect_record_max_nunits (stmt_vec_info stmt_info, unsigned int group_size,
			tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Build SLP failed: unsupported data-type in %G\n",
			 stmt_info->stmt);
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (STMT_VINFO_BB_VINFO (stmt_info)
      && (!nunits.is_constant (&const_nunits)
	  || const_nunits > group_size))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Build SLP failed: unrolling required "
			 "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}

/* STMTS is a group of GROUP_SIZE SLP statements in which some
   statements do the same operation as the first statement and in which
   the others do ALT_STMT_CODE.  Return true if we can take one vector
   of the first operation and one vector of the second and permute them
   to get the required result.  VECTYPE is the type of the vector that
   would be permuted.  */
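/* For example, for the group {PLUS, MINUS, PLUS, MINUS} and a
   four-element VECTYPE, the selector built below is {0, 5, 2, 7}: even
   elements come from the vector of PLUS results and odd elements from
   the vector of MINUS results, and the target must support that
   constant permute.  */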

static bool
vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
			       unsigned int group_size, tree vectype,
			       tree_code alt_stmt_code)
{
  unsigned HOST_WIDE_INT count;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
    return false;

  vec_perm_builder sel (count, count, 1);
  for (unsigned int i = 0; i < count; ++i)
    {
      unsigned int elt = i;
      gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
      if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
	elt += count;
      sel.quick_push (elt);
    }
  vec_perm_indices indices (sel, 2, count);
  return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
}

/* Verify that the scalar stmts STMTS are isomorphic, do not require data
   permutation and are not of unsupported types of operation.  Return
   true if so; otherwise return false and indicate in *MATCHES which
   stmts are not isomorphic to the first one.  If MATCHES[0] is false
   then this indicates that the comparison could not be carried out or
   that the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is a COND_EXPR and is isomorphic
   to the first stmt by swapping the two operands of its comparison; set
   SWAP[i] to 2 if stmt I is isomorphic to the first stmt by inverting the
   code of the comparison.  Take A1 >= B1 ? X1 : Y1 as an example: it can
   be swapped to B1 <= A1 ? X1 : Y1, or inverted to A1 < B1 ? Y1 : X1.  */

static bool
vect_build_slp_tree_1 (unsigned char *swap,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits, bool *matches,
		       bool *two_operators)
{
  unsigned int i;
  stmt_vec_info first_stmt_info = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  stmt_vec_info first_load = NULL, prev_first_load = NULL;
  bool load_p = false;

  /* For every stmt in NODE find its def stmt/s.  */
  stmt_vec_info stmt_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      vec_info *vinfo = stmt_info->vinfo;
      gimple *stmt = stmt_info->stmt;
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (stmt_info))
        {
          if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: unvectorizable statement %G",
			     stmt);
	  /* Fatal mismatch.  */
	  matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: not GIMPLE_ASSIGN nor "
			     "GIMPLE_CALL %G", stmt);
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      tree nunits_vectype;
      if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
					   &nunits_vectype, group_size)
	  || (nunits_vectype
	      && !vect_record_max_nunits (stmt_info, group_size,
					  nunits_vectype, max_nunits)))
	{
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      gcc_assert (vectype);

      if (is_a <bb_vec_info> (vinfo)
	  && !vect_update_shared_vectype (stmt_info, vectype))
	continue;

      gcall *call_stmt = dyn_cast <gcall *> (stmt);
      if (call_stmt)
	{
	  rhs_code = CALL_EXPR;

	  if (gimple_call_internal_p (stmt, IFN_MASK_LOAD))
	    load_p = true;
	  else if ((gimple_call_internal_p (call_stmt)
		    && (!vectorizable_internal_fn_p
			(gimple_call_internal_fn (call_stmt))))
		   || gimple_call_tail_p (call_stmt)
		   || gimple_call_noreturn_p (call_stmt)
		   || !gimple_call_nothrow_p (call_stmt)
		   || gimple_call_chain (call_stmt))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: unsupported call type %G",
				 call_stmt);
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	{
	  rhs_code = gimple_assign_rhs_code (stmt);
	  load_p = TREE_CODE_CLASS (rhs_code) == tcc_reference;
	}

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
	  else if (call_stmt
		   && gimple_call_internal_p (call_stmt, IFN_DIV_POW2))
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_call_arg (call_stmt, 1);
	    }
	}
      else
	{
	  if (first_stmt_code != rhs_code
	      && alt_stmt_code == ERROR_MARK)
	    alt_stmt_code = rhs_code;
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
	      /* Handle mismatches in plus/minus by computing both
		 and merging the results.  */
	      && !((first_stmt_code == PLUS_EXPR
		    || first_stmt_code == MINUS_EXPR)
		   && (alt_stmt_code == PLUS_EXPR
		       || alt_stmt_code == MINUS_EXPR)
		   && rhs_code == alt_stmt_code)
	      && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt %G", stmt);
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "original stmt %G", first_stmt_info->stmt);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (need_same_oprnds)
	    {
	      tree other_op1 = (call_stmt
				? gimple_call_arg (call_stmt, 1)
				: gimple_assign_rhs2 (stmt));
	      if (!operand_equal_p (first_op1, other_op1, 0))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different shift "
				     "arguments in %G", stmt);
		  /* Mismatch.  */
		  continue;
		}
	    }

	  if (!load_p && rhs_code == CALL_EXPR)
	    {
	      if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
				       as_a <gcall *> (stmt)))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different calls in %G",
				     stmt);
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					 vect_location,
					 "Build SLP failed: different "
					 "interleaving chains in one node %G",
					 stmt);
		      /* Mismatch.  */
		      continue;
                    }
                }
              else
                prev_first_load = first_load;
           }
        } /* Grouped access.  */
      else
	{
	  if (load_p)
	    {
	      /* Not a grouped load.  */
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: not grouped load %G", stmt);

	      /* FORNOW: non-grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not a memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && TREE_CODE_CLASS (rhs_code) != tcc_expression
	      && TREE_CODE_CLASS (rhs_code) != tcc_comparison
	      && rhs_code != VIEW_CONVERT_EXPR
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: operation unsupported %G",
				 stmt);
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      tree cond_expr = gimple_assign_rhs1 (stmt);
	      enum tree_code cond_code = TREE_CODE (cond_expr);
	      enum tree_code swap_code = ERROR_MARK;
	      enum tree_code invert_code = ERROR_MARK;

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		{
		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
		  swap_code = swap_tree_comparison (cond_code);
		  invert_code = invert_tree_comparison (cond_code, honor_nans);
		}

	      if (first_cond_code == cond_code)
		;
	      /* Isomorphic can be achieved by swapping.  */
	      else if (first_cond_code == swap_code)
		swap[i] = 1;
	      /* Isomorphic can be achieved by inverting.  */
	      else if (first_cond_code == invert_code)
		swap[i] = 2;
	      else
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different"
				     " operation %G", stmt);
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      if (!vect_two_operations_perm_ok_p (stmts, group_size,
					  vectype, alt_stmt_code))
	{
	  for (i = 0; i < group_size; ++i)
	    if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
	      {
		matches[i] = false;
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different operation "
				     "in stmt %G", stmts[i]->stmt);
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "original stmt %G", first_stmt_info->stmt);
		  }
	      }
	  return false;
	}
      *two_operators = true;
    }

  return true;
}

/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <stmt_vec_info> value_type;
  typedef vec <stmt_vec_info> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static const bool empty_zero_p = true;
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]->stmt));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_map <vec <gimple *>, slp_tree,
		  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       scalar_stmts_to_slp_tree_map_t *bst_map);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
		     vec<stmt_vec_info> stmts, unsigned int group_size,
		     poly_uint64 *max_nunits,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     scalar_stmts_to_slp_tree_map_t *bst_map)
{
  if (slp_tree *leader = bst_map->get (stmts))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
			 *leader ? "" : "failed ", *leader);
      if (*leader)
	{
	  (*leader)->refcnt++;
	  vect_update_max_nunits (max_nunits, (*leader)->max_nunits);
	}
      return *leader;
    }
  poly_uint64 this_max_nunits = 1;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size,
					&this_max_nunits,
					matches, npermutes, tree_size, bst_map);
  if (res)
    {
      res->max_nunits = this_max_nunits;
      vect_update_max_nunits (max_nunits, this_max_nunits);
      /* Keep a reference for the bst_map use.  */
      res->refcnt++;
    }
  bst_map->put (stmts.copy (), res);
  return res;
}

/* Recursively build an SLP tree starting from NODE.
   Fail (and return NULL) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise,
   return the built SLP node.  MATCHES indicates which scalar stmts
   (mis)matched the first one; a false MATCHES[0] signals a fatal
   mismatch.  */

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       scalar_stmts_to_slp_tree_map_t *bst_map)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  slp_tree node;

  matches[0] = false;

  stmt_vec_info stmt_info = stmts[0];
  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
      if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
	return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
	{
	  stmt_vec_info other_info;
	  FOR_EACH_VEC_ELT (stmts, i, other_info)
	    if (stmt_info != other_info)
	      return NULL;
	}
      else if (def_type == vect_reduction_def
	       || def_type == vect_double_reduction_def
	       || def_type == vect_nested_cycle)
	{
	  /* Else def types have to match.  */
	  stmt_vec_info other_info;
	  FOR_EACH_VEC_ELT (stmts, i, other_info)
	    if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
	      return NULL;
	}
      else
	return NULL;
      (*tree_size)++;
      node = vect_create_new_slp_node (stmts);
      return node;
    }


  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (swap, stmts, group_size,
			      &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion unless masked.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    {
      if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
	{
	  /* Masked load.  */
	  gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD));
	  nops = 1;
	}
      else
	{
	  *max_nunits = this_max_nunits;
	  (*tree_size)++;
	  node = vect_create_new_slp_node (stmts);
	  /* And compute the load permutation.  Whether it is actually
	     a permutation depends on the unrolling factor which is
	     decided later.  */
	  vec<unsigned> load_permutation;
	  int j;
	  stmt_vec_info load_info;
	  load_permutation.create (group_size);
	  stmt_vec_info first_stmt_info
	    = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
	    {
	      int load_place = vect_get_place_in_interleaving_chain
		  (load_info, first_stmt_info);
	      gcc_assert (load_place != -1);
	      load_permutation.safe_push (load_place);
	    }
	  SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
	  return node;
	}
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
					     stmts, i, &oprnds_info);
      if (res != 0)
	matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
	break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
	vect_free_oprnd_info (oprnds_info);
	return NULL;
      }

  auto_vec<slp_tree, 4> children;

  stmt_info = stmts[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt == vect_uninitialized_def)
	{
	  /* COND_EXPRs have one operand too many when the condition
	     is an SSA name.  */
	  gcc_assert (i == 3 && nops == 4);
	  continue;
	}

      if (oprnd_info->first_dt != vect_internal_def
	  && oprnd_info->first_dt != vect_reduction_def
	  && oprnd_info->first_dt != vect_induction_def)
	{
	  slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
	  SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
	  oprnd_info->ops = vNULL;
	  children.safe_push (invnode);
	  continue;
	}

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					group_size, &this_max_nunits,
					matches, npermutes,
					&this_tree_size, bst_map)) != NULL)
	{
	  /* If we have all children of a non-unary child built up from
	     scalars then just throw that away and build it up this node
	     from scalars.  */
	  if (is_a <bb_vec_info> (vinfo)
	      && SLP_TREE_CHILDREN (child).length () > 1
	      /* ???  Rejecting patterns this way doesn't work.  We'd have to
		 do extra work to cancel the pattern so the uses see the
		 scalar version.  */
	      && !oprnd_info->any_pattern)
	    {
	      slp_tree grandchild;

	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
		  break;
	      if (!grandchild
		  && vect_update_all_shared_vectypes (oprnd_info->def_stmts))
		{
		  /* Roll back.  */
		  this_tree_size = old_tree_size;
		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    vect_free_slp_tree (grandchild, false);
		  SLP_TREE_CHILDREN (child).truncate (0);

		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "Building parent vector operands from "
				     "scalars instead\n");
		  oprnd_info->def_stmts = vNULL;
		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
		  SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
		  oprnd_info->ops = vNULL;
		  ++this_tree_size;
		  children.safe_push (child);
		  continue;
		}
	    }

	  oprnd_info->def_stmts = vNULL;
	  children.safe_push (child);
	  continue;
	}

      /* If the SLP build failed fatally and we are analyzing a basic
	 block, simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ???  This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
	  && !matches[0]
	  /* ???  Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (stmt_info)
	  && !oprnd_info->any_pattern
	  && vect_update_all_shared_vectypes (oprnd_info->def_stmts))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Building vector operands from scalars\n");
	  this_tree_size++;
	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
	  SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
	  children.safe_push (child);
	  oprnd_info->ops = vNULL;
	  oprnd_info->def_stmts = vNULL;
	  continue;
	}

      /* If the SLP build for operand zero failed and operands zero
	 and one can be commuted, try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ???  For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt_info->stmt)
	  /* Swapping operands for reductions breaks assumptions later on.  */
	  && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
	  && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def
	  /* Do so only if the number of unsuccessful permutes was not
	     more than a cut-off, as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* See whether we can swap the matching or the non-matching
	     stmt operands.  */
	  bool swap_not_matching = true;
	  do
	    {
	      for (j = 0; j < group_size; ++j)
		{
		  if (matches[j] != !swap_not_matching)
		    continue;
		  stmt_vec_info stmt_info = stmts[j];
		  /* Verify if we can swap operands of this stmt.  */
		  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
		  if (!stmt
		      || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
		    {
		      if (!swap_not_matching)
			goto fail;
		      swap_not_matching = false;
		      break;
		    }
		}
	    }
	  while (j != group_size);

	  /* Swap mismatched definition stmts.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (matches[j] == !swap_not_matching)
	      {
		std::swap (oprnds_info[0]->def_stmts[j],
			   oprnds_info[1]->def_stmts[j]);
		std::swap (oprnds_info[0]->ops[j],
			   oprnds_info[1]->ops[j]);
		if (dump_enabled_p ())
		  dump_printf (MSG_NOTE, "%d ", j);
	      }
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "\n");
	  /* And try again with scratch 'matches' ... */
	  bool *tem = XALLOCAVEC (bool, group_size);
	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					    group_size, &this_max_nunits,
					    tem, npermutes,
					    &this_tree_size, bst_map)) != NULL)
	    {
	      /* If we have all children of a non-unary child built up from
		 scalars then just throw that away and build it up this node
		 from scalars.  */
	      if (is_a <bb_vec_info> (vinfo)
		  && SLP_TREE_CHILDREN (child).length () > 1
		  /* ???  Rejecting patterns this way doesn't work.  We'd have
		     to do extra work to cancel the pattern so the uses see the
		     scalar version.  */
		  && !oprnd_info->any_pattern)
		{
		  unsigned int j;
		  slp_tree grandchild;

		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
		      break;
		  if (!grandchild
		      && (vect_update_all_shared_vectypes
			  (oprnd_info->def_stmts)))
		    {
		      /* Roll back.  */
		      this_tree_size = old_tree_size;
		      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
			vect_free_slp_tree (grandchild, false);
		      SLP_TREE_CHILDREN (child).truncate (0);

		      if (dump_enabled_p ())
			dump_printf_loc (MSG_NOTE, vect_location,
					 "Building parent vector operands from "
					 "scalars instead\n");
		      oprnd_info->def_stmts = vNULL;
		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
		      SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
		      oprnd_info->ops = vNULL;
		      ++this_tree_size;
		      children.safe_push (child);
		      continue;
		    }
		}

	      oprnd_info->def_stmts = vNULL;
	      children.safe_push (child);
	      continue;
	    }

	  ++*npermutes;
	}

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
	vect_free_slp_tree (child, false);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  *tree_size += this_tree_size + 1;
  *max_nunits = this_max_nunits;

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}

1592 /* Dump a slp tree NODE using flags specified in DUMP_KIND.  */
1593 
1594 static void
1595 vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1596 		     slp_tree node, hash_set<slp_tree> &visited)
1597 {
1598   unsigned i, j;
1599   stmt_vec_info stmt_info;
1600   slp_tree child;
1601   tree op;
1602 
1603   if (visited.add (node))
1604     return;
1605 
1606   dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
1607   dump_user_location_t user_loc = loc.get_user_location ();
1608   dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u, refcnt=%u)\n",
1609 		   SLP_TREE_DEF_TYPE (node) == vect_external_def
1610 		   ? " (external)"
1611 		   : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
1612 		      ? " (constant)"
1613 		      : ""), node,
1614 		   estimated_poly_value (node->max_nunits), node->refcnt);
1615   if (SLP_TREE_SCALAR_STMTS (node).exists ())
1616     FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1617       dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt);
1618   else
1619     {
1620       dump_printf_loc (metadata, user_loc, "\t{ ");
1621       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
1622 	dump_printf (metadata, "%T%s ", op,
1623 		     i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? "," : "");
1624       dump_printf (metadata, "}\n");
1625     }
1626   if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
1627     {
1628       dump_printf_loc (metadata, user_loc, "\tload permutation {");
1629       FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), i, j)
1630 	dump_printf (dump_kind, " %u", j);
1631       dump_printf (dump_kind, " }\n");
1632     }
1633   if (SLP_TREE_CHILDREN (node).is_empty ())
1634     return;
1635   dump_printf_loc (metadata, user_loc, "\tchildren");
1636   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1637     dump_printf (dump_kind, " %p", (void *)child);
1638   dump_printf (dump_kind, "\n");
1639   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1640     vect_print_slp_tree (dump_kind, loc, child, visited);
1641 }
1642 
1643 static void
1644 vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1645 		     slp_tree node)
1646 {
1647   hash_set<slp_tree> visited;
1648   vect_print_slp_tree (dump_kind, loc, node, visited);
1649 }
1650 
1651 /* Mark the tree rooted at NODE with PURE_SLP.  */
1652 
1653 static void
1654 vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
1655 {
1656   int i;
1657   stmt_vec_info stmt_info;
1658   slp_tree child;
1659 
1660   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1661     return;
1662 
1663   if (visited.add (node))
1664     return;
1665 
1666   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1667     STMT_SLP_TYPE (stmt_info) = pure_slp;
1668 
1669   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1670     vect_mark_slp_stmts (child, visited);
1671 }
1672 
1673 static void
1674 vect_mark_slp_stmts (slp_tree node)
1675 {
1676   hash_set<slp_tree> visited;
1677   vect_mark_slp_stmts (node, visited);
1678 }
1679 
1680 /* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */
1681 
1682 static void
1683 vect_mark_slp_stmts_relevant (slp_tree node, hash_set<slp_tree> &visited)
1684 {
1685   int i;
1686   stmt_vec_info stmt_info;
1687   slp_tree child;
1688 
1689   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1690     return;
1691 
1692   if (visited.add (node))
1693     return;
1694 
1695   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1696     {
1697       gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1698                   || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1699       STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1700     }
1701 
1702   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1703     vect_mark_slp_stmts_relevant (child, visited);
1704 }
1705 
1706 static void
1707 vect_mark_slp_stmts_relevant (slp_tree node)
1708 {
1709   hash_set<slp_tree> visited;
1710   vect_mark_slp_stmts_relevant (node, visited);
1711 }
1712 
1713 /* Copy the SLP subtree rooted at NODE.  */
1714 
1715 static slp_tree
1716 slp_copy_subtree (slp_tree node, hash_map<slp_tree, slp_tree> &map)
1717 {
1718   unsigned i;
1719 
1720   bool existed_p;
1721   slp_tree &copy_ref = map.get_or_insert (node, &existed_p);
1722   if (existed_p)
1723     return copy_ref;
1724 
1725   copy_ref = XNEW (_slp_tree);
1726   slp_tree copy = copy_ref;
1727   memcpy (copy, node, sizeof (_slp_tree));
1728   if (SLP_TREE_SCALAR_STMTS (node).exists ())
1729     {
1730       SLP_TREE_SCALAR_STMTS (copy) = SLP_TREE_SCALAR_STMTS (node).copy ();
1731       stmt_vec_info stmt_info;
1732       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1733 	STMT_VINFO_NUM_SLP_USES (stmt_info)++;
1734     }
1735   if (SLP_TREE_SCALAR_OPS (node).exists ())
1736     SLP_TREE_SCALAR_OPS (copy) = SLP_TREE_SCALAR_OPS (node).copy ();
1737   if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
1738     SLP_TREE_LOAD_PERMUTATION (copy) = SLP_TREE_LOAD_PERMUTATION (node).copy ();
1739   if (SLP_TREE_CHILDREN (node).exists ())
1740     SLP_TREE_CHILDREN (copy) = SLP_TREE_CHILDREN (node).copy ();
1741   gcc_assert (!SLP_TREE_VEC_STMTS (node).exists ());
1742   copy->refcnt = 0;
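  /* The fresh copy starts unreferenced; the root caller and the parent
     copies linking to it below are expected to take their own
     references.  */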
1743 
1744   slp_tree child;
1745   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (copy), i, child)
1746     {
1747       SLP_TREE_CHILDREN (copy)[i] = slp_copy_subtree (child, map);
1748       SLP_TREE_CHILDREN (copy)[i]->refcnt++;
1749     }
1750   return copy;
1751 }
1752 
1753 /* Rearrange the statements of NODE according to PERMUTATION.  */
1754 
1755 static void
1756 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1757                           vec<unsigned> permutation,
1758 			  hash_set<slp_tree> &visited)
1759 {
1760   unsigned int i;
1761   slp_tree child;
1762 
1763   if (visited.add (node))
1764     return;
1765 
1766   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1767     vect_slp_rearrange_stmts (child, group_size, permutation, visited);
1768 
1769   if (SLP_TREE_SCALAR_STMTS (node).exists ())
1770     {
1771       gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1772       /* ???  Computation nodes are isomorphic and need no rearrangement.
1773 	 This is a quick hack to cover those where rearrangement breaks
1774 	 semantics because only the first stmt is guaranteed to have the
1775 	 correct operation code due to others being swapped or inverted.  */
1776       stmt_vec_info first = SLP_TREE_SCALAR_STMTS (node)[0];
1777       if (is_gimple_assign (first->stmt)
1778 	  && gimple_assign_rhs_code (first->stmt) == COND_EXPR)
1779 	return;
1780       vec<stmt_vec_info> tmp_stmts;
1781       tmp_stmts.create (group_size);
1782       tmp_stmts.quick_grow (group_size);
1783       stmt_vec_info stmt_info;
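      /* The permutation scatters: the stmt currently at index I ends up
	 in slot PERMUTATION[I].  */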
1784       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1785 	tmp_stmts[permutation[i]] = stmt_info;
1786       SLP_TREE_SCALAR_STMTS (node).release ();
1787       SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1788     }
1789   if (SLP_TREE_SCALAR_OPS (node).exists ())
1790     {
1791       gcc_assert (group_size == SLP_TREE_SCALAR_OPS (node).length ());
1792       vec<tree> tmp_ops;
1793       tmp_ops.create (group_size);
1794       tmp_ops.quick_grow (group_size);
1795       tree op;
1796       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
1797 	tmp_ops[permutation[i]] = op;
1798       SLP_TREE_SCALAR_OPS (node).release ();
1799       SLP_TREE_SCALAR_OPS (node) = tmp_ops;
1800     }
1801 }
1802 
1803 
1804 /* Attempt to reorder stmts in a reduction chain so that we don't
1805    require any load permutation.  Return true if that was possible,
1806    otherwise return false.  */
1807 
1808 static bool
1809 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1810 {
1811   unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1812   unsigned int i, j;
1813   unsigned int lidx;
1814   slp_tree node, load;
1815 
1816   /* Compare all the permutation sequences to the first one.  We know
1817      that at least one load is permuted.  */
1818   node = SLP_INSTANCE_LOADS (slp_instn)[0];
1819   if (!node->load_permutation.exists ())
1820     return false;
1821   for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1822     {
1823       if (!load->load_permutation.exists ())
1824 	return false;
1825       FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1826 	if (lidx != node->load_permutation[j])
1827 	  return false;
1828     }
1829 
1830   /* Check that the loads in the first sequence are different and there
1831      are no gaps between them.  */
1832   auto_sbitmap load_index (group_size);
1833   bitmap_clear (load_index);
1834   FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1835     {
1836       if (lidx >= group_size)
1837 	return false;
1838       if (bitmap_bit_p (load_index, lidx))
1839 	return false;
1840 
1841       bitmap_set_bit (load_index, lidx);
1842     }
1843   for (i = 0; i < group_size; i++)
1844     if (!bitmap_bit_p (load_index, i))
1845       return false;
1846 
1847   /* This permutation is valid for reduction.  Since the order of the
1848      statements in the nodes is not important unless they are memory
1849      accesses, we can rearrange the statements in all the nodes
1850      according to the order of the loads.  */
1851 
1852   /* We have to unshare the SLP tree we modify.  */
1853   hash_map<slp_tree, slp_tree> map;
1854   slp_tree unshared = slp_copy_subtree (SLP_INSTANCE_TREE (slp_instn), map);
1855   vect_free_slp_tree (SLP_INSTANCE_TREE (slp_instn), false);
1856   unshared->refcnt++;
1857   SLP_INSTANCE_TREE (slp_instn) = unshared;
1858   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1859     SLP_INSTANCE_LOADS (slp_instn)[i] = *map.get (node);
1860   node = SLP_INSTANCE_LOADS (slp_instn)[0];
1861 
1862   /* Do the actual re-arrangement.  */
1863   hash_set<slp_tree> visited;
1864   vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1865 			    node->load_permutation, visited);
1866 
1867   /* We are done, no actual permutations need to be generated.  */
1868   poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
1869   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1870     {
1871       stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1872       first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
1873       /* But we have to keep those permutations that are required because
1874          of handling of gaps.  */
1875       if (known_eq (unrolling_factor, 1U)
1876 	  || (group_size == DR_GROUP_SIZE (first_stmt_info)
1877 	      && DR_GROUP_GAP (first_stmt_info) == 0))
1878 	SLP_TREE_LOAD_PERMUTATION (node).release ();
1879       else
1880 	for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1881 	  SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
1882     }
1883 
1884   return true;
1885 }
1886 
1887 /* Gather loads in the SLP graph NODE and populate the INST loads array.  */
1888 
1889 static void
1890 vect_gather_slp_loads (slp_instance inst, slp_tree node,
1891 		       hash_set<slp_tree> &visited)
1892 {
1893   if (visited.add (node))
1894     return;
1895 
1896   if (SLP_TREE_CHILDREN (node).length () == 0)
1897     {
1898       if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1899 	return;
1900       stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1901       if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1902 	  && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1903 	SLP_INSTANCE_LOADS (inst).safe_push (node);
1904     }
1905   else
1906     {
1907       unsigned i;
1908       slp_tree child;
1909       FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1910 	vect_gather_slp_loads (inst, child, visited);
1911     }
1912 }
1913 
1914 static void
1915 vect_gather_slp_loads (slp_instance inst, slp_tree node)
1916 {
1917   hash_set<slp_tree> visited;
1918   vect_gather_slp_loads (inst, node, visited);
1919 }
1920 
1921 /* Check if the required load permutations in the SLP instance
1922    SLP_INSTN are supported.  */
1923 
1924 static bool
1925 vect_supported_load_permutation_p (slp_instance slp_instn)
1926 {
1927   unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1928   unsigned int i, j, k, next;
1929   slp_tree node;
1930 
1931   if (dump_enabled_p ())
1932     {
1933       dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1934       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1935 	if (node->load_permutation.exists ())
1936 	  FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1937 	    dump_printf (MSG_NOTE, "%d ", next);
1938 	else
1939 	  for (k = 0; k < group_size; ++k)
1940 	    dump_printf (MSG_NOTE, "%d ", k);
1941       dump_printf (MSG_NOTE, "\n");
1942     }
1943 
1944   /* In case of reduction every load permutation is allowed, since the order
1945      of the reduction statements is not important (as opposed to the case of
1946      grouped stores).  The only condition we need to check is that all the
1947      load nodes are of the same size and have the same permutation (and then
1948      rearrange all the nodes of the SLP instance according to this
1949      permutation).  */
1950 
1951   /* Check that all the load nodes are of the same size.  */
1952   /* ???  Can't we assert this? */
1953   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1954     if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1955       return false;
1956 
1957   node = SLP_INSTANCE_TREE (slp_instn);
1958   stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1959 
1960   /* Reduction (there are no data-refs in the root).
1961      In reduction chain the order of the loads is not important.  */
1962   if (!STMT_VINFO_DATA_REF (stmt_info)
1963       && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1964     vect_attempt_slp_rearrange_stmts (slp_instn);
1965 
1966   /* In basic block vectorization we allow any subchain of an interleaving
1967      chain.
1968      FORNOW: not supported in loop SLP because of realignment complications.  */
1969   if (STMT_VINFO_BB_VINFO (stmt_info))
1970     {
1971       /* Check whether the loads in an instance form a subchain and thus
1972          no permutation is necessary.  */
1973       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1974         {
1975 	  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1976 	    continue;
1977 	  bool subchain_p = true;
1978 	  stmt_vec_info next_load_info = NULL;
1979 	  stmt_vec_info load_info;
1980 	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
1981 	    {
1982 	      if (j != 0
1983 		  && (next_load_info != load_info
1984 		      || DR_GROUP_GAP (load_info) != 1))
1985 		{
1986 		  subchain_p = false;
1987 		  break;
1988 		}
1989 	      next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
1990 	    }
1991 	  if (subchain_p)
1992 	    SLP_TREE_LOAD_PERMUTATION (node).release ();
1993 	  else
1994 	    {
1995 	      stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
1996 	      group_info = DR_GROUP_FIRST_ELEMENT (group_info);
1997 	      unsigned HOST_WIDE_INT nunits;
1998 	      unsigned k, maxk = 0;
1999 	      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
2000 		if (k > maxk)
2001 		  maxk = k;
2002 	      /* In BB vectorization we may not actually use a loaded vector
2003 		 accessing elements in excess of DR_GROUP_SIZE.  */
2004 	      tree vectype = STMT_VINFO_VECTYPE (group_info);
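	      /* DR_GROUP_SIZE & ~(nunits - 1) rounds the group size down
		 to a whole multiple of NUNITS, i.e. the elements covered
		 by full vectors (this assumes NUNITS is a power of two,
		 as for the common vector modes).  */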
2005 	      if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
2006 		  || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
2007 		{
2008 		  if (dump_enabled_p ())
2009 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2010 				     "BB vectorization with gaps at the end of "
2011 				     "a load is not supported\n");
2012 		  return false;
2013 		}
2014 
2015 	      /* Verify the permutation can be generated.  */
2016 	      vec<tree> tem;
2017 	      unsigned n_perms;
2018 	      if (!vect_transform_slp_perm_load (node, tem, NULL,
2019 						 1, slp_instn, true, &n_perms))
2020 		{
2021 		  if (dump_enabled_p ())
2022 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2023 				     vect_location,
2024 				     "unsupported load permutation\n");
2025 		  return false;
2026 		}
2027 	    }
2028         }
2029       return true;
2030     }
2031 
2032   /* For loop vectorization verify we can generate the permutation.  Be
2033      conservative about the vectorization factor, there are permutations
2034      that will use three vector inputs only starting from a specific factor
2035      and the vectorization factor is not yet final.
2036      ???  The SLP instance unrolling factor might not be the maximum one.  */
2037   unsigned n_perms;
2038   poly_uint64 test_vf
2039     = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
2040 			     LOOP_VINFO_VECT_FACTOR
2041 			     (STMT_VINFO_LOOP_VINFO (stmt_info)));
2042   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
2043     if (node->load_permutation.exists ()
2044 	&& !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
2045 					  slp_instn, true, &n_perms))
2046       return false;
2047 
2048   return true;
2049 }
2050 
2051 
2052 /* Find the last scalar stmt in SLP tree NODE.  */
2053 
2054 stmt_vec_info
2055 vect_find_last_scalar_stmt_in_slp (slp_tree node)
2056 {
2057   stmt_vec_info last = NULL;
2058   stmt_vec_info stmt_vinfo;
2059 
2060   for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
2061     {
2062       stmt_vinfo = vect_orig_stmt (stmt_vinfo);
2063       last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
2064     }
2065 
2066   return last;
2067 }
2068 
2069 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
2070    two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
2071    (also containing the first GROUP1_SIZE stmts, since stores are
2072    consecutive), the second containing the remainder.
2073    Return the first stmt in the second group.  */
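/* For example, splitting a seven-store group with GROUP1_SIZE 4 leaves
   stores 0..3 in the first group and returns store 4, the head of the
   new three-store group.  */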
2074 
2075 static stmt_vec_info
2076 vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
2077 {
2078   gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
2079   gcc_assert (group1_size > 0);
2080   int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
2081   gcc_assert (group2_size > 0);
2082   DR_GROUP_SIZE (first_vinfo) = group1_size;
2083 
2084   stmt_vec_info stmt_info = first_vinfo;
2085   for (unsigned i = group1_size; i > 1; i--)
2086     {
2087       stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2088       gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
2089     }
2090   /* STMT is now the last element of the first group.  */
2091   stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
2092   DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;
2093 
2094   DR_GROUP_SIZE (group2) = group2_size;
2095   for (stmt_info = group2; stmt_info;
2096        stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
2097     {
2098       DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
2099       gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
2100     }
2101 
2102   /* For the second group, the DR_GROUP_GAP is that before the original group,
2103      plus skipping over the first vector.  */
2104   DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;
2105 
2106   /* DR_GROUP_GAP of the first group now has to skip over the second group too.  */
2107   DR_GROUP_GAP (first_vinfo) += group2_size;
2108 
2109   if (dump_enabled_p ())
2110     dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
2111 		     group1_size, group2_size);
2112 
2113   return group2;
2114 }
2115 
2116 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
2117    statements and a vector of NUNITS elements.  */
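/* For example, with NUNITS 4 and GROUP_SIZE 6 the common multiple is 12
   and the unrolling factor is 12 / 6 = 2; when GROUP_SIZE is a multiple
   of NUNITS the factor is 1.  */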
2118 
2119 static poly_uint64
2120 calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
2121 {
2122   return exact_div (common_multiple (nunits, group_size), group_size);
2123 }
2124 
2125 /* Analyze an SLP instance starting from a group of grouped stores.  Call
2126    vect_build_slp_tree to build a tree of packed stmts if possible.
2127    Return FALSE if it's impossible to SLP any stmt in the loop.  */
2128 
2129 static bool
2130 vect_analyze_slp_instance (vec_info *vinfo,
2131 			   scalar_stmts_to_slp_tree_map_t *bst_map,
2132 			   stmt_vec_info stmt_info, unsigned max_tree_size)
2133 {
2134   slp_instance new_instance;
2135   slp_tree node;
2136   unsigned int group_size;
2137   tree vectype, scalar_type = NULL_TREE;
2138   unsigned int i;
2139   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2140   vec<stmt_vec_info> scalar_stmts;
2141   bool constructor = false;
2142 
2143   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2144     {
2145       scalar_type = TREE_TYPE (DR_REF (dr));
2146       group_size = DR_GROUP_SIZE (stmt_info);
2147       vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
2148     }
2149   else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2150     {
2151       gcc_assert (is_a <loop_vec_info> (vinfo));
2152       vectype = STMT_VINFO_VECTYPE (stmt_info);
2153       group_size = REDUC_GROUP_SIZE (stmt_info);
2154     }
2155   else if (is_gimple_assign (stmt_info->stmt)
2156 	    && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR)
2157     {
2158       vectype = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt));
2159       group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt));
2160       constructor = true;
2161     }
2162   else
2163     {
2164       gcc_assert (is_a <loop_vec_info> (vinfo));
2165       vectype = STMT_VINFO_VECTYPE (stmt_info);
2166       group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
2167     }
2168 
2169   if (!vectype)
2170     {
2171       if (dump_enabled_p ())
2172 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2173 			 "Build SLP failed: unsupported data-type %T\n",
2174 			 scalar_type);
2175 
2176       return false;
2177     }
2178   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2179 
2180   /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
2181   scalar_stmts.create (group_size);
2182   stmt_vec_info next_info = stmt_info;
2183   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2184     {
2185       /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
2186       while (next_info)
2187         {
2188 	  scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
2189 	  next_info = DR_GROUP_NEXT_ELEMENT (next_info);
2190         }
2191     }
2192   else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2193     {
2194       /* Collect the reduction stmts and store them in
2195 	 SLP_TREE_SCALAR_STMTS.  */
2196       while (next_info)
2197         {
2198 	  scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
2199 	  next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
2200         }
2201       /* Mark the first element of the reduction chain as reduction to properly
2202 	 transform the node.  In the reduction analysis phase only the last
2203 	 element of the chain is marked as reduction.  */
2204       STMT_VINFO_DEF_TYPE (stmt_info)
2205 	= STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
2206       STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
2207 	= STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
2208     }
2209   else if (constructor)
2210     {
2211       tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
2212       tree val;
2213       FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val)
2214 	{
2215 	  if (TREE_CODE (val) == SSA_NAME)
2216 	    {
2217 	      gimple* def = SSA_NAME_DEF_STMT (val);
2218 	      stmt_vec_info def_info = vinfo->lookup_stmt (def);
2219 	      /* Value is defined in another basic block.  */
2220 	      if (!def_info)
2221 		return false;
2222 	      def_info = vect_stmt_to_vectorize (def_info);
2223 	      scalar_stmts.safe_push (def_info);
2224 	    }
2225 	  else
2226 	    return false;
2227 	}
2228       if (dump_enabled_p ())
2229 	dump_printf_loc (MSG_NOTE, vect_location,
2230 			 "Analyzing vectorizable constructor: %G\n",
2231 			 stmt_info->stmt);
2232     }
2233   else
2234     {
2235       /* Collect reduction statements.  */
2236       vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2237       for (i = 0; reductions.iterate (i, &next_info); i++)
2238 	scalar_stmts.safe_push (next_info);
2239     }
2240 
2241   /* Build the tree for the SLP instance.  */
2242   bool *matches = XALLOCAVEC (bool, group_size);
2243   unsigned npermutes = 0;
2244   poly_uint64 max_nunits = nunits;
2245   unsigned tree_size = 0;
2246   node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2247 			      &max_nunits, matches, &npermutes,
2248 			      &tree_size, bst_map);
2249   if (node != NULL)
2250     {
2251       /* Calculate the unrolling factor based on the smallest type.  */
2252       poly_uint64 unrolling_factor
2253 	= calculate_unrolling_factor (max_nunits, group_size);
2254 
2255       if (maybe_ne (unrolling_factor, 1U)
2256 	  && is_a <bb_vec_info> (vinfo))
2257 	{
2258 	  unsigned HOST_WIDE_INT const_max_nunits;
2259 	  if (!max_nunits.is_constant (&const_max_nunits)
2260 	      || const_max_nunits > group_size)
2261 	    {
2262 	      if (dump_enabled_p ())
2263 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2264 				 "Build SLP failed: store group "
2265 				 "size not a multiple of the vector size "
2266 				 "in basic block SLP\n");
2267 	      vect_free_slp_tree (node, false);
2268 	      return false;
2269 	    }
2270 	  /* Fatal mismatch.  */
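	  /* The index rounds GROUP_SIZE down to a multiple of the vector
	     size, so the mismatch is recorded at a vector boundary where
	     the group-splitting code below can split the group.  */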
2271 	  matches[group_size / const_max_nunits * const_max_nunits] = false;
2272 	  vect_free_slp_tree (node, false);
2273 	}
2274       else
2275 	{
2276 	  /* Create a new SLP instance.  */
2277 	  new_instance = XNEW (class _slp_instance);
2278 	  SLP_INSTANCE_TREE (new_instance) = node;
2279 	  SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2280 	  SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2281 	  SLP_INSTANCE_LOADS (new_instance) = vNULL;
2282 	  SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
2283 
2284 	  vect_gather_slp_loads (new_instance, node);
2285 	  if (dump_enabled_p ())
2286 	    dump_printf_loc (MSG_NOTE, vect_location,
2287 			     "SLP size %u vs. limit %u.\n",
2288 			     tree_size, max_tree_size);
2289 
2290 	  /* Compute the load permutation.  */
2291 	  slp_tree load_node;
2292 	  bool loads_permuted = false;
2293 	  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2294 	    {
2295 	      if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ())
2296 		continue;
2297 	      unsigned j;
2298 	      stmt_vec_info load_info;
2299 	      bool this_load_permuted = false;
2300 	      stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
2301 		  (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2302 	      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
2303 		if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j)
2304 		  {
2305 		    this_load_permuted = true;
2306 		    break;
2307 		  }
2308 	      if (!this_load_permuted
2309 		  /* The load requires permutation when unrolling exposes
2310 		     a gap either because the group is larger than the SLP
2311 		     group-size or because there is a gap between the groups.  */
2312 		  && (known_eq (unrolling_factor, 1U)
2313 		      || (group_size == DR_GROUP_SIZE (first_stmt_info)
2314 			  && DR_GROUP_GAP (first_stmt_info) == 0)))
2315 		{
2316 		  SLP_TREE_LOAD_PERMUTATION (load_node).release ();
2317 		  continue;
2318 		}
2319 	      loads_permuted = true;
2320 	    }
2321 
2322 	  if (loads_permuted)
2323 	    {
2324 	      if (!vect_supported_load_permutation_p (new_instance))
2325 		{
2326 		  if (dump_enabled_p ())
2327 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2328 				     "Build SLP failed: unsupported load "
2329 				     "permutation %G", stmt_info->stmt);
2330 		  vect_free_slp_instance (new_instance, false);
2331 		  return false;
2332 		}
2333 	    }
2334 
2335 	  /* If the loads and stores can be handled with load/store-lane
2336 	     instructions, do not generate this SLP instance.  */
2337 	  if (is_a <loop_vec_info> (vinfo)
2338 	      && loads_permuted
2339 	      && dr && vect_store_lanes_supported (vectype, group_size, false))
2340 	    {
2341 	      slp_tree load_node;
2342 	      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2343 		{
2344 		  stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
2345 		      (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2346 		  /* Use SLP for strided accesses (or if we can't load-lanes).  */
2347 		  if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2348 		      || ! vect_load_lanes_supported
2349 		      (STMT_VINFO_VECTYPE (stmt_vinfo),
2350 		       DR_GROUP_SIZE (stmt_vinfo), false))
2351 		    break;
2352 		}
2353 	      if (i == SLP_INSTANCE_LOADS (new_instance).length ())
2354 		{
2355 		  if (dump_enabled_p ())
2356 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2357 				     "Built SLP cancelled: can use "
2358 				     "load/store-lanes\n");
2359 		  vect_free_slp_instance (new_instance, false);
2360 		  return false;
2361 		}
2362 	    }
2363 
2364 	  /* If this is a reduction chain with a conversion in front
2365 	     amend the SLP tree with a node for that.  */
2366 	  if (!dr
2367 	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
2368 	      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2369 	    {
2370 	      /* Get at the conversion stmt - we know it's the single use
2371 		 of the last stmt of the reduction chain.  */
2372 	      gimple *tem = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
2373 	      use_operand_p use_p;
2374 	      gimple *use_stmt;
2375 	      bool r = single_imm_use (gimple_assign_lhs (tem),
2376 				       &use_p, &use_stmt);
2377 	      gcc_assert (r);
2378 	      next_info = vinfo->lookup_stmt (use_stmt);
2379 	      next_info = vect_stmt_to_vectorize (next_info);
2380 	      scalar_stmts = vNULL;
2381 	      scalar_stmts.create (group_size);
2382 	      for (unsigned i = 0; i < group_size; ++i)
2383 		scalar_stmts.quick_push (next_info);
2384 	      slp_tree conv = vect_create_new_slp_node (scalar_stmts);
2385 	      SLP_TREE_CHILDREN (conv).quick_push (node);
2386 	      SLP_INSTANCE_TREE (new_instance) = conv;
2387 	      /* We also have to fake this conversion stmt as SLP reduction
2388 		 group so we don't have to mess with too much code
2389 		 elsewhere.  */
2390 	      REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
2391 	      REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
2392 	    }
2393 
2394 	  vinfo->slp_instances.safe_push (new_instance);
2395 
2396 	  if (dump_enabled_p ())
2397 	    {
2398 	      dump_printf_loc (MSG_NOTE, vect_location,
2399 			       "Final SLP tree for instance:\n");
2400 	      vect_print_slp_tree (MSG_NOTE, vect_location,
2401 				   SLP_INSTANCE_TREE (new_instance));
2402 	    }
2403 
2404 	  return true;
2405 	}
2406     }
2407   else
2408     {
2409       /* Failed to SLP.  */
2410       /* Free the allocated memory.  */
2411       scalar_stmts.release ();
2412     }
2413 
2414   /* For basic block SLP, try to break the group up into multiples of the
2415      vector size.  */
2416   unsigned HOST_WIDE_INT const_nunits;
2417   if (is_a <bb_vec_info> (vinfo)
2418       && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2419       && DR_GROUP_FIRST_ELEMENT (stmt_info)
2420       && nunits.is_constant (&const_nunits))
2421     {
2422       /* We consider breaking the group only on VF boundaries from the existing
2423 	 start.  */
2424       for (i = 0; i < group_size; i++)
2425 	if (!matches[i]) break;
2426 
2427       if (i >= const_nunits && i < group_size)
2428 	{
2429 	  /* Split into two groups at the first vector boundary before i.  */
2430 	  gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
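	  /* E.g. a first mismatch at I == 6 with CONST_NUNITS == 4 rounds
	     down to GROUP1_SIZE == 4.  */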
2431 	  unsigned group1_size = i & ~(const_nunits - 1);
2432 
2433 	  stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2434 							   group1_size);
2435 	  bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
2436 						max_tree_size);
2437 	  /* If the first non-match was in the middle of a vector,
2438 	     skip the rest of that vector.  */
2439 	  if (group1_size < i)
2440 	    {
2441 	      i = group1_size + const_nunits;
2442 	      if (i < group_size)
2443 		rest = vect_split_slp_store_group (rest, const_nunits);
2444 	    }
2445 	  if (i < group_size)
2446 	    res |= vect_analyze_slp_instance (vinfo, bst_map,
2447 					      rest, max_tree_size);
2448 	  return res;
2449 	}
2450       /* Even though the first vector did not all match, we might be able to SLP
2451 	 (some) of the remainder.  FORNOW ignore this possibility.  */
2452     }
2453 
2454   return false;
2455 }
2456 
2457 
2458 /* Check if there are stmts in the loop that can be vectorized using SLP.  Build SLP
2459    trees of packed scalar stmts if SLP is possible.  */
2460 
2461 opt_result
2462 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2463 {
2464   unsigned int i;
2465   stmt_vec_info first_element;
2466 
2467   DUMP_VECT_SCOPE ("vect_analyze_slp");
2468 
2469   scalar_stmts_to_slp_tree_map_t *bst_map
2470     = new scalar_stmts_to_slp_tree_map_t ();
2471 
2472   /* Find SLP sequences starting from groups of grouped stores.  */
2473   FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2474     vect_analyze_slp_instance (vinfo, bst_map, first_element, max_tree_size);
2475 
2476   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2477     {
2478       if (loop_vinfo->reduction_chains.length () > 0)
2479 	{
2480 	  /* Find SLP sequences starting from reduction chains.  */
2481 	  FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2482 	    if (! vect_analyze_slp_instance (vinfo, bst_map, first_element,
2483 					     max_tree_size))
2484 	      {
2485 		/* Dissolve reduction chain group.  */
2486 		stmt_vec_info vinfo = first_element;
2487 		stmt_vec_info last = NULL;
2488 		while (vinfo)
2489 		  {
2490 		    stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2491 		    REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2492 		    REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2493 		    last = vinfo;
2494 		    vinfo = next;
2495 		  }
2496 		STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
2497 		/* It can be still vectorized as part of an SLP reduction.  */
2498 		loop_vinfo->reductions.safe_push (last);
2499 	      }
2500 	}
2501 
2502       /* Find SLP sequences starting from groups of reductions.  */
2503       if (loop_vinfo->reductions.length () > 1)
2504 	vect_analyze_slp_instance (vinfo, bst_map, loop_vinfo->reductions[0],
2505 				   max_tree_size);
2506     }
2507 
2508   /* The map keeps a reference on SLP nodes built, release that.  */
2509   for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
2510        it != bst_map->end (); ++it)
2511     if ((*it).second)
2512       vect_free_slp_tree ((*it).second, false);
2513   delete bst_map;
2514 
2515   return opt_result::success ();
2516 }
2517 
2518 
2519 /* For each possible SLP instance decide whether to SLP it and calculate overall
2520    unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
2521    least one instance.  */
2522 
2523 bool
2524 vect_make_slp_decision (loop_vec_info loop_vinfo)
2525 {
2526   unsigned int i;
2527   poly_uint64 unrolling_factor = 1;
2528   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2529   slp_instance instance;
2530   int decided_to_slp = 0;
2531 
2532   DUMP_VECT_SCOPE ("vect_make_slp_decision");
2533 
2534   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2535     {
2536       /* FORNOW: SLP if you can.  */
2537       /* All unroll factors have the form:
2538 
2539 	   GET_MODE_SIZE (vinfo->vector_mode) * X
2540 
2541 	 for some rational X, so they must have a common multiple.  */
2542       unrolling_factor
2543 	= force_common_multiple (unrolling_factor,
2544 				 SLP_INSTANCE_UNROLLING_FACTOR (instance));
2545 
2546       /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
2547 	 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2548 	 loop-based vectorization.  Such stmts will be marked as HYBRID.  */
2549       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
2550       decided_to_slp++;
2551     }
2552 
2553   LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2554 
2555   if (decided_to_slp && dump_enabled_p ())
2556     {
2557       dump_printf_loc (MSG_NOTE, vect_location,
2558 		       "Decided to SLP %d instances. Unrolling factor ",
2559 		       decided_to_slp);
2560       dump_dec (MSG_NOTE, unrolling_factor);
2561       dump_printf (MSG_NOTE, "\n");
2562     }
2563 
2564   return (decided_to_slp > 0);
2565 }
2566 
2567 
2568 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2569    can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
2570 
2571 static void
2572 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype,
2573 			      hash_map<slp_tree, unsigned> &visited)
2574 {
2575   stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
2576   imm_use_iterator imm_iter;
2577   gimple *use_stmt;
2578   stmt_vec_info use_vinfo;
2579   slp_tree child;
2580   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2581   int j;
2582 
2583   /* We need to union stype over the incoming graph edges but we still
2584      want to limit recursion to stay O(N+E).  */
2585   unsigned visited_cnt = ++visited.get_or_insert (node);
2586   gcc_assert (visited_cnt <= node->refcnt);
2587   bool only_edge = (visited_cnt != node->refcnt);
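  /* Visits over all but the last incoming edge only record a hybrid
     mark on the stmt; the final visit picks earlier marks up via
     HYBRID_SLP_STMT below and recurses into the children.  */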
2588 
2589   /* Propagate hybrid down the SLP tree.  */
2590   if (stype == hybrid)
2591     ;
2592   else if (HYBRID_SLP_STMT (stmt_vinfo))
2593     stype = hybrid;
2594   else if (!only_edge)
2595     {
2596       /* Check if a pure SLP stmt has uses in non-SLP stmts.  */
2597       gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2598       /* If we get a pattern stmt here we have to use the LHS of the
2599          original stmt for immediate uses.  */
2600       gimple *stmt = vect_orig_stmt (stmt_vinfo)->stmt;
2601       tree def;
2602       if (gimple_code (stmt) == GIMPLE_PHI)
2603 	def = gimple_phi_result (stmt);
2604       else
2605 	def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2606       if (def)
2607 	FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2608 	  {
2609 	    use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2610 	    if (!use_vinfo)
2611 	      continue;
2612 	    use_vinfo = vect_stmt_to_vectorize (use_vinfo);
2613 	    if (!STMT_SLP_TYPE (use_vinfo)
2614 		&& (STMT_VINFO_RELEVANT (use_vinfo)
2615 		    || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2616 		&& !(gimple_code (use_stmt) == GIMPLE_PHI
2617 		     && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2618 	      {
2619 		if (dump_enabled_p ())
2620 		  dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2621 				   "def in non-SLP stmt: %G", use_stmt);
2622 		stype = hybrid;
2623 	      }
2624 	  }
2625     }
2626 
2627   if (stype == hybrid
2628       && !HYBRID_SLP_STMT (stmt_vinfo))
2629     {
2630       if (dump_enabled_p ())
2631 	dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2632 			 stmt_vinfo->stmt);
2633       STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2634     }
2635 
2636   if (!only_edge)
2637     FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2638       if (SLP_TREE_DEF_TYPE (child) != vect_external_def
2639 	  && SLP_TREE_DEF_TYPE (child) != vect_constant_def)
2640 	vect_detect_hybrid_slp_stmts (child, i, stype, visited);
2641 }
2642 
2643 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses.  */
2644 
2645 static tree
2646 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2647 {
2648   walk_stmt_info *wi = (walk_stmt_info *)data;
2649   loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2650 
2651   if (wi->is_lhs)
2652     return NULL_TREE;
2653 
2654   stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
2655   if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
2656     {
2657       if (dump_enabled_p ())
2658 	dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2659 			 def_stmt_info->stmt);
2660       STMT_SLP_TYPE (def_stmt_info) = hybrid;
2661     }
2662 
2663   return NULL_TREE;
2664 }
2665 
2666 static tree
2667 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2668 			  walk_stmt_info *wi)
2669 {
2670   loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2671   stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
2672   /* If the stmt is in a SLP instance then this isn't a reason
2673      to mark use definitions in other SLP instances as hybrid.  */
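  /* Only such relevant non-SLP uses fall through and get their operands
     walked by vect_detect_hybrid_slp_1; all other stmts are claimed
     handled so the walk skips them.  */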
2674   if (! STMT_SLP_TYPE (use_vinfo)
2675       && (STMT_VINFO_RELEVANT (use_vinfo)
2676 	  || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2677       && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2678 	    && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2679     ;
2680   else
2681     *handled = true;
2682   return NULL_TREE;
2683 }
2684 
2685 /* Find stmts that must be both vectorized and SLPed.  */
2686 
2687 void
2688 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2689 {
2690   unsigned int i;
2691   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2692   slp_instance instance;
2693 
2694   DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2695 
2696   /* First walk all pattern stmts in the loop and mark defs of uses as
2697      hybrid because immediate uses in them are not recorded.  */
2698   for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2699     {
2700       basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2701       for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2702 	   gsi_next (&gsi))
2703 	{
2704 	  gimple *stmt = gsi_stmt (gsi);
2705 	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
2706 	  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2707 	    {
2708 	      walk_stmt_info wi;
2709 	      memset (&wi, 0, sizeof (wi));
2710 	      wi.info = loop_vinfo;
2711 	      gimple_stmt_iterator gsi2
2712 		= gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
2713 	      walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2714 				vect_detect_hybrid_slp_1, &wi);
2715 	      walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2716 			       vect_detect_hybrid_slp_2,
2717 			       vect_detect_hybrid_slp_1, &wi);
2718 	    }
2719 	}
2720     }
2721 
2722   /* Then walk the SLP instance trees marking stmts with uses in
2723      non-SLP stmts as hybrid, also propagating hybrid down the
2724      SLP tree, collecting the above info on-the-fly.  */
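  /* Lane J is walked across all instances at once; iteration stops as
     soon as no instance has a group as large as J.  */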
2725   for (unsigned j = 0;; ++j)
2726     {
2727       hash_map<slp_tree, unsigned> visited;
2728       bool any = false;
2729       FOR_EACH_VEC_ELT (slp_instances, i, instance)
2730 	if (j < SLP_INSTANCE_GROUP_SIZE (instance))
2731 	  {
2732 	    any = true;
2733 	    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2734 					  j, pure_slp, visited);
2735 	  }
2736       if (!any)
2737 	break;
2738     }
2739 }
2740 
2741 
2742 /* Initialize a bb_vec_info struct for the statements between
2743    REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive).  */
2744 
2745 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2746 			    gimple_stmt_iterator region_end_in,
2747 			    vec_info_shared *shared)
2748   : vec_info (vec_info::bb, init_cost (NULL), shared),
2749     bb (gsi_bb (region_begin_in)),
2750     region_begin (region_begin_in),
2751     region_end (region_end_in)
2752 {
2753   gimple_stmt_iterator gsi;
2754 
2755   for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2756        gsi_next (&gsi))
2757     {
2758       gimple *stmt = gsi_stmt (gsi);
2759       gimple_set_uid (stmt, 0);
2760       add_stmt (stmt);
2761     }
2762 
2763   bb->aux = this;
2764 }
2765 
2766 
2767 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2768    stmts in the basic block.  */
2769 
2770 _bb_vec_info::~_bb_vec_info ()
2771 {
2772   for (gimple_stmt_iterator si = region_begin;
2773        gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2774     /* Reset region marker.  */
2775     gimple_set_uid (gsi_stmt (si), -1);
2776 
2777   bb->aux = NULL;
2778 }
2779 
2780 /* Subroutine of vect_slp_analyze_node_operations.  Handle the root of NODE,
2781    given that child nodes have already been processed, and that
2782    their def types currently match their SLP node's def type.  */
2783 
2784 static bool
2785 vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2786 				    slp_instance node_instance,
2787 				    stmt_vector_for_cost *cost_vec)
2788 {
2789   stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2790   gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2791 
2792   /* Calculate the number of vector statements to be created for the
2793      scalar stmts in this node.  For SLP reductions it is equal to the
2794      number of vector statements in the children (which has already been
2795      calculated by the recursive call).  Otherwise it is the number of
2796      scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2797      VF divided by the number of elements in a vector.  */
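  /* For instance, DR_GROUP_SIZE 4 with VF 8 and a 16-element vector
     type gives 4 * 8 / 16 = 2 vector stmts.  */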
2798   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2799       && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2800     {
2801       for (unsigned i = 0; i < SLP_TREE_CHILDREN (node).length (); ++i)
2802 	if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node)[i]) == vect_internal_def)
2803 	  {
2804 	    SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2805 	      = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[i]);
2806 	    break;
2807 	  }
2808     }
2809   else
2810     {
2811       poly_uint64 vf;
2812       if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2813 	vf = loop_vinfo->vectorization_factor;
2814       else
2815 	vf = 1;
2816       unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2817       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2818       SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2819 	= vect_get_num_vectors (vf * group_size, vectype);
2820     }
2821 
2822   bool dummy;
2823   return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
2824 }
2825 
2826 /* Try to build NODE from scalars, returning true on success.
2827    NODE_INSTANCE is the SLP instance that contains NODE.  */
2828 
2829 static bool
2830 vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
2831 			      slp_instance node_instance)
2832 {
2833   stmt_vec_info stmt_info;
2834   unsigned int i;
2835 
2836   if (!is_a <bb_vec_info> (vinfo)
2837       || node == SLP_INSTANCE_TREE (node_instance)
2838       || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node)))
2839     return false;
2840 
2841   if (dump_enabled_p ())
2842     dump_printf_loc (MSG_NOTE, vect_location,
2843 		     "Building vector operands from scalars instead\n");
2844 
2845   /* Don't remove and free the child nodes here, since they could be
2846      referenced by other structures.  The analysis and scheduling phases
2847      (need to) ignore child nodes of anything that isn't vect_internal_def.  */
2848   unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
2849   SLP_TREE_DEF_TYPE (node) = vect_external_def;
2850   SLP_TREE_SCALAR_OPS (node).safe_grow (group_size);
2851   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2852     {
2853       tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
2854       SLP_TREE_SCALAR_OPS (node)[i] = lhs;
2855     }
2856   return true;
2857 }
2858 
2859 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2860    the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2861 
2862    Return true if the operations are supported.  */
2863 
2864 static bool
2865 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2866 				  slp_instance node_instance,
2867 				  hash_set<slp_tree> &visited,
2868 				  hash_set<slp_tree> &lvisited,
2869 				  stmt_vector_for_cost *cost_vec)
2870 {
2871   int i, j;
2872   slp_tree child;
2873 
2874   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2875     return true;
2876 
2877   /* If we already analyzed the exact same set of scalar stmts we're done.
2878      We share the generated vector stmts for those.
2879      The SLP graph is acyclic so not caching whether we failed or succeeded
2880      doesn't result in any issue since we throw away the lvisited set
2881      when we fail.  */
2882   if (visited.contains (node)
2883       || lvisited.add (node))
2884     return true;
2885 
2886   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2887     if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2888 					   visited, lvisited, cost_vec))
2889       return false;
2890 
2891   /* ???  We have to catch the case late where the same first scalar stmt
2892      appears in multiple SLP children with different def types, and fail.
2893      Remember the original def types first since SLP_TREE_DEF_TYPE doesn't
2894      necessarily match them when it is vect_internal_def.  */
2895   auto_vec<vect_def_type, 4> dt;
2896   dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
2897   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2898     if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2899       dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);
2900 
2901   /* Push SLP node def-type to stmt operands.  */
2902   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2903     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def
2904 	&& SLP_TREE_SCALAR_STMTS (child).length () != 0)
2905       STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2906 	= SLP_TREE_DEF_TYPE (child);
2907 
2908   /* Check everything worked out.  */
2909   bool res = true;
2910   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2911       if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2912 	{
2913 	  if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2914 	    {
2915 	      if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2916 		  != SLP_TREE_DEF_TYPE (child))
2917 		res = false;
2918 	    }
2919 	  else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2920 		   != dt[j])
2921 	    res = false;
2922 	}
2923   if (!res && dump_enabled_p ())
2924     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2925 		     "not vectorized: same operand with different "
2926 		     "def type in stmt.\n");
2927 
2928   if (res)
2929     res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2930 					      cost_vec);
2931 
2932   /* Restore def-types.  */
2933   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2934     if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2935       STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
2936 
2937   /* If this node can't be vectorized, try pruning the tree here rather
2938      than felling the whole thing.  */
2939   if (!res && vect_slp_convert_to_external (vinfo, node, node_instance))
2940     res = true;
2941 
2942   return res;
2943 }
2944 
2945 
2946 /* Analyze statements in SLP instances of VINFO.  Return true if the
2947    operations are supported. */
2948 
2949 bool
2950 vect_slp_analyze_operations (vec_info *vinfo)
2951 {
2952   slp_instance instance;
2953   int i;
2954 
2955   DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2956 
2957   hash_set<slp_tree> visited;
2958   for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2959     {
2960       hash_set<slp_tree> lvisited;
2961       stmt_vector_for_cost cost_vec;
2962       cost_vec.create (2);
2963       if (!vect_slp_analyze_node_operations (vinfo,
2964 					     SLP_INSTANCE_TREE (instance),
2965 					     instance, visited, lvisited,
2966 					     &cost_vec)
2967 	  /* Instances with a root stmt require vectorized defs for the
2968 	     SLP tree root.  */
2969 	  || (SLP_INSTANCE_ROOT_STMT (instance)
2970 	      && (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance))
2971 		  != vect_internal_def)))
2972         {
2973 	  slp_tree node = SLP_INSTANCE_TREE (instance);
2974 	  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2975 	  if (dump_enabled_p ())
2976 	    dump_printf_loc (MSG_NOTE, vect_location,
2977 			     "removing SLP instance operations starting from: %G",
2978 			     stmt_info->stmt);
2979 	  vect_free_slp_instance (instance, false);
2980           vinfo->slp_instances.ordered_remove (i);
2981 	  cost_vec.release ();
2982 	}
2983       else
2984 	{
2985 	  for (hash_set<slp_tree>::iterator x = lvisited.begin();
2986 	       x != lvisited.end(); ++x)
2987 	    visited.add (*x);
2988 	  i++;
2989 
2990 	  add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2991 	  cost_vec.release ();
2992 	}
2993     }
2994 
2995   return !vinfo->slp_instances.is_empty ();
2996 }
2997 
2998 
2999 /* Compute the scalar cost of the SLP node NODE and its children
3000    and record it in COST_VEC.  Do not account defs that are marked
3001    in LIFE and update LIFE according to uses of NODE.  */
3002 
3003 static void
3004 vect_bb_slp_scalar_cost (basic_block bb,
3005 			 slp_tree node, vec<bool, va_heap> *life,
3006 			 stmt_vector_for_cost *cost_vec,
3007 			 hash_set<slp_tree> &visited)
3008 {
3009   unsigned i;
3010   stmt_vec_info stmt_info;
3011   slp_tree child;
3012 
3013   if (visited.add (node))
3014     return;
3015 
3016   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
3017     {
3018       gimple *stmt = stmt_info->stmt;
3019       vec_info *vinfo = stmt_info->vinfo;
3020       ssa_op_iter op_iter;
3021       def_operand_p def_p;
3022 
3023       if ((*life)[i])
3024 	continue;
3025 
3026       /* If there is a non-vectorized use of the defs then the scalar
3027          stmt is kept live in which case we do not account it or any
3028 	 required defs in the SLP children in the scalar cost.  This
3029 	 way we make the vectorization more costly when compared to
3030 	 the scalar cost.  */
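      /* For example (illustrative), if s_1 = a_2 + b_3 is part of the
	 SLP tree but s_1 also feeds a scalar statement outside the
	 vectorized region, s_1 stays live: (*life)[i] is set below and
	 neither s_1 nor the defs it requires from the SLP children are
	 counted on the scalar side.  */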
3031       FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
3032 	{
3033 	  imm_use_iterator use_iter;
3034 	  gimple *use_stmt;
3035 	  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
3036 	    if (!is_gimple_debug (use_stmt))
3037 	      {
3038 		stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
3039 		if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
3040 		  {
3041 		    (*life)[i] = true;
3042 		    BREAK_FROM_IMM_USE_STMT (use_iter);
3043 		  }
3044 	      }
3045 	}
3046       if ((*life)[i])
3047 	continue;
3048 
3049       /* Count scalar stmts only once.  */
3050       if (gimple_visited_p (stmt))
3051 	continue;
3052       gimple_set_visited (stmt, true);
3053 
3054       vect_cost_for_stmt kind;
3055       if (STMT_VINFO_DATA_REF (stmt_info))
3056         {
3057           if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
3058 	    kind = scalar_load;
3059           else
3060 	    kind = scalar_store;
3061         }
3062       else if (vect_nop_conversion_p (stmt_info))
3063 	continue;
3064       else
3065 	kind = scalar_stmt;
3066       record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
3067     }
3068 
3069   auto_vec<bool, 20> subtree_life;
3070   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3071     {
3072       if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3073 	{
3074 	  /* Do not directly pass LIFE to the recursive call, copy it to
3075 	     confine changes in the callee to the current child/subtree.  */
3076 	  subtree_life.safe_splice (*life);
3077 	  vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec,
3078 				   visited);
3079 	  subtree_life.truncate (0);
3080 	}
3081     }
3082 }
3083 
3084 /* Check if vectorization of the basic block is profitable.  */
3085 
3086 static bool
3087 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
3088 {
3089   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3090   slp_instance instance;
3091   int i;
3092   unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
3093   unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
3094 
3095   /* Calculate scalar cost.  */
3096   stmt_vector_for_cost scalar_costs;
3097   scalar_costs.create (0);
3098   hash_set<slp_tree> visited;
3099   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3100     {
3101       auto_vec<bool, 20> life;
3102       life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
3103       vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
3104 			       SLP_INSTANCE_TREE (instance),
3105 			       &life, &scalar_costs, visited);
3106     }
3107   void *target_cost_data = init_cost (NULL);
3108   add_stmt_costs (target_cost_data, &scalar_costs);
3109   scalar_costs.release ();
3110   unsigned dummy;
3111   finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
3112   destroy_cost_data (target_cost_data);
3113 
3114   /* Unset visited flag.  */
3115   for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
3116        gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
3117     gimple_set_visited (gsi_stmt (gsi), false);
3118 
3119   /* Complete the target-specific cost calculation.  */
3120   finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
3121 	       &vec_inside_cost, &vec_epilogue_cost);
3122 
3123   vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
3124 
3125   if (dump_enabled_p ())
3126     {
3127       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
3128       dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
3129 		   vec_inside_cost);
3130       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
3131       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
3132       dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
3133     }
3134 
3135   /* Vectorization is profitable if its cost does not exceed the cost of
3136      the scalar version.  Note that we err on the vector side for equal
3137      cost because the cost estimate is otherwise quite pessimistic
3138      (constant uses are free on the scalar side but cost a load on the
3139      vector side, for example).  */
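  /* E.g., with purely illustrative numbers: vec_inside_cost 6,
     vec_prologue_cost 1, vec_epilogue_cost 1 and scalar_cost 8 gives
     6 + 2 == 8, so we still vectorize; with scalar_cost 7 the vector
     side would be strictly more expensive and we give up.  */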
3140   if (vec_outside_cost + vec_inside_cost > scalar_cost)
3141     return false;
3142 
3143   return true;
3144 }
3145 
3146 /* Find any vectorizable constructors and add them to the grouped_store
3147    array.  */
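/* As an illustrative example, an assignment

     v_1 = {a_2, b_3, c_4, d_5};

   where the type of v_1 is a four-element vector and each element is a
   scalar qualifies; a uniform constructor such as {a_2, a_2, a_2, a_2}
   is skipped since it is better handled as a splat.  */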
3148 
3149 static void
3150 vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
3151 {
3152   gimple_stmt_iterator gsi;
3153 
3154   for (gsi = bb_vinfo->region_begin;
3155        gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
3156     {
3157       gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
3158       if (!stmt || gimple_assign_rhs_code (stmt) != CONSTRUCTOR)
3159 	continue;
3160 
3161       tree rhs = gimple_assign_rhs1 (stmt);
3162       if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
3163 	  || maybe_ne (TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs)),
3164 		       CONSTRUCTOR_NELTS (rhs))
3165 	  || VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs, 0)->value))
3166 	  || uniform_vector_p (rhs))
3167 	continue;
3168 
3169       stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
3170       BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
3171     }
3172 }
3173 
3174 /* Check if the region described by BB_VINFO can be vectorized, returning
3175    true if so.  When returning false, set FATAL to true if the same failure
3176    would prevent vectorization at other vector sizes, false if it is still
3177    worth trying other sizes.  N_STMTS is the number of statements in the
3178    region.  */
3179 
3180 static bool
3181 vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
3182 {
3183   DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3184 
3185   slp_instance instance;
3186   int i;
3187   poly_uint64 min_vf = 2;
3188 
3189   /* The first group of checks is independent of the vector size.  */
3190   fatal = true;
3191 
3192   /* Analyze the data references.  */
3193 
3194   if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL))
3195     {
3196       if (dump_enabled_p ())
3197         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3198 			 "not vectorized: unhandled data-ref in basic "
3199 			 "block.\n");
3200       return false;
3201     }
3202 
3203   if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
3204     {
3205       if (dump_enabled_p ())
3206         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3207 			 "not vectorized: not enough data-refs in "
3208 			 "basic block.\n");
3209       return false;
3210     }
3211 
3212   if (!vect_analyze_data_ref_accesses (bb_vinfo))
3213     {
3214       if (dump_enabled_p ())
3215 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3216 			 "not vectorized: unhandled data access in "
3217 			 "basic block.\n");
3218       return false;
3219     }
3220 
3221   vect_slp_check_for_constructors (bb_vinfo);
3222 
3223   /* If there are no grouped stores in the region there is no need
3224      to continue with pattern recog as vect_analyze_slp will fail
3225      anyway.  */
3226   if (bb_vinfo->grouped_stores.is_empty ())
3227     {
3228       if (dump_enabled_p ())
3229 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3230 			 "not vectorized: no grouped stores in "
3231 			 "basic block.\n");
3232       return false;
3233     }
3234 
3235   /* The rest of the analysis below depends on the vector size in some way.  */
3236   fatal = false;
3237 
3238   vect_pattern_recog (bb_vinfo);
3239 
3240   /* Check the SLP opportunities in the basic block, analyze and build SLP
3241      trees.  */
3242   if (!vect_analyze_slp (bb_vinfo, n_stmts))
3243     {
3244       if (dump_enabled_p ())
3245 	{
3246 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3247 			   "Failed to SLP the basic block.\n");
3248 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3249 			   "not vectorized: failed to find SLP opportunities "
3250 			   "in basic block.\n");
3251 	}
3252       return false;
3253     }
3254 
3255   vect_record_base_alignments (bb_vinfo);
3256 
3257   /* Analyze and verify the alignment of data references and the
3258      dependence in the SLP instances.  */
3259   for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
3260     {
3261       if (! vect_slp_analyze_and_verify_instance_alignment (instance)
3262 	  || ! vect_slp_analyze_instance_dependence (instance))
3263 	{
3264 	  slp_tree node = SLP_INSTANCE_TREE (instance);
3265 	  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3266 	  if (dump_enabled_p ())
3267 	    dump_printf_loc (MSG_NOTE, vect_location,
3268 			     "removing SLP instance operations starting from: %G",
3269 			     stmt_info->stmt);
3270 	  vect_free_slp_instance (instance, false);
3271 	  BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
3272 	  continue;
3273 	}
3274 
3275       /* Mark all the statements that we want to vectorize as pure SLP and
3276 	 relevant.  */
3277       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
3278       vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
3279       if (SLP_INSTANCE_ROOT_STMT (instance))
3280 	STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp;
3281 
3282       i++;
3283     }
3284   if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
3285     return false;
3286 
3287   if (!vect_slp_analyze_operations (bb_vinfo))
3288     {
3289       if (dump_enabled_p ())
3290         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3291 			 "not vectorized: bad operation in basic block.\n");
3292       return false;
3293     }
3294 
3295   /* Cost model: check if the vectorization is worthwhile.  */
3296   if (!unlimited_cost_model (NULL)
3297       && !vect_bb_vectorization_profitable_p (bb_vinfo))
3298     {
3299       if (dump_enabled_p ())
3300         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3301 			 "not vectorized: vectorization is not "
3302 			 "profitable.\n");
3303       return false;
3304     }
3305 
3306   if (dump_enabled_p ())
3307     dump_printf_loc (MSG_NOTE, vect_location,
3308 		     "Basic block will be vectorized using SLP\n");
3309   return true;
3310 }
3311 
3312 /* Subroutine of vect_slp_bb.  Try to vectorize the statements between
3313    REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
3314    on success.  The region has N_STMTS statements and has the datarefs
3315    given by DATAREFS.  */
3316 
3317 static bool
3318 vect_slp_bb_region (gimple_stmt_iterator region_begin,
3319 		    gimple_stmt_iterator region_end,
3320 		    vec<data_reference_p> datarefs,
3321 		    unsigned int n_stmts)
3322 {
3323   bb_vec_info bb_vinfo;
3324   auto_vector_modes vector_modes;
3325 
3326   /* Autodetect first vector size we try.  */
3327   machine_mode next_vector_mode = VOIDmode;
3328   targetm.vectorize.autovectorize_vector_modes (&vector_modes, false);
3329   unsigned int mode_i = 0;
3330 
3331   vec_info_shared shared;
3332 
3333   machine_mode autodetected_vector_mode = VOIDmode;
3334   while (1)
3335     {
3336       bool vectorized = false;
3337       bool fatal = false;
3338       bb_vinfo = new _bb_vec_info (region_begin, region_end, &shared);
3339 
3340       bool first_time_p = shared.datarefs.is_empty ();
3341       BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
3342       if (first_time_p)
3343 	bb_vinfo->shared->save_datarefs ();
3344       else
3345 	bb_vinfo->shared->check_datarefs ();
3346       bb_vinfo->vector_mode = next_vector_mode;
3347 
3348       if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal)
3349 	  && dbg_cnt (vect_slp))
3350 	{
3351 	  if (dump_enabled_p ())
3352 	    {
3353 	      dump_printf_loc (MSG_NOTE, vect_location,
3354 			       "***** Analysis succeeded with vector mode"
3355 			       " %s\n", GET_MODE_NAME (bb_vinfo->vector_mode));
3356 	      dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3357 	    }
3358 
3359 	  bb_vinfo->shared->check_datarefs ();
3360 	  vect_schedule_slp (bb_vinfo);
3361 
3362 	  unsigned HOST_WIDE_INT bytes;
3363 	  if (dump_enabled_p ())
3364 	    {
3365 	      if (GET_MODE_SIZE (bb_vinfo->vector_mode).is_constant (&bytes))
3366 		dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3367 				 "basic block part vectorized using %wu byte "
3368 				 "vectors\n", bytes);
3369 	      else
3370 		dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3371 				 "basic block part vectorized using variable "
3372 				 "length vectors\n");
3373 	    }
3374 
3375 	  vectorized = true;
3376 	}
3377       else
3378 	{
3379 	  if (dump_enabled_p ())
3380 	    dump_printf_loc (MSG_NOTE, vect_location,
3381 			     "***** Analysis failed with vector mode %s\n",
3382 			     GET_MODE_NAME (bb_vinfo->vector_mode));
3383 	}
3384 
3385       if (mode_i == 0)
3386 	autodetected_vector_mode = bb_vinfo->vector_mode;
3387 
3388       if (!fatal)
3389 	while (mode_i < vector_modes.length ()
3390 	       && vect_chooses_same_modes_p (bb_vinfo, vector_modes[mode_i]))
3391 	  {
3392 	    if (dump_enabled_p ())
3393 	      dump_printf_loc (MSG_NOTE, vect_location,
3394 			       "***** The result for vector mode %s would"
3395 			       " be the same\n",
3396 			       GET_MODE_NAME (vector_modes[mode_i]));
3397 	    mode_i += 1;
3398 	  }
3399 
3400       delete bb_vinfo;
3401 
3402       if (mode_i < vector_modes.length ()
3403 	  && VECTOR_MODE_P (autodetected_vector_mode)
3404 	  && (related_vector_mode (vector_modes[mode_i],
3405 				   GET_MODE_INNER (autodetected_vector_mode))
3406 	      == autodetected_vector_mode)
3407 	  && (related_vector_mode (autodetected_vector_mode,
3408 				   GET_MODE_INNER (vector_modes[mode_i]))
3409 	      == vector_modes[mode_i]))
3410 	{
3411 	  if (dump_enabled_p ())
3412 	    dump_printf_loc (MSG_NOTE, vect_location,
3413 			     "***** Skipping vector mode %s, which would"
3414 			     " repeat the analysis for %s\n",
3415 			     GET_MODE_NAME (vector_modes[mode_i]),
3416 			     GET_MODE_NAME (autodetected_vector_mode));
3417 	  mode_i += 1;
3418 	}
3419 
3420       if (vectorized
3421 	  || mode_i == vector_modes.length ()
3422 	  || autodetected_vector_mode == VOIDmode
3423 	  /* If vect_slp_analyze_bb_1 signaled that analysis for all
3424 	     vector sizes will fail, do not bother iterating.  */
3425 	  || fatal)
3426 	return vectorized;
3427 
3428       /* Try the next biggest vector size.  */
3429       next_vector_mode = vector_modes[mode_i++];
3430       if (dump_enabled_p ())
3431 	dump_printf_loc (MSG_NOTE, vect_location,
3432 			 "***** Re-trying analysis with vector mode %s\n",
3433 			 GET_MODE_NAME (next_vector_mode));
3434     }
3435 }
3436 
3437 /* Main entry for the BB vectorizer.  Analyze and transform BB, returns
3438    true if anything in the basic-block was vectorized.  */
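/* The block is decomposed into maximal regions of statements whose data
   references the analysis can handle; illustratively, a block that
   contains a volatile access is split at that statement, and the
   sub-regions before and after it are analyzed independently.  */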
3439 
3440 bool
3441 vect_slp_bb (basic_block bb)
3442 {
3443   gimple_stmt_iterator gsi;
3444   bool any_vectorized = false;
3445 
3446   gsi = gsi_start_bb (bb);
3447   while (!gsi_end_p (gsi))
3448     {
3449       gimple_stmt_iterator region_begin = gsi;
3450       vec<data_reference_p> datarefs = vNULL;
3451       int insns = 0;
3452 
3453       for (; !gsi_end_p (gsi); gsi_next (&gsi))
3454 	{
3455 	  gimple *stmt = gsi_stmt (gsi);
3456 	  if (is_gimple_debug (stmt))
3457 	    continue;
3458 	  insns++;
3459 
3460 	  if (gimple_location (stmt) != UNKNOWN_LOCATION)
3461 	    vect_location = stmt;
3462 
3463 	  if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
3464 	    break;
3465 	}
3466 
3467       /* Skip leading unhandled stmts.  */
3468       if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3469 	{
3470 	  gsi_next (&gsi);
3471 	  continue;
3472 	}
3473 
3474       gimple_stmt_iterator region_end = gsi;
3475 
3476       if (insns > param_slp_max_insns_in_bb)
3477 	{
3478 	  if (dump_enabled_p ())
3479 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3480 			     "not vectorized: too many instructions in "
3481 			     "basic block.\n");
3482 	}
3483       else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns))
3484 	any_vectorized = true;
3485 
3486       if (gsi_end_p (region_end))
3487 	break;
3488 
3489       /* Skip the unhandled stmt.  */
3490       gsi_next (&gsi);
3491     }
3492 
3493   return any_vectorized;
3494 }
3495 
3496 
3497 /* Return true if constant operand OP_NUM of STMT_VINFO needs a boolean vector type.  */
3498 
3499 static bool
3500 vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo, unsigned op_num)
3501 {
3502   enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
3503   tree op, vectype;
3504   enum vect_def_type dt;
3505 
3506   /* For comparison and COND_EXPR type is chosen depending
3507      on the non-constant other comparison operand.  */
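  /* Illustrative cases: in b_1 = m_2 != 0 where m_2 is itself the
     result of a comparison, m_2's vectype is a boolean vector and the
     constant must be built as a mask too; in b_1 = x_2 != 0 with
     integer x_2 the constant gets x_2's integer vector type.  */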
3508   if (TREE_CODE_CLASS (code) == tcc_comparison)
3509     {
3510       gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3511       op = gimple_assign_rhs1 (stmt);
3512 
3513       if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3514 	gcc_unreachable ();
3515 
3516       return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3517     }
3518 
3519   if (code == COND_EXPR)
3520     {
3521       gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3522       tree cond = gimple_assign_rhs1 (stmt);
3523 
3524       if (TREE_CODE (cond) == SSA_NAME)
3525 	{
3526 	  if (op_num > 0)
3527 	    return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3528 	  op = cond;
3529 	}
3530       else
3531 	{
3532 	  if (op_num > 1)
3533 	    return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3534 	  op = TREE_OPERAND (cond, 0);
3535 	}
3536 
3537       if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3538 	gcc_unreachable ();
3539 
3540       return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3541     }
3542 
3543   return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3544 }
3545 
3546 /* Build a variable-length vector in which the elements in ELTS are repeated
3547    to fill NRESULTS vectors of type VECTOR_TYPE.  Store the vectors in
3548    RESULTS and add any new instructions to SEQ.
3549 
3550    The approach we use is:
3551 
3552    (1) Find a vector mode VM with integer elements of mode IM.
3553 
3554    (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3555        ELTS' has mode IM.  This involves creating NELTS' VIEW_CONVERT_EXPRs
3556        from small vectors to IM.
3557 
3558    (3) Duplicate each ELTS'[I] into a vector of mode VM.
3559 
3560    (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3561        correct byte contents.
3562 
3563    (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3564 
3565    We try to find the largest IM for which this sequence works, in order
3566    to cut down on the number of interleaves.  */
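/* As a worked example, assume (hypothetically) that the target picks
   IM = DImode and NVECTORS = 2 for four 32-bit elements {a, b, c, d}:

     (2) view-convert {a, b} and {c, d} to DImode scalars AB and CD;
     (3) build pieces[0] = {AB, AB, ...} and pieces[1] = {CD, CD, ...};
     (4) one interleave-lo VEC_PERM_EXPR gives {AB, CD, AB, CD, ...},
	 whose bytes read a, b, c, d, a, b, c, d, ...;
     (5) view-convert the result back to the requested vector type.  */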
3567 
3568 void
3569 duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
3570 			  vec<tree> elts, unsigned int nresults,
3571 			  vec<tree> &results)
3572 {
3573   unsigned int nelts = elts.length ();
3574   tree element_type = TREE_TYPE (vector_type);
3575 
3576   /* (1) Find a vector mode VM with integer elements of mode IM.  */
3577   unsigned int nvectors = 1;
3578   tree new_vector_type;
3579   tree permutes[2];
3580   if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type,
3581 				       &nvectors, &new_vector_type,
3582 				       permutes))
3583     gcc_unreachable ();
3584 
3585   /* Get a vector type that holds ELTS[0:NELTS/NELTS'].  */
3586   unsigned int partial_nelts = nelts / nvectors;
3587   tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3588 
3589   tree_vector_builder partial_elts;
3590   auto_vec<tree, 32> pieces (nvectors * 2);
3591   pieces.quick_grow (nvectors * 2);
3592   for (unsigned int i = 0; i < nvectors; ++i)
3593     {
3594       /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3595 	     ELTS' has mode IM.  */
3596       partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3597       for (unsigned int j = 0; j < partial_nelts; ++j)
3598 	partial_elts.quick_push (elts[i * partial_nelts + j]);
3599       tree t = gimple_build_vector (seq, &partial_elts);
3600       t = gimple_build (seq, VIEW_CONVERT_EXPR,
3601 			TREE_TYPE (new_vector_type), t);
3602 
3603       /* (3) Duplicate each ELTS'[I] into a vector of mode VM.  */
3604       pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3605     }
3606 
3607   /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3608 	 correct byte contents.
3609 
3610      We need to repeat the following operation log2(nvectors) times:
3611 
3612 	out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3613 	out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3614 
3615      However, if each input repeats every N elements and the VF is
3616      a multiple of N * 2, the HI result is the same as the LO.  */
3617   unsigned int in_start = 0;
3618   unsigned int out_start = nvectors;
3619   unsigned int hi_start = nvectors / 2;
3620   /* A bound on the number of outputs needed to produce NRESULTS results
3621      in the final iteration.  */
3622   unsigned int noutputs_bound = nvectors * nresults;
3623   for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3624     {
3625       noutputs_bound /= 2;
3626       unsigned int limit = MIN (noutputs_bound, nvectors);
3627       for (unsigned int i = 0; i < limit; ++i)
3628 	{
3629 	  if ((i & 1) != 0
3630 	      && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3631 			     2 * in_repeat))
3632 	    {
3633 	      pieces[out_start + i] = pieces[out_start + i - 1];
3634 	      continue;
3635 	    }
3636 
3637 	  tree output = make_ssa_name (new_vector_type);
3638 	  tree input1 = pieces[in_start + (i / 2)];
3639 	  tree input2 = pieces[in_start + (i / 2) + hi_start];
3640 	  gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3641 					       input1, input2,
3642 					       permutes[i & 1]);
3643 	  gimple_seq_add_stmt (seq, stmt);
3644 	  pieces[out_start + i] = output;
3645 	}
3646       std::swap (in_start, out_start);
3647     }
3648 
3649   /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type.  */
3650   results.reserve (nresults);
3651   for (unsigned int i = 0; i < nresults; ++i)
3652     if (i < nvectors)
3653       results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3654 					pieces[in_start + i]));
3655     else
3656       results.quick_push (results[i - nvectors]);
3657 }
3658 
3659 
3660 /* For constant and loop invariant defs of SLP_NODE this function returns
3661    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3662    OP_NODE determines the node for the operand containing the scalar
3663    operands.  */
3664 
3665 static void
3666 vect_get_constant_vectors (slp_tree slp_node, unsigned op_num,
3667                            vec<tree> *vec_oprnds)
3668 {
3669   slp_tree op_node = SLP_TREE_CHILDREN (slp_node)[op_num];
3670   stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3671   vec_info *vinfo = stmt_vinfo->vinfo;
3672   unsigned HOST_WIDE_INT nunits;
3673   tree vec_cst;
3674   unsigned j, number_of_places_left_in_vector;
3675   tree vector_type;
3676   tree vop;
3677   int group_size = op_node->ops.length ();
3678   unsigned int vec_num, i;
3679   unsigned number_of_copies = 1;
3680   bool constant_p;
3681   tree neutral_op = NULL;
3682   gimple_seq ctor_seq = NULL;
3683   auto_vec<tree, 16> permute_results;
3684 
3685   /* ???  SLP analysis should compute the vector type for the
3686      constant / invariant and store it in the SLP node.  */
3687   tree op = op_node->ops[0];
3688   /* Check if vector type is a boolean vector.  */
3689   tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3690   if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3691       && vect_mask_constant_operand_p (stmt_vinfo, op_num))
3692     vector_type = truth_type_for (stmt_vectype);
3693   else
3694     vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), op_node);
3695 
3696   /* ???  For lane-reducing ops we should also have the required number
3697      of vector stmts initialized rather than second-guessing here.  */
3698   unsigned int number_of_vectors;
3699   if (is_gimple_assign (stmt_vinfo->stmt)
3700       && (gimple_assign_rhs_code (stmt_vinfo->stmt) == SAD_EXPR
3701 	  || gimple_assign_rhs_code (stmt_vinfo->stmt) == DOT_PROD_EXPR
3702 	  || gimple_assign_rhs_code (stmt_vinfo->stmt) == WIDEN_SUM_EXPR))
3703     number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3704   else
3705     number_of_vectors
3706       = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3707 			      * TYPE_VECTOR_SUBPARTS (stmt_vectype),
3708 			      vector_type);
3709   vec_oprnds->create (number_of_vectors);
3710   auto_vec<tree> voprnds (number_of_vectors);
3711 
3712   /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3713      created vectors. It is greater than 1 if unrolling is performed.
3714 
3715      For example, we have two scalar operands, s1 and s2 (e.g., group of
3716      strided accesses of size two), while NUNITS is four (i.e., four scalars
3717      of this type can be packed in a vector).  The output vector will contain
3718      two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
3719      will be 2).
3720 
3721      If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3722      containing the operands.
3723 
3724      For example, NUNITS is four as before, and the group size is 8
3725      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
3726      {s5, s6, s7, s8}.  */
3727 
3728   /* When using duplicate_and_interleave, we just need one element for
3729      each scalar statement.  */
3730   if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3731     nunits = group_size;
3732 
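  /* E.g., with the illustrative numbers from the comment above: NUNITS 4,
     GROUP_SIZE 2 and a single vector to create gives 4 * 1 / 2 = 2
     copies, i.e. {s1, s2, s1, s2}.  */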
3733   number_of_copies = nunits * number_of_vectors / group_size;
3734 
3735   number_of_places_left_in_vector = nunits;
3736   constant_p = true;
3737   tree_vector_builder elts (vector_type, nunits, 1);
3738   elts.quick_grow (nunits);
3739   bool place_after_defs = false;
3740   for (j = 0; j < number_of_copies; j++)
3741     {
3742       for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
3743         {
3744           /* Create 'vect_ = {op0,op1,...,opn}'.  */
3745           number_of_places_left_in_vector--;
3746 	  tree orig_op = op;
3747 	  if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3748 	    {
3749 	      if (CONSTANT_CLASS_P (op))
3750 		{
3751 		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3752 		    {
3753 		      /* Can't use VIEW_CONVERT_EXPR for booleans because
3754 			 of possibly different sizes of scalar value and
3755 			 vector element.  */
3756 		      if (integer_zerop (op))
3757 			op = build_int_cst (TREE_TYPE (vector_type), 0);
3758 		      else if (integer_onep (op))
3759 			op = build_all_ones_cst (TREE_TYPE (vector_type));
3760 		      else
3761 			gcc_unreachable ();
3762 		    }
3763 		  else
3764 		    op = fold_unary (VIEW_CONVERT_EXPR,
3765 				     TREE_TYPE (vector_type), op);
3766 		  gcc_assert (op && CONSTANT_CLASS_P (op));
3767 		}
3768 	      else
3769 		{
3770 		  tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3771 		  gimple *init_stmt;
3772 		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3773 		    {
3774 		      tree true_val
3775 			= build_all_ones_cst (TREE_TYPE (vector_type));
3776 		      tree false_val
3777 			= build_zero_cst (TREE_TYPE (vector_type));
3778 		      gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3779 		      init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3780 						       op, true_val,
3781 						       false_val);
3782 		    }
3783 		  else
3784 		    {
3785 		      op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3786 				   op);
3787 		      init_stmt
3788 			= gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3789 					       op);
3790 		    }
3791 		  gimple_seq_add_stmt (&ctor_seq, init_stmt);
3792 		  op = new_temp;
3793 		}
3794 	    }
3795 	  elts[number_of_places_left_in_vector] = op;
3796 	  if (!CONSTANT_CLASS_P (op))
3797 	    constant_p = false;
3798 	  if (TREE_CODE (orig_op) == SSA_NAME
3799 	      && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3800 	      && STMT_VINFO_BB_VINFO (stmt_vinfo)
3801 	      && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3802 		  == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3803 	    place_after_defs = true;
3804 
3805           if (number_of_places_left_in_vector == 0)
3806             {
3807 	      if (constant_p
3808 		  ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3809 		  : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3810 		vec_cst = gimple_build_vector (&ctor_seq, &elts);
3811 	      else
3812 		{
3813 		  if (permute_results.is_empty ())
3814 		    duplicate_and_interleave (vinfo, &ctor_seq, vector_type,
3815 					      elts, number_of_vectors,
3816 					      permute_results);
3817 		  vec_cst = permute_results[number_of_vectors - j - 1];
3818 		}
3819 	      tree init;
3820 	      gimple_stmt_iterator gsi;
3821 	      if (place_after_defs)
3822 		{
3823 		  stmt_vec_info last_stmt_info
3824 		    = vect_find_last_scalar_stmt_in_slp (slp_node);
3825 		  gsi = gsi_for_stmt (last_stmt_info->stmt);
3826 		  init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3827 					   &gsi);
3828 		}
3829 	      else
3830 		init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3831 					 NULL);
3832 	      if (ctor_seq != NULL)
3833 		{
3834 		  gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3835 		  gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
3836 		  ctor_seq = NULL;
3837 		}
3838 	      voprnds.quick_push (init);
3839 	      place_after_defs = false;
3840               number_of_places_left_in_vector = nunits;
3841 	      constant_p = true;
3842 	      elts.new_vector (vector_type, nunits, 1);
3843 	      elts.quick_grow (nunits);
3844             }
3845         }
3846     }
3847 
3848   /* Since the vectors are created in reverse order, we should invert
3849      them.  */
3850   vec_num = voprnds.length ();
3851   for (j = vec_num; j != 0; j--)
3852     {
3853       vop = voprnds[j - 1];
3854       vec_oprnds->quick_push (vop);
3855     }
3856 
3857   /* When the VF is greater than the unrolling factor needed for the SLP
3858      group of stmts, the NUMBER_OF_VECTORS to be created is greater than
3859      NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we
3860      have to replicate the vectors.  */
3861   while (number_of_vectors > vec_oprnds->length ())
3862     {
3863       tree neutral_vec = NULL;
3864 
3865       if (neutral_op)
3866         {
3867           if (!neutral_vec)
3868 	    neutral_vec = build_vector_from_val (vector_type, neutral_op);
3869 
3870           vec_oprnds->quick_push (neutral_vec);
3871         }
3872       else
3873         {
3874           for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3875             vec_oprnds->quick_push (vop);
3876         }
3877     }
3878 }
3879 
3880 
3881 /* Get vectorized definitions from SLP_NODE, which already contains the
3882    corresponding vectorized def-stmts.  */
3883 
3884 static void
3885 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3886 {
3887   stmt_vec_info vec_def_stmt_info;
3888   unsigned int i;
3889 
3890   gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3891 
3892   FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
3893     vec_oprnds->quick_push (gimple_get_lhs (vec_def_stmt_info->stmt));
3894 }
3895 
3896 
3897 /* Get N vectorized definitions for SLP_NODE.
3898    If the scalar definitions are loop invariants or constants, collect them and
3899    call vect_get_constant_vectors() to create vector stmts.
3900    Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3901    must be stored in the corresponding child of SLP_NODE, and we call
3902    vect_get_slp_vect_defs () to retrieve them.  */
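/* A minimal usage sketch (with a hypothetical caller and node) that
   collects the vector defs for both operands of a binary operation:

     auto_vec<vec<tree> > vec_defs (2);
     vect_get_slp_defs (node, &vec_defs, 2);

   vec_defs[0] and vec_defs[1] then hold the vector defs of operand 0
   and operand 1; the caller is responsible for releasing the inner
   vectors.  */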
3903 
3904 void
3905 vect_get_slp_defs (slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n)
3906 {
3907   if (n == -1U)
3908     n = SLP_TREE_CHILDREN (slp_node).length ();
3909 
3910   for (unsigned i = 0; i < n; ++i)
3911     {
3912       slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];
3913 
3914       vec<tree> vec_defs = vNULL;
3915 
3916       /* For each operand we check if it has vectorized definitions in a child
3917 	 node or we need to create them (for invariants and constants).  */
3918       if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3919 	{
3920 	  vec_defs.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child));
3921 	  vect_get_slp_vect_defs (child, &vec_defs);
3922 	}
3923       else
3924 	vect_get_constant_vectors (slp_node, i, &vec_defs);
3925 
3926       vec_oprnds->quick_push (vec_defs);
3927     }
3928 }
3929 
3930 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3931    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3932    permute statements for the SLP node NODE of the SLP instance
3933    SLP_NODE_INSTANCE.  */
3934 
3935 bool
3936 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3937 			      gimple_stmt_iterator *gsi, poly_uint64 vf,
3938 			      slp_instance slp_node_instance, bool analyze_only,
3939 			      unsigned *n_perms)
3940 {
3941   stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3942   vec_info *vinfo = stmt_info->vinfo;
3943   int vec_index = 0;
3944   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3945   unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3946   unsigned int mask_element;
3947   machine_mode mode;
3948 
3949   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3950     return false;
3951 
3952   stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
3953 
3954   mode = TYPE_MODE (vectype);
3955   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3956 
3957   /* Initialize the vect stmts of NODE to properly insert the generated
3958      stmts later.  */
3959   if (! analyze_only)
3960     for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3961 	 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3962       SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3963 
3964   /* Generate permutation masks for every NODE.  Number of masks for each NODE
3965      is equal to GROUP_SIZE.
3966      E.g., we have a group of three nodes with three loads from the same
3967      location in each node, and the vector size is 4.  I.e., we have a
3968      a0b0c0a1b1c1... sequence and we need to create the following vectors:
3969      for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3970      for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3971      ...
3972 
3973      The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3974      The last mask is illegal since we assume two operands for a permute
3975      operation, and the mask element values can't be outside that range.
3976      Hence, the last mask must be converted into {2,5,5,5}.
3977      For the first two permutations we need the first and the second input
3978      vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3979      we need the second and the third vectors: {b1,c1,a2,b2} and
3980      {c2,a3,b3,c3}.  */
3981 
3982   int vect_stmts_counter = 0;
3983   unsigned int index = 0;
3984   int first_vec_index = -1;
3985   int second_vec_index = -1;
3986   bool noop_p = true;
3987   *n_perms = 0;
3988 
3989   vec_perm_builder mask;
3990   unsigned int nelts_to_build;
3991   unsigned int nvectors_per_build;
3992   bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
3993 		      && multiple_p (nunits, group_size));
3994   if (repeating_p)
3995     {
3996       /* A single vector contains a whole number of copies of the node, so:
3997 	 (a) all permutes can use the same mask; and
3998 	 (b) the permutes only need a single vector input.  */
3999       mask.new_vector (nunits, group_size, 3);
4000       nelts_to_build = mask.encoded_nelts ();
4001       nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
4002     }
4003   else
4004     {
4005       /* We need to construct a separate mask for each vector statement.  */
4006       unsigned HOST_WIDE_INT const_nunits, const_vf;
4007       if (!nunits.is_constant (&const_nunits)
4008 	  || !vf.is_constant (&const_vf))
4009 	return false;
4010       mask.new_vector (const_nunits, const_nunits, 1);
4011       nelts_to_build = const_vf * group_size;
4012       nvectors_per_build = 1;
4013     }
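  /* E.g. (illustrative), for a repeating group {a, b} with load
     permutation {1, 0}, the encoded mask is {1, 0, 3, 2, 5, 4}, which
     extends to the within-pair swap {1, 0, 3, 2, 5, 4, 7, 6, ...}
     across the whole vector and needs only a single input vector.  */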
4014 
4015   unsigned int count = mask.encoded_nelts ();
4016   mask.quick_grow (count);
4017   vec_perm_indices indices;
4018 
4019   for (unsigned int j = 0; j < nelts_to_build; j++)
4020     {
4021       unsigned int iter_num = j / group_size;
4022       unsigned int stmt_num = j % group_size;
4023       unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
4024 			+ SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
4025       if (repeating_p)
4026 	{
4027 	  first_vec_index = 0;
4028 	  mask_element = i;
4029 	}
4030       else
4031 	{
4032 	  /* Enforced before the loop when !repeating_p.  */
4033 	  unsigned int const_nunits = nunits.to_constant ();
4034 	  vec_index = i / const_nunits;
4035 	  mask_element = i % const_nunits;
4036 	  if (vec_index == first_vec_index
4037 	      || first_vec_index == -1)
4038 	    {
4039 	      first_vec_index = vec_index;
4040 	    }
4041 	  else if (vec_index == second_vec_index
4042 		   || second_vec_index == -1)
4043 	    {
4044 	      second_vec_index = vec_index;
4045 	      mask_element += const_nunits;
4046 	    }
4047 	  else
4048 	    {
4049 	      if (dump_enabled_p ())
4050 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4051 				 "permutation requires at "
4052 				 "least three vectors %G",
4053 				 stmt_info->stmt);
4054 	      gcc_assert (analyze_only);
4055 	      return false;
4056 	    }
4057 
4058 	  gcc_assert (mask_element < 2 * const_nunits);
4059 	}
4060 
4061       if (mask_element != index)
4062 	noop_p = false;
4063       mask[index++] = mask_element;
4064 
4065       if (index == count && !noop_p)
4066 	{
4067 	  indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
4068 	  if (!can_vec_perm_const_p (mode, indices))
4069 	    {
4070 	      if (dump_enabled_p ())
4071 		{
4072 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4073 				   vect_location,
4074 				   "unsupported vect permute { ");
4075 		  for (i = 0; i < count; ++i)
4076 		    {
4077 		      dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
4078 		      dump_printf (MSG_MISSED_OPTIMIZATION, " ");
4079 		    }
4080 		  dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
4081 		}
4082 	      gcc_assert (analyze_only);
4083 	      return false;
4084 	    }
4085 
4086 	  ++*n_perms;
4087 	}
4088 
4089       if (index == count)
4090 	{
4091 	  if (!analyze_only)
4092 	    {
4093 	      tree mask_vec = NULL_TREE;
4094 
4095 	      if (! noop_p)
4096 		mask_vec = vect_gen_perm_mask_checked (vectype, indices);
4097 
4098 	      if (second_vec_index == -1)
4099 		second_vec_index = first_vec_index;
4100 
4101 	      for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
4102 		{
4103 		  /* Generate the permute statement if necessary.  */
4104 		  tree first_vec = dr_chain[first_vec_index + ri];
4105 		  tree second_vec = dr_chain[second_vec_index + ri];
4106 		  stmt_vec_info perm_stmt_info;
4107 		  if (! noop_p)
4108 		    {
4109 		      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
4110 		      tree perm_dest
4111 			= vect_create_destination_var (gimple_assign_lhs (stmt),
4112 						       vectype);
4113 		      perm_dest = make_ssa_name (perm_dest);
4114 		      gassign *perm_stmt
4115 			= gimple_build_assign (perm_dest, VEC_PERM_EXPR,
4116 					       first_vec, second_vec,
4117 					       mask_vec);
4118 		      perm_stmt_info
4119 			= vect_finish_stmt_generation (stmt_info, perm_stmt,
4120 						       gsi);
4121 		    }
4122 		  else
4123 		    /* If mask was NULL_TREE generate the requested
4124 		       identity transform.  */
4125 		    perm_stmt_info = vinfo->lookup_def (first_vec);
4126 
4127 		  /* Store the vector statement in NODE.  */
4128 		  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
4129 		    = perm_stmt_info;
4130 		}
4131 	    }
4132 
4133 	  index = 0;
4134 	  first_vec_index = -1;
4135 	  second_vec_index = -1;
4136 	  noop_p = true;
4137 	}
4138     }
4139 
4140   return true;
4141 }
4142 
4143 /* Vectorize SLP instance tree in postorder.  */
4144 
4145 static void
4146 vect_schedule_slp_instance (slp_tree node, slp_instance instance)
4147 {
4148   gimple_stmt_iterator si;
4149   stmt_vec_info stmt_info;
4150   unsigned int group_size;
4151   tree vectype;
4152   int i, j;
4153   slp_tree child;
4154 
4155   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
4156     return;
4157 
4158   /* See if we have already vectorized the node in the graph of the
4159      SLP instance.  */
4160   if (SLP_TREE_VEC_STMTS (node).exists ())
4161     return;
4162 
4163   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4164     vect_schedule_slp_instance (child, instance);
4165 
4166   /* Push SLP node def-type to stmts.  */
4167   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4168     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
4169       {
4170 	stmt_vec_info child_stmt_info;
4171 	FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
4172 	  STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
4173       }
4174 
4175   stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
4176 
4177   /* VECTYPE is the type of the destination.  */
4178   vectype = STMT_VINFO_VECTYPE (stmt_info);
4179   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4180   group_size = SLP_INSTANCE_GROUP_SIZE (instance);
4181 
4182   gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
4183   SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
4184 
4185   if (dump_enabled_p ())
4186     dump_printf_loc (MSG_NOTE, vect_location,
4187 		     "------>vectorizing SLP node starting from: %G",
4188 		     stmt_info->stmt);
4189 
4190   /* Vectorized stmts go before the last scalar stmt, which is where
4191      all uses are ready.  */
4192   stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
4193   si = gsi_for_stmt (last_stmt_info->stmt);
4194 
4195   /* Handle two-operation SLP nodes by vectorizing the group with
4196      both operations and then performing a merge.  */
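  /* E.g. (illustrative), for the scalar group {a0 + b0, a1 - b1,
     a2 + b2, a3 - b3} with four lanes per vector, the group is
     vectorized once as v0 = a + b and once as v1 = a - b, and
     VEC_PERM_EXPR <v0, v1, {0, 5, 2, 7}> then selects the plus lanes
     from v0 and the minus lanes from v1.  */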
4197   bool done_p = false;
4198   if (SLP_TREE_TWO_OPERATORS (node))
4199     {
4200       gassign *stmt = as_a <gassign *> (stmt_info->stmt);
4201       enum tree_code code0 = gimple_assign_rhs_code (stmt);
4202       enum tree_code ocode = ERROR_MARK;
4203       stmt_vec_info ostmt_info;
4204       vec_perm_builder mask (group_size, group_size, 1);
4205       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
4206 	{
4207 	  gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
4208 	  if (gimple_assign_rhs_code (ostmt) != code0)
4209 	    {
4210 	      mask.quick_push (1);
4211 	      ocode = gimple_assign_rhs_code (ostmt);
4212 	    }
4213 	  else
4214 	    mask.quick_push (0);
4215 	}
4216       if (ocode != ERROR_MARK)
4217 	{
4218 	  vec<stmt_vec_info> v0;
4219 	  vec<stmt_vec_info> v1;
4220 	  unsigned j;
4221 	  tree tmask = NULL_TREE;
4222 	  vect_transform_stmt (stmt_info, &si, node, instance);
4223 	  v0 = SLP_TREE_VEC_STMTS (node).copy ();
4224 	  SLP_TREE_VEC_STMTS (node).truncate (0);
4225 	  gimple_assign_set_rhs_code (stmt, ocode);
4226 	  vect_transform_stmt (stmt_info, &si, node, instance);
4227 	  gimple_assign_set_rhs_code (stmt, code0);
4228 	  v1 = SLP_TREE_VEC_STMTS (node).copy ();
4229 	  SLP_TREE_VEC_STMTS (node).truncate (0);
4230 	  tree meltype = build_nonstandard_integer_type
4231 	      (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
4232 	  tree mvectype = get_same_sized_vectype (meltype, vectype);
4233 	  unsigned k = 0, l;
4234 	  for (j = 0; j < v0.length (); ++j)
4235 	    {
4236 	      /* Enforced by vect_build_slp_tree, which rejects variable-length
4237 		 vectors for SLP_TREE_TWO_OPERATORS.  */
4238 	      unsigned int const_nunits = nunits.to_constant ();
4239 	      tree_vector_builder melts (mvectype, const_nunits, 1);
4240 	      for (l = 0; l < const_nunits; ++l)
4241 		{
4242 		  if (k >= group_size)
4243 		    k = 0;
4244 		  tree t = build_int_cst (meltype,
4245 					  mask[k++] * const_nunits + l);
4246 		  melts.quick_push (t);
4247 		}
4248 	      tmask = melts.build ();
4249 
4250 	      /* ???  Not all targets support a VEC_PERM_EXPR with a
4251 	         constant mask that would translate to a vec_merge RTX
4252 		 (with their vec_perm_const_ok).  We can either not
4253 		 vectorize in that case or let veclower do its job.
4254 		 Unfortunately that isn't too great and at least for
4255 		 plus/minus we'd eventually like to match targets
4256 		 vector addsub instructions.  */
4257 	      gimple *vstmt;
4258 	      vstmt = gimple_build_assign (make_ssa_name (vectype),
4259 					   VEC_PERM_EXPR,
4260 					   gimple_assign_lhs (v0[j]->stmt),
4261 					   gimple_assign_lhs (v1[j]->stmt),
4262 					   tmask);
4263 	      SLP_TREE_VEC_STMTS (node).quick_push
4264 		(vect_finish_stmt_generation (stmt_info, vstmt, &si));
4265 	    }
4266 	  v0.release ();
4267 	  v1.release ();
4268 	  done_p = true;
4269 	}
4270     }
4271   if (!done_p)
4272     vect_transform_stmt (stmt_info, &si, node, instance);
4273 
4274   /* Restore stmt def-types.  */
4275   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4276     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
4277       {
4278 	stmt_vec_info child_stmt_info;
4279 	FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
4280 	  STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
4281       }
4282 }
4283 
4284 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4285    For loop vectorization this is done in vectorizable_call, but for SLP
4286    it needs to be deferred until end of vect_schedule_slp, because multiple
4287    SLP instances may refer to the same scalar stmt.  */
4288 
4289 static void
4290 vect_remove_slp_scalar_calls (slp_tree node, hash_set<slp_tree> &visited)
4291 {
4292   gimple *new_stmt;
4293   gimple_stmt_iterator gsi;
4294   int i;
4295   slp_tree child;
4296   tree lhs;
4297   stmt_vec_info stmt_info;
4298 
4299   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
4300     return;
4301 
4302   if (visited.add (node))
4303     return;
4304 
4305   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4306     vect_remove_slp_scalar_calls (child, visited);
4307 
4308   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
4309     {
4310       gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4311       if (!stmt || gimple_bb (stmt) == NULL)
4312 	continue;
4313       if (is_pattern_stmt_p (stmt_info)
4314 	  || !PURE_SLP_STMT (stmt_info))
4315 	continue;
4316       lhs = gimple_call_lhs (stmt);
4317       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
4318       gsi = gsi_for_stmt (stmt);
4319       stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
4320       SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
4321     }
4322 }
4323 
4324 static void
4325 vect_remove_slp_scalar_calls (slp_tree node)
4326 {
4327   hash_set<slp_tree> visited;
4328   vect_remove_slp_scalar_calls (node, visited);
4329 }
4330 
4331 /* Vectorize the instance root.  */
4332 
4333 void
4334 vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
4335 {
4336   gassign *rstmt = NULL;
4337 
4338   if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
4339     {
4340       stmt_vec_info child_stmt_info;
4341       int j;
4342 
4343       FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4344 	{
4345 	  tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt);
4346 	  tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt);
4347 	  if (!useless_type_conversion_p (TREE_TYPE (root_lhs),
4348 					  TREE_TYPE (vect_lhs)))
4349 	    vect_lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (root_lhs),
4350 			       vect_lhs);
4351 	  rstmt = gimple_build_assign (root_lhs, vect_lhs);
4352 	  break;
4353 	}
4354     }
4355   else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
4356     {
4357       int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
4358       stmt_vec_info child_stmt_info;
4359       int j;
4360       vec<constructor_elt, va_gc> *v;
4361       vec_alloc (v, nelts);
4362 
4363       FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4364 	{
4365 	  CONSTRUCTOR_APPEND_ELT (v,
4366 				  NULL_TREE,
4367 				  gimple_get_lhs (child_stmt_info->stmt));
4368 	}
4369       tree lhs = gimple_get_lhs (instance->root_stmt->stmt);
4370       tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt));
4371       tree r_constructor = build_constructor (rtype, v);
4372       rstmt = gimple_build_assign (lhs, r_constructor);
4373     }
4374 
4375   gcc_assert (rstmt);
4376 
4377   gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt);
4378   gsi_replace (&rgsi, rstmt, true);
4379 }
4380 
4381 /* Generate vector code for all SLP instances in the loop/basic block.  */
4382 
4383 void
4384 vect_schedule_slp (vec_info *vinfo)
4385 {
4386   vec<slp_instance> slp_instances;
4387   slp_instance instance;
4388   unsigned int i;
4389 
4390   slp_instances = vinfo->slp_instances;
4391   FOR_EACH_VEC_ELT (slp_instances, i, instance)
4392     {
4393       slp_tree node = SLP_INSTANCE_TREE (instance);
4394       /* Schedule the tree of INSTANCE.  */
4395       vect_schedule_slp_instance (node, instance);
4396 
4397       if (SLP_INSTANCE_ROOT_STMT (instance))
4398 	vectorize_slp_instance_root_stmt (node, instance);
4399 
4400       if (dump_enabled_p ())
4401 	dump_printf_loc (MSG_NOTE, vect_location,
4402                          "vectorizing stmts using SLP.\n");
4403     }
4404 
4405   FOR_EACH_VEC_ELT (slp_instances, i, instance)
4406     {
4407       slp_tree root = SLP_INSTANCE_TREE (instance);
4408       stmt_vec_info store_info;
4409       unsigned int j;
4410 
4411       /* Remove scalar call stmts.  Do not do this for basic-block
4412 	 vectorization as not all uses may be vectorized.
4413 	 ???  Why should this be necessary?  DCE should be able to
4414 	 remove the stmts itself.
4415 	 ???  For BB vectorization we can as well remove scalar
4416 	 stmts starting from the SLP tree root if they have no
4417 	 uses.  */
4418       if (is_a <loop_vec_info> (vinfo))
4419 	vect_remove_slp_scalar_calls (root);
4420 
4421       for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
4422                   && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
4423         {
4424 	  if (!STMT_VINFO_DATA_REF (store_info))
4425 	    break;
4426 
4427 	  if (SLP_INSTANCE_ROOT_STMT (instance))
4428 	    continue;
4429 
4430 	  store_info = vect_orig_stmt (store_info);
4431 	  /* Free the attached stmt_vec_info and remove the stmt.  */
4432 	  vinfo->remove_stmt (store_info);
4433         }
4434     }
4435 }
4436