1 /* SLP - Basic Block Vectorization
2    Copyright (C) 2007-2016 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4    and Ira Rosen <irar@il.ibm.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "tree-pass.h"
31 #include "ssa.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h"		/* FIXME: for insn_data */
35 #include "params.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "gimple-iterator.h"
39 #include "cfgloop.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42 #include "gimple-walk.h"
43 #include "dbgcnt.h"
44 
45 
46 /* Recursively free the memory allocated for the SLP tree rooted at NODE.  */
47 
48 static void
vect_free_slp_tree(slp_tree node)49 vect_free_slp_tree (slp_tree node)
50 {
51   int i;
52   slp_tree child;
53 
54   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
55     vect_free_slp_tree (child);
56 
57   gimple *stmt;
58   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
59     /* After transform some stmts are removed and thus their vinfo is gone.  */
60     if (vinfo_for_stmt (stmt))
61       {
62 	gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
63 	STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
64       }
65 
66   SLP_TREE_CHILDREN (node).release ();
67   SLP_TREE_SCALAR_STMTS (node).release ();
68   SLP_TREE_VEC_STMTS (node).release ();
69   SLP_TREE_LOAD_PERMUTATION (node).release ();
70 
71   free (node);
72 }
73 
74 
75 /* Free the memory allocated for the SLP instance.  */
76 
77 void
vect_free_slp_instance(slp_instance instance)78 vect_free_slp_instance (slp_instance instance)
79 {
80   vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
81   SLP_INSTANCE_LOADS (instance).release ();
82   free (instance);
83 }
84 
85 
86 /* Create an SLP node for SCALAR_STMTS.  */
87 
88 static slp_tree
vect_create_new_slp_node(vec<gimple * > scalar_stmts)89 vect_create_new_slp_node (vec<gimple *> scalar_stmts)
90 {
91   slp_tree node;
92   gimple *stmt = scalar_stmts[0];
93   unsigned int nops;
94 
95   if (is_gimple_call (stmt))
96     nops = gimple_call_num_args (stmt);
97   else if (is_gimple_assign (stmt))
98     {
99       nops = gimple_num_ops (stmt) - 1;
100       if (gimple_assign_rhs_code (stmt) == COND_EXPR)
101 	nops++;
102     }
103   else
104     return NULL;
105 
106   node = XNEW (struct _slp_tree);
107   SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
108   SLP_TREE_VEC_STMTS (node).create (0);
109   SLP_TREE_CHILDREN (node).create (nops);
110   SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
111   SLP_TREE_TWO_OPERATORS (node) = false;
112   SLP_TREE_DEF_TYPE (node) = vect_internal_def;
113 
114   unsigned i;
115   FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
116     STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;
117 
118   return node;
119 }
120 
121 
122 /* This structure is used in creation of an SLP tree.  Each instance
123    corresponds to the same operand in a group of scalar stmts in an SLP
124    node.  */
125 typedef struct _slp_oprnd_info
126 {
127   /* Def-stmts for the operands.  */
128   vec<gimple *> def_stmts;
129   /* Information about the first statement, its vector def-type, type, the
130      operand itself in case it's constant, and an indication if it's a pattern
131      stmt.  */
132   enum vect_def_type first_dt;
133   tree first_op_type;
134   bool first_pattern;
135   bool second_pattern;
136 } *slp_oprnd_info;
137 
138 
139 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
140    operand.  */
141 static vec<slp_oprnd_info>
vect_create_oprnd_info(int nops,int group_size)142 vect_create_oprnd_info (int nops, int group_size)
143 {
144   int i;
145   slp_oprnd_info oprnd_info;
146   vec<slp_oprnd_info> oprnds_info;
147 
148   oprnds_info.create (nops);
149   for (i = 0; i < nops; i++)
150     {
151       oprnd_info = XNEW (struct _slp_oprnd_info);
152       oprnd_info->def_stmts.create (group_size);
153       oprnd_info->first_dt = vect_uninitialized_def;
154       oprnd_info->first_op_type = NULL_TREE;
155       oprnd_info->first_pattern = false;
156       oprnd_info->second_pattern = false;
157       oprnds_info.quick_push (oprnd_info);
158     }
159 
160   return oprnds_info;
161 }
162 
163 
164 /* Free operands info.  */
165 
166 static void
vect_free_oprnd_info(vec<slp_oprnd_info> & oprnds_info)167 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
168 {
169   int i;
170   slp_oprnd_info oprnd_info;
171 
172   FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
173     {
174       oprnd_info->def_stmts.release ();
175       XDELETE (oprnd_info);
176     }
177 
178   oprnds_info.release ();
179 }
180 
181 
182 /* Find the place of the data-ref in STMT in the interleaving chain that starts
183    from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
184 
185 static int
vect_get_place_in_interleaving_chain(gimple * stmt,gimple * first_stmt)186 vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
187 {
188   gimple *next_stmt = first_stmt;
189   int result = 0;
190 
191   if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
192     return -1;
193 
194   do
195     {
196       if (next_stmt == stmt)
197 	return result;
198       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
199       if (next_stmt)
200 	result += GROUP_GAP (vinfo_for_stmt (next_stmt));
201     }
202   while (next_stmt);
203 
204   return -1;
205 }
206 
207 
208 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
209    they are of a valid type and that they match the defs of the first stmt of
210    the SLP group (stored in OPRNDS_INFO).  If there was a fatal error
211    return -1, if the error could be corrected by swapping operands of the
212    operation return 1, if everything is ok return 0.  */
213 
214 static int
vect_get_and_check_slp_defs(vec_info * vinfo,gimple * stmt,unsigned stmt_num,vec<slp_oprnd_info> * oprnds_info)215 vect_get_and_check_slp_defs (vec_info *vinfo,
216 			     gimple *stmt, unsigned stmt_num,
217                              vec<slp_oprnd_info> *oprnds_info)
218 {
219   tree oprnd;
220   unsigned int i, number_of_oprnds;
221   gimple *def_stmt;
222   enum vect_def_type dt = vect_uninitialized_def;
223   bool pattern = false;
224   slp_oprnd_info oprnd_info;
225   int first_op_idx = 1;
226   bool commutative = false;
227   bool first_op_cond = false;
228   bool first = stmt_num == 0;
229   bool second = stmt_num == 1;
230 
231   if (is_gimple_call (stmt))
232     {
233       number_of_oprnds = gimple_call_num_args (stmt);
234       first_op_idx = 3;
235     }
236   else if (is_gimple_assign (stmt))
237     {
238       enum tree_code code = gimple_assign_rhs_code (stmt);
239       number_of_oprnds = gimple_num_ops (stmt) - 1;
240       if (gimple_assign_rhs_code (stmt) == COND_EXPR
241 	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
242 	{
243 	  first_op_cond = true;
244 	  commutative = true;
245 	  number_of_oprnds++;
246 	}
247       else
248 	commutative = commutative_tree_code (code);
249     }
250   else
251     return -1;
252 
253   bool swapped = false;
254   for (i = 0; i < number_of_oprnds; i++)
255     {
256 again:
257       if (first_op_cond)
258 	{
259 	  if (i == 0 || i == 1)
260 	    oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx),
261 				  swapped ? !i : i);
262 	  else
263 	    oprnd = gimple_op (stmt, first_op_idx + i - 1);
264 	}
265       else
266         oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));
267 
268       oprnd_info = (*oprnds_info)[i];
269 
270       if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
271 	{
272 	  if (dump_enabled_p ())
273 	    {
274 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
275 			       "Build SLP failed: can't analyze def for ");
276 	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
277               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
278 	    }
279 
280 	  return -1;
281 	}
282 
283       /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
284          from the pattern.  Check that all the stmts of the node are in the
285          pattern.  */
286       if (def_stmt && gimple_bb (def_stmt)
287           && vect_stmt_in_region_p (vinfo, def_stmt)
288           && vinfo_for_stmt (def_stmt)
289           && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
290 	  && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
291 	  && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
292         {
293           pattern = true;
294           if (!first && !oprnd_info->first_pattern
295 	      /* Allow different pattern state for the defs of the
296 		 first stmt in reduction chains.  */
297 	      && (oprnd_info->first_dt != vect_reduction_def
298 		  || (!second && !oprnd_info->second_pattern)))
299 	    {
300 	      if (i == 0
301 		  && !swapped
302 		  && commutative)
303 		{
304 		  swapped = true;
305 		  goto again;
306 		}
307 
308 	      if (dump_enabled_p ())
309 		{
310 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
311 				   "Build SLP failed: some of the stmts"
312 				   " are in a pattern, and others are not ");
313 		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
314                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
315 		}
316 
317 	      return 1;
318             }
319 
320           def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
321           dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
322 
323           if (dt == vect_unknown_def_type)
324             {
325               if (dump_enabled_p ())
326                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
327 				 "Unsupported pattern.\n");
328               return -1;
329             }
330 
331           switch (gimple_code (def_stmt))
332             {
333             case GIMPLE_PHI:
334             case GIMPLE_ASSIGN:
335 	      break;
336 
337 	    default:
338 	      if (dump_enabled_p ())
339 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
340 				 "unsupported defining stmt:\n");
341 	      return -1;
342             }
343         }
344 
345       if (second)
346 	oprnd_info->second_pattern = pattern;
347 
348       if (first)
349 	{
350 	  oprnd_info->first_dt = dt;
351 	  oprnd_info->first_pattern = pattern;
352 	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
353 	}
354       else
355 	{
356 	  /* Not first stmt of the group, check that the def-stmt/s match
357 	     the def-stmt/s of the first stmt.  Allow different definition
358 	     types for reduction chains: the first stmt must be a
359 	     vect_reduction_def (a phi node), and the rest
360 	     vect_internal_def.  */
361 	  if (((oprnd_info->first_dt != dt
362                 && !(oprnd_info->first_dt == vect_reduction_def
363                      && dt == vect_internal_def)
364 		&& !((oprnd_info->first_dt == vect_external_def
365 		      || oprnd_info->first_dt == vect_constant_def)
366 		     && (dt == vect_external_def
367 			 || dt == vect_constant_def)))
368                || !types_compatible_p (oprnd_info->first_op_type,
369 				       TREE_TYPE (oprnd))))
370 	    {
371 	      /* Try swapping operands if we got a mismatch.  */
372 	      if (i == 0
373 		  && !swapped
374 		  && commutative)
375 		{
376 		  swapped = true;
377 		  goto again;
378 		}
379 
380 	      if (dump_enabled_p ())
381 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
382 				 "Build SLP failed: different types\n");
383 
384 	      return 1;
385 	    }
386 	}
387 
388       /* Check the types of the definitions.  */
389       switch (dt)
390 	{
391 	case vect_constant_def:
392 	case vect_external_def:
393         case vect_reduction_def:
394 	  break;
395 
396 	case vect_internal_def:
397 	  oprnd_info->def_stmts.quick_push (def_stmt);
398 	  break;
399 
400 	default:
401 	  /* FORNOW: Not supported.  */
402 	  if (dump_enabled_p ())
403 	    {
404 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
405 			       "Build SLP failed: illegal type of def ");
406 	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
407               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
408 	    }
409 
410 	  return -1;
411 	}
412     }
413 
414   /* Swap operands.  */
415   if (swapped)
416     {
417       /* If there are already uses of this stmt in a SLP instance then
418          we've committed to the operand order and can't swap it.  */
419       if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
420 	{
421 	  if (dump_enabled_p ())
422 	    {
423 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
424 			       "Build SLP failed: cannot swap operands of "
425 			       "shared stmt ");
426 	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
427 	    }
428 	  return -1;
429 	}
430 
431       if (first_op_cond)
432 	{
433 	  tree cond = gimple_assign_rhs1 (stmt);
434 	  swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
435 			     &TREE_OPERAND (cond, 1));
436 	  TREE_SET_CODE (cond, swap_tree_comparison (TREE_CODE (cond)));
437 	}
438       else
439 	swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
440 			   gimple_assign_rhs2_ptr (stmt));
441       if (dump_enabled_p ())
442 	{
443 	  dump_printf_loc (MSG_NOTE, vect_location,
444 			   "swapped operands to match def types in ");
445 	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
446 	}
447     }
448 
449   return 0;
450 }
451 
452 
453 /* Verify if the scalar stmts STMTS are isomorphic, require data
454    permutation or are of unsupported types of operation.  Return
455    true if they are, otherwise return false and indicate in *MATCHES
456    which stmts are not isomorphic to the first one.  If MATCHES[0]
457    is false then this indicates the comparison could not be
458    carried out or the stmts will never be vectorized by SLP.  */
459 
460 static bool
vect_build_slp_tree_1(vec_info * vinfo,vec<gimple * > stmts,unsigned int group_size,unsigned nops,unsigned int * max_nunits,bool * matches,bool * two_operators)461 vect_build_slp_tree_1 (vec_info *vinfo,
462 		       vec<gimple *> stmts, unsigned int group_size,
463 		       unsigned nops, unsigned int *max_nunits,
464 		       bool *matches, bool *two_operators)
465 {
466   unsigned int i;
467   gimple *first_stmt = stmts[0], *stmt = stmts[0];
468   enum tree_code first_stmt_code = ERROR_MARK;
469   enum tree_code alt_stmt_code = ERROR_MARK;
470   enum tree_code rhs_code = ERROR_MARK;
471   enum tree_code first_cond_code = ERROR_MARK;
472   tree lhs;
473   bool need_same_oprnds = false;
474   tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
475   optab optab;
476   int icode;
477   machine_mode optab_op2_mode;
478   machine_mode vec_mode;
479   HOST_WIDE_INT dummy;
480   gimple *first_load = NULL, *prev_first_load = NULL;
481 
482   /* For every stmt in NODE find its def stmt/s.  */
483   FOR_EACH_VEC_ELT (stmts, i, stmt)
484     {
485       matches[i] = false;
486 
487       if (dump_enabled_p ())
488 	{
489 	  dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
490 	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
491 	}
492 
493       /* Fail to vectorize statements marked as unvectorizable.  */
494       if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
495         {
496           if (dump_enabled_p ())
497             {
498               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
499 			       "Build SLP failed: unvectorizable statement ");
500               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
501               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
502             }
503 	  /* Fatal mismatch.  */
504 	  matches[0] = false;
505           return false;
506         }
507 
508       lhs = gimple_get_lhs (stmt);
509       if (lhs == NULL_TREE)
510 	{
511 	  if (dump_enabled_p ())
512 	    {
513 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
514 			       "Build SLP failed: not GIMPLE_ASSIGN nor "
515 			       "GIMPLE_CALL ");
516 	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
517               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
518 	    }
519 	  /* Fatal mismatch.  */
520 	  matches[0] = false;
521 	  return false;
522 	}
523 
524       scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
525       vectype = get_vectype_for_scalar_type (scalar_type);
526       if (!vectype)
527         {
528           if (dump_enabled_p ())
529             {
530               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
531 			       "Build SLP failed: unsupported data-type ");
532               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
533 				 scalar_type);
534               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
535             }
536 	  /* Fatal mismatch.  */
537 	  matches[0] = false;
538           return false;
539         }
540 
541       /* If populating the vector type requires unrolling then fail
542          before adjusting *max_nunits for basic-block vectorization.  */
543       if (is_a <bb_vec_info> (vinfo)
544 	  && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
545 	{
546 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
547 			   "Build SLP failed: unrolling required "
548 			   "in basic block SLP\n");
549 	  /* Fatal mismatch.  */
550 	  matches[0] = false;
551 	  return false;
552 	}
553 
554       /* In case of multiple types we need to detect the smallest type.  */
555       if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
556 	*max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
557 
558       if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
559 	{
560 	  rhs_code = CALL_EXPR;
561 	  if (gimple_call_internal_p (call_stmt)
562 	      || gimple_call_tail_p (call_stmt)
563 	      || gimple_call_noreturn_p (call_stmt)
564 	      || !gimple_call_nothrow_p (call_stmt)
565 	      || gimple_call_chain (call_stmt))
566 	    {
567 	      if (dump_enabled_p ())
568 		{
569 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
570 				   "Build SLP failed: unsupported call type ");
571 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
572 				    call_stmt, 0);
573                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
574 		}
575 	      /* Fatal mismatch.  */
576 	      matches[0] = false;
577 	      return false;
578 	    }
579 	}
580       else
581 	rhs_code = gimple_assign_rhs_code (stmt);
582 
583       /* Check the operation.  */
584       if (i == 0)
585 	{
586 	  first_stmt_code = rhs_code;
587 
588 	  /* Shift arguments should be equal in all the packed stmts for a
589 	     vector shift with scalar shift operand.  */
590 	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
591 	      || rhs_code == LROTATE_EXPR
592 	      || rhs_code == RROTATE_EXPR)
593 	    {
594 	      vec_mode = TYPE_MODE (vectype);
595 
596 	      /* First see if we have a vector/vector shift.  */
597 	      optab = optab_for_tree_code (rhs_code, vectype,
598 					   optab_vector);
599 
600 	      if (!optab
601 		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
602 		{
603 		  /* No vector/vector shift, try for a vector/scalar shift.  */
604 		  optab = optab_for_tree_code (rhs_code, vectype,
605 					       optab_scalar);
606 
607 		  if (!optab)
608 		    {
609 		      if (dump_enabled_p ())
610 			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
611 					 "Build SLP failed: no optab.\n");
612 		      /* Fatal mismatch.  */
613 		      matches[0] = false;
614 		      return false;
615 		    }
616 		  icode = (int) optab_handler (optab, vec_mode);
617 		  if (icode == CODE_FOR_nothing)
618 		    {
619 		      if (dump_enabled_p ())
620 			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
621 					 "Build SLP failed: "
622 					 "op not supported by target.\n");
623 		      /* Fatal mismatch.  */
624 		      matches[0] = false;
625 		      return false;
626 		    }
627 		  optab_op2_mode = insn_data[icode].operand[2].mode;
628 		  if (!VECTOR_MODE_P (optab_op2_mode))
629 		    {
630 		      need_same_oprnds = true;
631 		      first_op1 = gimple_assign_rhs2 (stmt);
632 		    }
633 		}
634 	    }
635 	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
636             {
637               need_same_oprnds = true;
638               first_op1 = gimple_assign_rhs2 (stmt);
639             }
640 	}
641       else
642 	{
643 	  if (first_stmt_code != rhs_code
644 	      && alt_stmt_code == ERROR_MARK)
645 	    alt_stmt_code = rhs_code;
646 	  if (first_stmt_code != rhs_code
647 	      && (first_stmt_code != IMAGPART_EXPR
648 		  || rhs_code != REALPART_EXPR)
649 	      && (first_stmt_code != REALPART_EXPR
650 		  || rhs_code != IMAGPART_EXPR)
651 	      /* Handle mismatches in plus/minus by computing both
652 		 and merging the results.  */
653 	      && !((first_stmt_code == PLUS_EXPR
654 		    || first_stmt_code == MINUS_EXPR)
655 		   && (alt_stmt_code == PLUS_EXPR
656 		       || alt_stmt_code == MINUS_EXPR)
657 		   && rhs_code == alt_stmt_code)
658               && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
659                    && (first_stmt_code == ARRAY_REF
660                        || first_stmt_code == BIT_FIELD_REF
661                        || first_stmt_code == INDIRECT_REF
662                        || first_stmt_code == COMPONENT_REF
663                        || first_stmt_code == MEM_REF)))
664 	    {
665 	      if (dump_enabled_p ())
666 		{
667 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
668 				   "Build SLP failed: different operation "
669 				   "in stmt ");
670 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
671 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
672 				   "original stmt ");
673 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
674 				    first_stmt, 0);
675 		}
676 	      /* Mismatch.  */
677 	      continue;
678 	    }
679 
680 	  if (need_same_oprnds
681 	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
682 	    {
683 	      if (dump_enabled_p ())
684 		{
685 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
686 				   "Build SLP failed: different shift "
687 				   "arguments in ");
688 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
689                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
690 		}
691 	      /* Mismatch.  */
692 	      continue;
693 	    }
694 
695 	  if (rhs_code == CALL_EXPR)
696 	    {
697 	      gimple *first_stmt = stmts[0];
698 	      if (gimple_call_num_args (stmt) != nops
699 		  || !operand_equal_p (gimple_call_fn (first_stmt),
700 				       gimple_call_fn (stmt), 0)
701 		  || gimple_call_fntype (first_stmt)
702 		     != gimple_call_fntype (stmt))
703 		{
704 		  if (dump_enabled_p ())
705 		    {
706 		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
707 				       "Build SLP failed: different calls in ");
708 		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
709 					stmt, 0);
710                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
711 		    }
712 		  /* Mismatch.  */
713 		  continue;
714 		}
715 	    }
716 	}
717 
718       /* Grouped store or load.  */
719       if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
720 	{
721 	  if (REFERENCE_CLASS_P (lhs))
722 	    {
723 	      /* Store.  */
724 	      ;
725 	    }
726 	  else
727 	    {
728 	      /* Load.  */
729               first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
730               if (prev_first_load)
731                 {
732                   /* Check that there are no loads from different interleaving
733                      chains in the same node.  */
734                   if (prev_first_load != first_load)
735                     {
736                       if (dump_enabled_p ())
737                         {
738                           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
739 					   vect_location,
740 					   "Build SLP failed: different "
741 					   "interleaving chains in one node ");
742                           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
743 					    stmt, 0);
744                           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
745                         }
746 		      /* Mismatch.  */
747 		      continue;
748                     }
749                 }
750               else
751                 prev_first_load = first_load;
752            }
753         } /* Grouped access.  */
754       else
755 	{
756 	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
757 	    {
758 	      /* Not grouped load.  */
759 	      if (dump_enabled_p ())
760 		{
761 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
762 				   "Build SLP failed: not grouped load ");
763 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
764                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
765 		}
766 
767 	      /* FORNOW: Not grouped loads are not supported.  */
768 	      /* Fatal mismatch.  */
769 	      matches[0] = false;
770 	      return false;
771 	    }
772 
773 	  /* Not memory operation.  */
774 	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
775 	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
776 	      && TREE_CODE_CLASS (rhs_code) != tcc_expression
777 	      && TREE_CODE_CLASS (rhs_code) != tcc_comparison
778 	      && rhs_code != CALL_EXPR)
779 	    {
780 	      if (dump_enabled_p ())
781 		{
782 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
783 				   "Build SLP failed: operation");
784 		  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
785 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
786                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
787 		}
788 	      /* Fatal mismatch.  */
789 	      matches[0] = false;
790 	      return false;
791 	    }
792 
793           if (rhs_code == COND_EXPR)
794             {
795               tree cond_expr = gimple_assign_rhs1 (stmt);
796 
797 	      if (i == 0)
798 		first_cond_code = TREE_CODE (cond_expr);
799               else if (first_cond_code != TREE_CODE (cond_expr))
800                 {
801                   if (dump_enabled_p ())
802                     {
803                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
804 				       "Build SLP failed: different"
805 				       " operation");
806                       dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
807 					stmt, 0);
808                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
809                     }
810 		  /* Mismatch.  */
811 		  continue;
812 		}
813             }
814 	}
815 
816       matches[i] = true;
817     }
818 
819   for (i = 0; i < group_size; ++i)
820     if (!matches[i])
821       return false;
822 
823   /* If we allowed a two-operation SLP node verify the target can cope
824      with the permute we are going to use.  */
825   if (alt_stmt_code != ERROR_MARK
826       && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
827     {
828       unsigned char *sel
829 	= XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype));
830       for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i)
831 	{
832 	  sel[i] = i;
833 	  if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
834 	    sel[i] += TYPE_VECTOR_SUBPARTS (vectype);
835 	}
836       if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
837 	{
838 	  for (i = 0; i < group_size; ++i)
839 	    if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
840 	      {
841 		matches[i] = false;
842 		if (dump_enabled_p ())
843 		  {
844 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
845 				     "Build SLP failed: different operation "
846 				     "in stmt ");
847 		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
848 				      stmts[i], 0);
849 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
850 				     "original stmt ");
851 		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
852 				      first_stmt, 0);
853 		  }
854 	      }
855 	  return false;
856 	}
857       *two_operators = true;
858     }
859 
860   return true;
861 }
862 
863 /* Recursively build an SLP tree starting from NODE.
864    Fail (and return a value not equal to zero) if def-stmts are not
865    isomorphic, require data permutation or are of unsupported types of
866    operation.  Otherwise, return 0.
867    The value returned is the depth in the SLP tree where a mismatch
868    was found.  */
869 
870 static slp_tree
vect_build_slp_tree(vec_info * vinfo,vec<gimple * > stmts,unsigned int group_size,unsigned int * max_nunits,vec<slp_tree> * loads,bool * matches,unsigned * npermutes,unsigned * tree_size,unsigned max_tree_size)871 vect_build_slp_tree (vec_info *vinfo,
872                      vec<gimple *> stmts, unsigned int group_size,
873                      unsigned int *max_nunits,
874                      vec<slp_tree> *loads,
875 		     bool *matches, unsigned *npermutes, unsigned *tree_size,
876 		     unsigned max_tree_size)
877 {
878   unsigned nops, i, this_tree_size = 0, this_max_nunits = *max_nunits;
879   gimple *stmt;
880   slp_tree node;
881 
882   matches[0] = false;
883 
884   stmt = stmts[0];
885   if (is_gimple_call (stmt))
886     nops = gimple_call_num_args (stmt);
887   else if (is_gimple_assign (stmt))
888     {
889       nops = gimple_num_ops (stmt) - 1;
890       if (gimple_assign_rhs_code (stmt) == COND_EXPR)
891 	nops++;
892     }
893   else
894     return NULL;
895 
896   bool two_operators = false;
897   if (!vect_build_slp_tree_1 (vinfo,
898 			      stmts, group_size, nops,
899 			      &this_max_nunits, matches, &two_operators))
900     return NULL;
901 
902   /* If the SLP node is a load, terminate the recursion.  */
903   if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
904       && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
905     {
906       *max_nunits = this_max_nunits;
907       node = vect_create_new_slp_node (stmts);
908       loads->safe_push (node);
909       return node;
910     }
911 
912   /* Get at the operands, verifying they are compatible.  */
913   vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
914   slp_oprnd_info oprnd_info;
915   FOR_EACH_VEC_ELT (stmts, i, stmt)
916     {
917       switch (vect_get_and_check_slp_defs (vinfo, stmt, i, &oprnds_info))
918 	{
919 	case 0:
920 	  break;
921 	case -1:
922 	  matches[0] = false;
923 	  vect_free_oprnd_info (oprnds_info);
924 	  return NULL;
925 	case 1:
926 	  matches[i] = false;
927 	  break;
928 	}
929     }
930   for (i = 0; i < group_size; ++i)
931     if (!matches[i])
932       {
933 	vect_free_oprnd_info (oprnds_info);
934 	return NULL;
935       }
936 
937   auto_vec<slp_tree, 4> children;
938   auto_vec<slp_tree> this_loads;
939 
940   stmt = stmts[0];
941 
942   /* Create SLP_TREE nodes for the definition node/s.  */
943   FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
944     {
945       slp_tree child;
946       unsigned old_nloads = this_loads.length ();
947       unsigned old_tree_size = this_tree_size;
948       unsigned int j;
949 
950       if (oprnd_info->first_dt != vect_internal_def)
951         continue;
952 
953       if (++this_tree_size > max_tree_size)
954 	{
955 	  FOR_EACH_VEC_ELT (children, j, child)
956 	    vect_free_slp_tree (child);
957 	  vect_free_oprnd_info (oprnds_info);
958 	  return NULL;
959 	}
960 
961       if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
962 					group_size, &this_max_nunits,
963 					&this_loads, matches, npermutes,
964 					&this_tree_size,
965 					max_tree_size)) != NULL)
966 	{
967 	  /* If we have all children of child built up from scalars then just
968 	     throw that away and build it up this node from scalars.  */
969 	  if (!SLP_TREE_CHILDREN (child).is_empty ()
970 	      /* ???  Rejecting patterns this way doesn't work.  We'd have to
971 		 do extra work to cancel the pattern so the uses see the
972 		 scalar version.  */
973 	      && !is_pattern_stmt_p
974 	            (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
975 	    {
976 	      slp_tree grandchild;
977 
978 	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
979 		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
980 		  break;
981 	      if (!grandchild)
982 		{
983 		  /* Roll back.  */
984 		  this_loads.truncate (old_nloads);
985 		  this_tree_size = old_tree_size;
986 		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
987 		    vect_free_slp_tree (grandchild);
988 		  SLP_TREE_CHILDREN (child).truncate (0);
989 
990 		  dump_printf_loc (MSG_NOTE, vect_location,
991 				   "Building parent vector operands from "
992 				   "scalars instead\n");
993 		  oprnd_info->def_stmts = vNULL;
994 		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
995 		  children.safe_push (child);
996 		  continue;
997 		}
998 	    }
999 
1000 	  oprnd_info->def_stmts = vNULL;
1001 	  children.safe_push (child);
1002 	  continue;
1003 	}
1004 
1005       /* If the SLP build failed fatally and we analyze a basic-block
1006          simply treat nodes we fail to build as externally defined
1007 	 (and thus build vectors from the scalar defs).
1008 	 The cost model will reject outright expensive cases.
1009 	 ???  This doesn't treat cases where permutation ultimatively
1010 	 fails (or we don't try permutation below).  Ideally we'd
1011 	 even compute a permutation that will end up with the maximum
1012 	 SLP tree size...  */
1013       if (is_a <bb_vec_info> (vinfo)
1014 	  && !matches[0]
1015 	  /* ???  Rejecting patterns this way doesn't work.  We'd have to
1016 	     do extra work to cancel the pattern so the uses see the
1017 	     scalar version.  */
1018 	  && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
1019 	{
1020 	  dump_printf_loc (MSG_NOTE, vect_location,
1021 			   "Building vector operands from scalars\n");
1022 	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
1023 	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
1024 	  children.safe_push (child);
1025 	  oprnd_info->def_stmts = vNULL;
1026 	  continue;
1027 	}
1028 
1029       /* If the SLP build for operand zero failed and operand zero
1030 	 and one can be commutated try that for the scalar stmts
1031 	 that failed the match.  */
1032       if (i == 0
1033 	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
1034 	  && matches[0]
1035 	  /* ???  For COND_EXPRs we can swap the comparison operands
1036 	     as well as the arms under some constraints.  */
1037 	  && nops == 2
1038 	  && oprnds_info[1]->first_dt == vect_internal_def
1039 	  && is_gimple_assign (stmt)
1040 	  && commutative_tree_code (gimple_assign_rhs_code (stmt))
1041 	  && ! two_operators
1042 	  /* Do so only if the number of not successful permutes was nor more
1043 	     than a cut-ff as re-trying the recursive match on
1044 	     possibly each level of the tree would expose exponential
1045 	     behavior.  */
1046 	  && *npermutes < 4)
1047 	{
1048 	  /* Verify if we can safely swap or if we committed to a specific
1049 	     operand order already.  */
1050 	  for (j = 0; j < group_size; ++j)
1051 	    if (!matches[j]
1052 		&& STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j])) != 0)
1053 	      {
1054 		if (dump_enabled_p ())
1055 		  {
1056 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1057 				     "Build SLP failed: cannot swap operands "
1058 				     "of shared stmt ");
1059 		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1060 				      stmts[j], 0);
1061 		  }
1062 		goto fail;
1063 	      }
1064 
1065 	  /* Swap mismatched definition stmts.  */
1066 	  dump_printf_loc (MSG_NOTE, vect_location,
1067 			   "Re-trying with swapped operands of stmts ");
1068 	  for (j = 0; j < group_size; ++j)
1069 	    if (!matches[j])
1070 	      {
1071 		std::swap (oprnds_info[0]->def_stmts[j],
1072 			   oprnds_info[1]->def_stmts[j]);
1073 		dump_printf (MSG_NOTE, "%d ", j);
1074 	      }
1075 	  dump_printf (MSG_NOTE, "\n");
1076 	  /* And try again with scratch 'matches' ... */
1077 	  bool *tem = XALLOCAVEC (bool, group_size);
1078 	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1079 					    group_size, &this_max_nunits,
1080 					    &this_loads, tem, npermutes,
1081 					    &this_tree_size,
1082 					    max_tree_size)) != NULL)
1083 	    {
1084 	      /* ... so if successful we can apply the operand swapping
1085 		 to the GIMPLE IL.  This is necessary because for example
1086 		 vect_get_slp_defs uses operand indexes and thus expects
1087 		 canonical operand order.  This is also necessary even
1088 		 if we end up building the operand from scalars as
1089 		 we'll continue to process swapped operand two.  */
1090 	      for (j = 0; j < group_size; ++j)
1091 		{
1092 		  gimple *stmt = stmts[j];
1093 		  gimple_set_plf (stmt, GF_PLF_1, false);
1094 		}
1095 	      for (j = 0; j < group_size; ++j)
1096 		{
1097 		  gimple *stmt = stmts[j];
1098 		  if (!matches[j])
1099 		    {
1100 		      /* Avoid swapping operands twice.  */
1101 		      if (gimple_plf (stmt, GF_PLF_1))
1102 			continue;
1103 		      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
1104 					 gimple_assign_rhs2_ptr (stmt));
1105 		      gimple_set_plf (stmt, GF_PLF_1, true);
1106 		    }
1107 		}
1108 	      /* Verify we swap all duplicates or none.  */
1109 	      if (flag_checking)
1110 		for (j = 0; j < group_size; ++j)
1111 		  {
1112 		    gimple *stmt = stmts[j];
1113 		    gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);
1114 		  }
1115 
1116 	      /* If we have all children of child built up from scalars then
1117 		 just throw that away and build it up this node from scalars.  */
1118 	      if (!SLP_TREE_CHILDREN (child).is_empty ()
1119 		  /* ???  Rejecting patterns this way doesn't work.  We'd have
1120 		     to do extra work to cancel the pattern so the uses see the
1121 		     scalar version.  */
1122 		  && !is_pattern_stmt_p
1123 			(vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
1124 		{
1125 		  unsigned int j;
1126 		  slp_tree grandchild;
1127 
1128 		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1129 		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
1130 		      break;
1131 		  if (!grandchild)
1132 		    {
1133 		      /* Roll back.  */
1134 		      this_loads.truncate (old_nloads);
1135 		      this_tree_size = old_tree_size;
1136 		      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1137 			vect_free_slp_tree (grandchild);
1138 		      SLP_TREE_CHILDREN (child).truncate (0);
1139 
1140 		      dump_printf_loc (MSG_NOTE, vect_location,
1141 				       "Building parent vector operands from "
1142 				       "scalars instead\n");
1143 		      oprnd_info->def_stmts = vNULL;
1144 		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
1145 		      children.safe_push (child);
1146 		      continue;
1147 		    }
1148 		}
1149 
1150 	      oprnd_info->def_stmts = vNULL;
1151 	      children.safe_push (child);
1152 	      continue;
1153 	    }
1154 
1155 	  ++*npermutes;
1156 	}
1157 
1158 fail:
1159       gcc_assert (child == NULL);
1160       FOR_EACH_VEC_ELT (children, j, child)
1161 	vect_free_slp_tree (child);
1162       vect_free_oprnd_info (oprnds_info);
1163       return NULL;
1164     }
1165 
1166   vect_free_oprnd_info (oprnds_info);
1167 
1168   if (tree_size)
1169     *tree_size += this_tree_size;
1170   *max_nunits = this_max_nunits;
1171   loads->safe_splice (this_loads);
1172 
1173   node = vect_create_new_slp_node (stmts);
1174   SLP_TREE_TWO_OPERATORS (node) = two_operators;
1175   SLP_TREE_CHILDREN (node).splice (children);
1176   return node;
1177 }
1178 
1179 /* Dump a slp tree NODE using flags specified in DUMP_KIND.  */
1180 
1181 static void
vect_print_slp_tree(int dump_kind,location_t loc,slp_tree node)1182 vect_print_slp_tree (int dump_kind, location_t loc, slp_tree node)
1183 {
1184   int i;
1185   gimple *stmt;
1186   slp_tree child;
1187 
1188   dump_printf_loc (dump_kind, loc, "node%s\n",
1189 		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
1190 		   ? " (external)" : "");
1191   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1192     {
1193       dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
1194       dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
1195     }
1196   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1197     vect_print_slp_tree (dump_kind, loc, child);
1198 }
1199 
1200 
1201 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1202    If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1203    J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1204    stmts in NODE are to be marked.  */
1205 
1206 static void
vect_mark_slp_stmts(slp_tree node,enum slp_vect_type mark,int j)1207 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1208 {
1209   int i;
1210   gimple *stmt;
1211   slp_tree child;
1212 
1213   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1214     return;
1215 
1216   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1217     if (j < 0 || i == j)
1218       STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1219 
1220   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1221     vect_mark_slp_stmts (child, mark, j);
1222 }
1223 
1224 
1225 /* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */
1226 
1227 static void
vect_mark_slp_stmts_relevant(slp_tree node)1228 vect_mark_slp_stmts_relevant (slp_tree node)
1229 {
1230   int i;
1231   gimple *stmt;
1232   stmt_vec_info stmt_info;
1233   slp_tree child;
1234 
1235   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1236     return;
1237 
1238   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1239     {
1240       stmt_info = vinfo_for_stmt (stmt);
1241       gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1242                   || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1243       STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1244     }
1245 
1246   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1247     vect_mark_slp_stmts_relevant (child);
1248 }
1249 
1250 
1251 /* Rearrange the statements of NODE according to PERMUTATION.  */
1252 
1253 static void
vect_slp_rearrange_stmts(slp_tree node,unsigned int group_size,vec<unsigned> permutation)1254 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1255                           vec<unsigned> permutation)
1256 {
1257   gimple *stmt;
1258   vec<gimple *> tmp_stmts;
1259   unsigned int i;
1260   slp_tree child;
1261 
1262   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1263     vect_slp_rearrange_stmts (child, group_size, permutation);
1264 
1265   gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1266   tmp_stmts.create (group_size);
1267   tmp_stmts.quick_grow_cleared (group_size);
1268 
1269   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1270     tmp_stmts[permutation[i]] = stmt;
1271 
1272   SLP_TREE_SCALAR_STMTS (node).release ();
1273   SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1274 }
1275 
1276 
1277 /* Attempt to reorder stmts in a reduction chain so that we don't
1278    require any load permutation.  Return true if that was possible,
1279    otherwise return false.  */
1280 
1281 static bool
vect_attempt_slp_rearrange_stmts(slp_instance slp_instn)1282 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1283 {
1284   unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1285   unsigned int i, j;
1286   sbitmap load_index;
1287   unsigned int lidx;
1288   slp_tree node, load;
1289 
1290   /* Compare all the permutation sequences to the first one.  We know
1291      that at least one load is permuted.  */
1292   node = SLP_INSTANCE_LOADS (slp_instn)[0];
1293   if (!node->load_permutation.exists ())
1294     return false;
1295   for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1296     {
1297       if (!load->load_permutation.exists ())
1298 	return false;
1299       FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1300 	if (lidx != node->load_permutation[j])
1301 	  return false;
1302     }
1303 
1304   /* Check that the loads in the first sequence are different and there
1305      are no gaps between them.  */
1306   load_index = sbitmap_alloc (group_size);
1307   bitmap_clear (load_index);
1308   FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1309     {
1310       if (lidx >= group_size)
1311 	return false;
1312       if (bitmap_bit_p (load_index, lidx))
1313 	{
1314 	  sbitmap_free (load_index);
1315 	  return false;
1316 	}
1317       bitmap_set_bit (load_index, lidx);
1318     }
1319   for (i = 0; i < group_size; i++)
1320     if (!bitmap_bit_p (load_index, i))
1321       {
1322 	sbitmap_free (load_index);
1323 	return false;
1324       }
1325   sbitmap_free (load_index);
1326 
1327   /* This permutation is valid for reduction.  Since the order of the
1328      statements in the nodes is not important unless they are memory
1329      accesses, we can rearrange the statements in all the nodes
1330      according to the order of the loads.  */
1331   vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1332 			    node->load_permutation);
1333 
1334   /* We are done, no actual permutations need to be generated.  */
1335   unsigned int unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
1336   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1337     {
1338       gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1339       first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
1340       /* But we have to keep those permutations that are required because
1341          of handling of gaps.  */
1342       if (unrolling_factor == 1
1343 	  || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
1344 	      && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
1345 	SLP_TREE_LOAD_PERMUTATION (node).release ();
1346       else
1347 	for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1348 	  SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
1349     }
1350 
1351   return true;
1352 }
1353 
1354 /* Check if the required load permutations in the SLP instance
1355    SLP_INSTN are supported.  */
1356 
1357 static bool
vect_supported_load_permutation_p(slp_instance slp_instn)1358 vect_supported_load_permutation_p (slp_instance slp_instn)
1359 {
1360   unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1361   unsigned int i, j, k, next;
1362   slp_tree node;
1363   gimple *stmt, *load, *next_load;
1364 
1365   if (dump_enabled_p ())
1366     {
1367       dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1368       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1369 	if (node->load_permutation.exists ())
1370 	  FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1371 	    dump_printf (MSG_NOTE, "%d ", next);
1372 	else
1373 	  for (k = 0; k < group_size; ++k)
1374 	    dump_printf (MSG_NOTE, "%d ", k);
1375       dump_printf (MSG_NOTE, "\n");
1376     }
1377 
1378   /* In case of reduction every load permutation is allowed, since the order
1379      of the reduction statements is not important (as opposed to the case of
1380      grouped stores).  The only condition we need to check is that all the
1381      load nodes are of the same size and have the same permutation (and then
1382      rearrange all the nodes of the SLP instance according to this
1383      permutation).  */
1384 
1385   /* Check that all the load nodes are of the same size.  */
1386   /* ???  Can't we assert this? */
1387   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1388     if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1389       return false;
1390 
1391   node = SLP_INSTANCE_TREE (slp_instn);
1392   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1393 
1394   /* Reduction (there are no data-refs in the root).
1395      In reduction chain the order of the loads is not important.  */
1396   if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1397       && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1398     vect_attempt_slp_rearrange_stmts (slp_instn);
1399 
1400   /* In basic block vectorization we allow any subchain of an interleaving
1401      chain.
1402      FORNOW: not supported in loop SLP because of realignment compications.  */
1403   if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
1404     {
1405       /* Check whether the loads in an instance form a subchain and thus
1406          no permutation is necessary.  */
1407       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1408         {
1409 	  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1410 	    continue;
1411 	  bool subchain_p = true;
1412           next_load = NULL;
1413           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
1414             {
1415               if (j != 0
1416 		  && (next_load != load
1417 		      || GROUP_GAP (vinfo_for_stmt (load)) != 1))
1418 		{
1419 		  subchain_p = false;
1420 		  break;
1421 		}
1422               next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1423             }
1424 	  if (subchain_p)
1425 	    SLP_TREE_LOAD_PERMUTATION (node).release ();
1426 	  else
1427 	    {
1428 	      /* Verify the permutation can be generated.  */
1429 	      vec<tree> tem;
1430 	      if (!vect_transform_slp_perm_load (node, tem, NULL,
1431 						 1, slp_instn, true))
1432 		{
1433 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1434 				   vect_location,
1435 				   "unsupported load permutation\n");
1436 		  return false;
1437 		}
1438 	    }
1439         }
1440       return true;
1441     }
1442 
1443   /* For loop vectorization verify we can generate the permutation.  */
1444   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1445     if (node->load_permutation.exists ()
1446 	&& !vect_transform_slp_perm_load
1447 	      (node, vNULL, NULL,
1448 	       SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
1449       return false;
1450 
1451   return true;
1452 }
1453 
1454 
1455 /* Find the last store in SLP INSTANCE.  */
1456 
1457 gimple *
vect_find_last_scalar_stmt_in_slp(slp_tree node)1458 vect_find_last_scalar_stmt_in_slp (slp_tree node)
1459 {
1460   gimple *last = NULL, *stmt;
1461 
1462   for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
1463     {
1464       stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1465       if (is_pattern_stmt_p (stmt_vinfo))
1466 	last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
1467       else
1468 	last = get_later_stmt (stmt, last);
1469     }
1470 
1471   return last;
1472 }
1473 
1474 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */
1475 
1476 static void
vect_analyze_slp_cost_1(slp_instance instance,slp_tree node,stmt_vector_for_cost * prologue_cost_vec,stmt_vector_for_cost * body_cost_vec,unsigned ncopies_for_cost)1477 vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
1478 			 stmt_vector_for_cost *prologue_cost_vec,
1479 			 stmt_vector_for_cost *body_cost_vec,
1480 			 unsigned ncopies_for_cost)
1481 {
1482   unsigned i, j;
1483   slp_tree child;
1484   gimple *stmt;
1485   stmt_vec_info stmt_info;
1486   tree lhs;
1487 
1488   /* Recurse down the SLP tree.  */
1489   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1490     if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
1491       vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
1492 			       body_cost_vec, ncopies_for_cost);
1493 
1494   /* Look at the first scalar stmt to determine the cost.  */
1495   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1496   stmt_info = vinfo_for_stmt (stmt);
1497   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1498     {
1499       if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1500 	vect_model_store_cost (stmt_info, ncopies_for_cost, false,
1501 			       vect_uninitialized_def,
1502 			       node, prologue_cost_vec, body_cost_vec);
1503       else
1504 	{
1505 	  gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1506 	  if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
1507 	    {
1508 	      /* If the load is permuted then the alignment is determined by
1509 		 the first group element not by the first scalar stmt DR.  */
1510 	      stmt = GROUP_FIRST_ELEMENT (stmt_info);
1511 	      stmt_info = vinfo_for_stmt (stmt);
1512 	      /* Record the cost for the permutation.  */
1513 	      record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
1514 				stmt_info, 0, vect_body);
1515 	      /* And adjust the number of loads performed.  */
1516 	      unsigned nunits
1517 		= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1518 	      ncopies_for_cost
1519 	        = (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
1520 		   + nunits - 1) / nunits;
1521 	      ncopies_for_cost *= SLP_INSTANCE_UNROLLING_FACTOR (instance);
1522 	    }
1523 	  /* Record the cost for the vector loads.  */
1524 	  vect_model_load_cost (stmt_info, ncopies_for_cost, false,
1525 				node, prologue_cost_vec, body_cost_vec);
1526 	  return;
1527 	}
1528     }
1529   else
1530     {
1531       record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1532 			stmt_info, 0, vect_body);
1533       if (SLP_TREE_TWO_OPERATORS (node))
1534 	{
1535 	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1536 			    stmt_info, 0, vect_body);
1537 	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
1538 			    stmt_info, 0, vect_body);
1539 	}
1540     }
1541 
1542   /* Push SLP node def-type to stmts.  */
1543   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1544     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
1545       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
1546 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
1547 
1548   /* Scan operands and account for prologue cost of constants/externals.
1549      ???  This over-estimates cost for multiple uses and should be
1550      re-engineered.  */
1551   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1552   lhs = gimple_get_lhs (stmt);
1553   for (i = 0; i < gimple_num_ops (stmt); ++i)
1554     {
1555       tree op = gimple_op (stmt, i);
1556       gimple *def_stmt;
1557       enum vect_def_type dt;
1558       if (!op || op == lhs)
1559 	continue;
1560       if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
1561 	{
1562 	  /* Without looking at the actual initializer a vector of
1563 	     constants can be implemented as load from the constant pool.
1564 	     ???  We need to pass down stmt_info for a vector type
1565 	     even if it points to the wrong stmt.  */
1566 	  if (dt == vect_constant_def)
1567 	    record_stmt_cost (prologue_cost_vec, 1, vector_load,
1568 			      stmt_info, 0, vect_prologue);
1569 	  else if (dt == vect_external_def)
1570 	    record_stmt_cost (prologue_cost_vec, 1, vec_construct,
1571 			      stmt_info, 0, vect_prologue);
1572 	}
1573     }
1574 
1575   /* Restore stmt def-types.  */
1576   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1577     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
1578       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
1579 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
1580 }
1581 
1582 /* Compute the cost for the SLP instance INSTANCE.  */
1583 
1584 static void
vect_analyze_slp_cost(slp_instance instance,void * data)1585 vect_analyze_slp_cost (slp_instance instance, void *data)
1586 {
1587   stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1588   unsigned ncopies_for_cost;
1589   stmt_info_for_cost *si;
1590   unsigned i;
1591 
1592   if (dump_enabled_p ())
1593     dump_printf_loc (MSG_NOTE, vect_location,
1594 		     "=== vect_analyze_slp_cost ===\n");
1595 
1596   /* Calculate the number of vector stmts to create based on the unrolling
1597      factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1598      GROUP_SIZE / NUNITS otherwise.  */
1599   unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1600   slp_tree node = SLP_INSTANCE_TREE (instance);
1601   stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1602   /* Adjust the group_size by the vectorization factor which is always one
1603      for basic-block vectorization.  */
1604   if (STMT_VINFO_LOOP_VINFO (stmt_info))
1605     group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
1606   unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1607   /* For reductions look at a reduction operand in case the reduction
1608      operation is widening like DOT_PROD or SAD.  */
1609   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1610     {
1611       gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1612       switch (gimple_assign_rhs_code (stmt))
1613 	{
1614 	case DOT_PROD_EXPR:
1615 	case SAD_EXPR:
1616 	  nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1617 				(TREE_TYPE (gimple_assign_rhs1 (stmt))));
1618 	  break;
1619 	default:;
1620 	}
1621     }
1622   ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
1623 
1624   prologue_cost_vec.create (10);
1625   body_cost_vec.create (10);
1626   vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
1627 			   &prologue_cost_vec, &body_cost_vec,
1628 			   ncopies_for_cost);
1629 
1630   /* Record the prologue costs, which were delayed until we were
1631      sure that SLP was successful.  */
1632   FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1633     {
1634       struct _stmt_vec_info *stmt_info
1635 	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1636       (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1637 			    si->misalign, vect_prologue);
1638     }
1639 
1640   /* Record the instance's instructions in the target cost model.  */
1641   FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1642     {
1643       struct _stmt_vec_info *stmt_info
1644 	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1645       (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1646 			    si->misalign, vect_body);
1647     }
1648 
1649   prologue_cost_vec.release ();
1650   body_cost_vec.release ();
1651 }
1652 
1653 /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1654    one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1655    the first GROUP1_SIZE stmts, since stores are consecutive), the second
1656    containing the remainder.
1657    Return the first stmt in the second group.  */
1658 
1659 static gimple *
vect_split_slp_store_group(gimple * first_stmt,unsigned group1_size)1660 vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
1661 {
1662   stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
1663   gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
1664   gcc_assert (group1_size > 0);
1665   int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
1666   gcc_assert (group2_size > 0);
1667   GROUP_SIZE (first_vinfo) = group1_size;
1668 
1669   gimple *stmt = first_stmt;
1670   for (unsigned i = group1_size; i > 1; i--)
1671     {
1672       stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1673       gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1674     }
1675   /* STMT is now the last element of the first group.  */
1676   gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1677   GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
1678 
1679   GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
1680   for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
1681     {
1682       GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
1683       gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1684     }
1685 
1686   /* For the second group, the GROUP_GAP is that before the original group,
1687      plus skipping over the first vector.  */
1688   GROUP_GAP (vinfo_for_stmt (group2)) =
1689     GROUP_GAP (first_vinfo) + group1_size;
1690 
1691   /* GROUP_GAP of the first group now has to skip over the second group too.  */
1692   GROUP_GAP (first_vinfo) += group2_size;
1693 
1694   if (dump_enabled_p ())
1695     dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1696 		     group1_size, group2_size);
1697 
1698   return group2;
1699 }
1700 
1701 /* Analyze an SLP instance starting from a group of grouped stores.  Call
1702    vect_build_slp_tree to build a tree of packed stmts if possible.
1703    Return FALSE if it's impossible to SLP any stmt in the loop.  */
1704 
1705 static bool
vect_analyze_slp_instance(vec_info * vinfo,gimple * stmt,unsigned max_tree_size)1706 vect_analyze_slp_instance (vec_info *vinfo,
1707 			   gimple *stmt, unsigned max_tree_size)
1708 {
1709   slp_instance new_instance;
1710   slp_tree node;
1711   unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1712   unsigned int unrolling_factor = 1, nunits;
1713   tree vectype, scalar_type = NULL_TREE;
1714   gimple *next;
1715   unsigned int i;
1716   unsigned int max_nunits = 0;
1717   vec<slp_tree> loads;
1718   struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1719   vec<gimple *> scalar_stmts;
1720 
1721   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1722     {
1723       if (dr)
1724         {
1725           scalar_type = TREE_TYPE (DR_REF (dr));
1726           vectype = get_vectype_for_scalar_type (scalar_type);
1727         }
1728       else
1729         {
1730           gcc_assert (is_a <loop_vec_info> (vinfo));
1731           vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1732         }
1733 
1734       group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1735     }
1736   else
1737     {
1738       gcc_assert (is_a <loop_vec_info> (vinfo));
1739       vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1740       group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
1741     }
1742 
1743   if (!vectype)
1744     {
1745       if (dump_enabled_p ())
1746         {
1747           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1748 			   "Build SLP failed: unsupported data-type ");
1749           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1750           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1751         }
1752 
1753       return false;
1754     }
1755   nunits = TYPE_VECTOR_SUBPARTS (vectype);
1756 
1757   /* Calculate the unrolling factor.  */
1758   unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1759   if (unrolling_factor != 1 && is_a <bb_vec_info> (vinfo))
1760     {
1761       if (dump_enabled_p ())
1762         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1763 			 "Build SLP failed: unrolling required in basic"
1764 			 " block SLP\n");
1765 
1766       return false;
1767     }
1768 
1769   /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
1770   scalar_stmts.create (group_size);
1771   next = stmt;
1772   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1773     {
1774       /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
1775       while (next)
1776         {
1777 	  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1778 	      && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1779 	    scalar_stmts.safe_push (
1780 		  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1781 	  else
1782             scalar_stmts.safe_push (next);
1783           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1784         }
1785       /* Mark the first element of the reduction chain as reduction to properly
1786 	 transform the node.  In the reduction analysis phase only the last
1787 	 element of the chain is marked as reduction.  */
1788       if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1789 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
1790     }
1791   else
1792     {
1793       /* Collect reduction statements.  */
1794       vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
1795       for (i = 0; reductions.iterate (i, &next); i++)
1796 	scalar_stmts.safe_push (next);
1797     }
1798 
1799   loads.create (group_size);
1800 
1801   /* Build the tree for the SLP instance.  */
1802   bool *matches = XALLOCAVEC (bool, group_size);
1803   unsigned npermutes = 0;
1804   if ((node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
1805 				   &max_nunits, &loads, matches, &npermutes,
1806 				   NULL, max_tree_size)) != NULL)
1807     {
1808       /* Calculate the unrolling factor based on the smallest type.  */
1809       if (max_nunits > nunits)
1810         unrolling_factor = least_common_multiple (max_nunits, group_size)
1811                            / group_size;
1812 
1813       if (unrolling_factor != 1 && is_a <bb_vec_info> (vinfo))
1814         {
1815           if (dump_enabled_p ())
1816             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1817 			     "Build SLP failed: unrolling required in basic"
1818 			     " block SLP\n");
1819 	  vect_free_slp_tree (node);
1820 	  loads.release ();
1821           return false;
1822         }
1823 
1824       /* Create a new SLP instance.  */
1825       new_instance = XNEW (struct _slp_instance);
1826       SLP_INSTANCE_TREE (new_instance) = node;
1827       SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1828       SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1829       SLP_INSTANCE_LOADS (new_instance) = loads;
1830 
1831       /* Compute the load permutation.  */
1832       slp_tree load_node;
1833       bool loads_permuted = false;
1834       FOR_EACH_VEC_ELT (loads, i, load_node)
1835 	{
1836 	  vec<unsigned> load_permutation;
1837 	  int j;
1838 	  gimple *load, *first_stmt;
1839 	  bool this_load_permuted = false;
1840 	  load_permutation.create (group_size);
1841 	  first_stmt = GROUP_FIRST_ELEMENT
1842 	      (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
1843 	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1844 	    {
1845 	      int load_place
1846 		= vect_get_place_in_interleaving_chain (load, first_stmt);
1847 	      gcc_assert (load_place != -1);
1848 	      if (load_place != j)
1849 		this_load_permuted = true;
1850 	      load_permutation.safe_push (load_place);
1851 	    }
1852 	  if (!this_load_permuted
1853 	      /* The load requires permutation when unrolling exposes
1854 	         a gap either because the group is larger than the SLP
1855 		 group-size or because there is a gap between the groups.  */
1856 	      && (unrolling_factor == 1
1857 		  || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
1858 		      && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
1859 	    {
1860 	      load_permutation.release ();
1861 	      continue;
1862 	    }
1863 	  SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1864 	  loads_permuted = true;
1865 	}
1866 
1867       if (loads_permuted)
1868         {
1869           if (!vect_supported_load_permutation_p (new_instance))
1870             {
1871               if (dump_enabled_p ())
1872                 {
1873                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1874 				   "Build SLP failed: unsupported load "
1875 				   "permutation ");
1876                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
1877                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1878                 }
1879               vect_free_slp_instance (new_instance);
1880               return false;
1881             }
1882         }
1883 
1884       /* If the loads and stores can be handled with load/store-lane
1885 	 instructions do not generate this SLP instance.  */
1886       if (is_a <loop_vec_info> (vinfo)
1887 	  && loads_permuted
1888 	  && dr && vect_store_lanes_supported (vectype, group_size))
1889 	{
1890 	  slp_tree load_node;
1891 	  FOR_EACH_VEC_ELT (loads, i, load_node)
1892 	    {
1893 	      gimple *first_stmt = GROUP_FIRST_ELEMENT
1894 		  (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
1895 	      stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
1896 	      /* Use SLP for strided accesses (or if we can't load-lanes).  */
1897 	      if (STMT_VINFO_STRIDED_P (stmt_vinfo)
1898 		  || ! vect_load_lanes_supported
1899 			(STMT_VINFO_VECTYPE (stmt_vinfo),
1900 			 GROUP_SIZE (stmt_vinfo)))
1901 		break;
1902 	    }
1903 	  if (i == loads.length ())
1904 	    {
1905 	      if (dump_enabled_p ())
1906 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1907 				 "Built SLP cancelled: can use "
1908 				 "load/store-lanes\n");
1909 	      vect_free_slp_instance (new_instance);
1910 	      return false;
1911 	    }
1912 	}
1913 
1914       vinfo->slp_instances.safe_push (new_instance);
1915 
1916       if (dump_enabled_p ())
1917 	{
1918 	  dump_printf_loc (MSG_NOTE, vect_location,
1919 			   "Final SLP tree for instance:\n");
1920 	  vect_print_slp_tree (MSG_NOTE, vect_location, node);
1921 	}
1922 
1923       return true;
1924     }
1925 
1926   /* Failed to SLP.  */
1927   /* Free the allocated memory.  */
1928   scalar_stmts.release ();
1929   loads.release ();
1930 
1931   /* For basic block SLP, try to break the group up into multiples of the
1932      vector size.  */
1933   if (is_a <bb_vec_info> (vinfo)
1934       && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
1935       && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1936     {
1937       /* We consider breaking the group only on VF boundaries from the existing
1938 	 start.  */
1939       for (i = 0; i < group_size; i++)
1940 	if (!matches[i]) break;
1941 
1942       if (i >= nunits && i < group_size)
1943 	{
1944 	  /* Split into two groups at the first vector boundary before i.  */
1945 	  gcc_assert ((nunits & (nunits - 1)) == 0);
1946 	  unsigned group1_size = i & ~(nunits - 1);
1947 
1948 	  gimple *rest = vect_split_slp_store_group (stmt, group1_size);
1949 	  bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
1950 	  /* If the first non-match was in the middle of a vector,
1951 	     skip the rest of that vector.  */
1952 	  if (group1_size < i)
1953 	    {
1954 	      i = group1_size + nunits;
1955 	      if (i < group_size)
1956 		rest = vect_split_slp_store_group (rest, nunits);
1957 	    }
1958 	  if (i < group_size)
1959 	    res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
1960 	  return res;
1961 	}
1962       /* Even though the first vector did not all match, we might be able to SLP
1963 	 (some) of the remainder.  FORNOW ignore this possibility.  */
1964     }
1965 
1966   return false;
1967 }
1968 
1969 
1970 /* Check if there are stmts in the loop can be vectorized using SLP.  Build SLP
1971    trees of packed scalar stmts if SLP is possible.  */
1972 
1973 bool
vect_analyze_slp(vec_info * vinfo,unsigned max_tree_size)1974 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
1975 {
1976   unsigned int i;
1977   gimple *first_element;
1978   bool ok = false;
1979 
1980   if (dump_enabled_p ())
1981     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
1982 
1983   /* Find SLP sequences starting from groups of grouped stores.  */
1984   FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
1985     if (vect_analyze_slp_instance (vinfo, first_element, max_tree_size))
1986       ok = true;
1987 
1988   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
1989     {
1990       if (loop_vinfo->reduction_chains.length () > 0)
1991 	{
1992 	  /* Find SLP sequences starting from reduction chains.  */
1993 	  FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
1994 	      if (vect_analyze_slp_instance (vinfo, first_element,
1995 					     max_tree_size))
1996 		ok = true;
1997 	      else
1998 		return false;
1999 
2000 	  /* Don't try to vectorize SLP reductions if reduction chain was
2001 	     detected.  */
2002 	  return ok;
2003 	}
2004 
2005       /* Find SLP sequences starting from groups of reductions.  */
2006       if (loop_vinfo->reductions.length () > 1
2007 	  && vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2008 					max_tree_size))
2009 	ok = true;
2010     }
2011 
2012   return true;
2013 }
2014 
2015 
2016 /* For each possible SLP instance decide whether to SLP it and calculate overall
2017    unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
2018    least one instance.  */
2019 
2020 bool
vect_make_slp_decision(loop_vec_info loop_vinfo)2021 vect_make_slp_decision (loop_vec_info loop_vinfo)
2022 {
2023   unsigned int i, unrolling_factor = 1;
2024   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2025   slp_instance instance;
2026   int decided_to_slp = 0;
2027 
2028   if (dump_enabled_p ())
2029     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
2030                      "\n");
2031 
2032   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2033     {
2034       /* FORNOW: SLP if you can.  */
2035       if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
2036 	unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
2037 
2038       /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
2039 	 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2040 	 loop-based vectorization.  Such stmts will be marked as HYBRID.  */
2041       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2042       decided_to_slp++;
2043     }
2044 
2045   LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2046 
2047   if (decided_to_slp && dump_enabled_p ())
2048     dump_printf_loc (MSG_NOTE, vect_location,
2049 		     "Decided to SLP %d instances. Unrolling factor %d\n",
2050 		     decided_to_slp, unrolling_factor);
2051 
2052   return (decided_to_slp > 0);
2053 }
2054 
2055 
2056 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2057    can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
2058 
2059 static void
vect_detect_hybrid_slp_stmts(slp_tree node,unsigned i,slp_vect_type stype)2060 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2061 {
2062   gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
2063   imm_use_iterator imm_iter;
2064   gimple *use_stmt;
2065   stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
2066   slp_tree child;
2067   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2068   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2069   int j;
2070 
2071   /* Propagate hybrid down the SLP tree.  */
2072   if (stype == hybrid)
2073     ;
2074   else if (HYBRID_SLP_STMT (stmt_vinfo))
2075     stype = hybrid;
2076   else
2077     {
2078       /* Check if a pure SLP stmt has uses in non-SLP stmts.  */
2079       gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2080       /* If we get a pattern stmt here we have to use the LHS of the
2081          original stmt for immediate uses.  */
2082       if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2083 	  && STMT_VINFO_RELATED_STMT (stmt_vinfo))
2084 	stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2085       if (TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
2086 	FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
2087 	  {
2088 	    if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2089 	      continue;
2090 	    use_vinfo = vinfo_for_stmt (use_stmt);
2091 	    if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2092 		&& STMT_VINFO_RELATED_STMT (use_vinfo))
2093 	      use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2094 	    if (!STMT_SLP_TYPE (use_vinfo)
2095 		&& (STMT_VINFO_RELEVANT (use_vinfo)
2096 		    || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2097 		&& !(gimple_code (use_stmt) == GIMPLE_PHI
2098 		     && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2099 	      {
2100 		if (dump_enabled_p ())
2101 		  {
2102 		    dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2103 				     "def in non-SLP stmt: ");
2104 		    dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2105 		  }
2106 		stype = hybrid;
2107 	      }
2108 	  }
2109     }
2110 
2111   if (stype == hybrid
2112       && !HYBRID_SLP_STMT (stmt_vinfo))
2113     {
2114       if (dump_enabled_p ())
2115 	{
2116 	  dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2117 	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2118 	}
2119       STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2120     }
2121 
2122   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2123     if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2124       vect_detect_hybrid_slp_stmts (child, i, stype);
2125 }
2126 
2127 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses.  */
2128 
2129 static tree
vect_detect_hybrid_slp_1(tree * tp,int *,void * data)2130 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2131 {
2132   walk_stmt_info *wi = (walk_stmt_info *)data;
2133   struct loop *loopp = (struct loop *)wi->info;
2134 
2135   if (wi->is_lhs)
2136     return NULL_TREE;
2137 
2138   if (TREE_CODE (*tp) == SSA_NAME
2139       && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2140     {
2141       gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
2142       if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2143 	  && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
2144 	{
2145 	  if (dump_enabled_p ())
2146 	    {
2147 	      dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2148 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2149 	    }
2150 	  STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2151 	}
2152     }
2153 
2154   return NULL_TREE;
2155 }
2156 
2157 static tree
vect_detect_hybrid_slp_2(gimple_stmt_iterator * gsi,bool * handled,walk_stmt_info *)2158 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2159 			  walk_stmt_info *)
2160 {
2161   /* If the stmt is in a SLP instance then this isn't a reason
2162      to mark use definitions in other SLP instances as hybrid.  */
2163   if (STMT_SLP_TYPE (vinfo_for_stmt (gsi_stmt (*gsi))) != loop_vect)
2164     *handled = true;
2165   return NULL_TREE;
2166 }
2167 
2168 /* Find stmts that must be both vectorized and SLPed.  */
2169 
2170 void
vect_detect_hybrid_slp(loop_vec_info loop_vinfo)2171 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2172 {
2173   unsigned int i;
2174   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2175   slp_instance instance;
2176 
2177   if (dump_enabled_p ())
2178     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2179                      "\n");
2180 
2181   /* First walk all pattern stmt in the loop and mark defs of uses as
2182      hybrid because immediate uses in them are not recorded.  */
2183   for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2184     {
2185       basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2186       for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2187 	   gsi_next (&gsi))
2188 	{
2189 	  gimple *stmt = gsi_stmt (gsi);
2190 	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2191 	  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2192 	    {
2193 	      walk_stmt_info wi;
2194 	      memset (&wi, 0, sizeof (wi));
2195 	      wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2196 	      gimple_stmt_iterator gsi2
2197 		= gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2198 	      walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2199 				vect_detect_hybrid_slp_1, &wi);
2200 	      walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2201 			       vect_detect_hybrid_slp_2,
2202 			       vect_detect_hybrid_slp_1, &wi);
2203 	    }
2204 	}
2205     }
2206 
2207   /* Then walk the SLP instance trees marking stmts with uses in
2208      non-SLP stmts as hybrid, also propagating hybrid down the
2209      SLP tree, collecting the above info on-the-fly.  */
2210   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2211     {
2212       for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2213 	vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2214 				      i, pure_slp);
2215     }
2216 }
2217 
2218 
2219 /* Create and initialize a new bb_vec_info struct for BB, as well as
2220    stmt_vec_info structs for all the stmts in it.  */
2221 
2222 static bb_vec_info
new_bb_vec_info(gimple_stmt_iterator region_begin,gimple_stmt_iterator region_end)2223 new_bb_vec_info (gimple_stmt_iterator region_begin,
2224 		 gimple_stmt_iterator region_end)
2225 {
2226   basic_block bb = gsi_bb (region_begin);
2227   bb_vec_info res = NULL;
2228   gimple_stmt_iterator gsi;
2229 
2230   res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
2231   res->kind = vec_info::bb;
2232   BB_VINFO_BB (res) = bb;
2233   res->region_begin = region_begin;
2234   res->region_end = region_end;
2235 
2236   for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2237        gsi_next (&gsi))
2238     {
2239       gimple *stmt = gsi_stmt (gsi);
2240       gimple_set_uid (stmt, 0);
2241       set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
2242     }
2243 
2244   BB_VINFO_GROUPED_STORES (res).create (10);
2245   BB_VINFO_SLP_INSTANCES (res).create (2);
2246   BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
2247 
2248   bb->aux = res;
2249   return res;
2250 }
2251 
2252 
2253 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2254    stmts in the basic block.  */
2255 
2256 static void
destroy_bb_vec_info(bb_vec_info bb_vinfo)2257 destroy_bb_vec_info (bb_vec_info bb_vinfo)
2258 {
2259   slp_instance instance;
2260   unsigned i;
2261 
2262   if (!bb_vinfo)
2263     return;
2264 
2265   vect_destroy_datarefs (bb_vinfo);
2266   free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
2267   BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
2268   FOR_EACH_VEC_ELT (BB_VINFO_SLP_INSTANCES (bb_vinfo), i, instance)
2269     vect_free_slp_instance (instance);
2270   BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
2271   destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
2272 
2273   for (gimple_stmt_iterator si = bb_vinfo->region_begin;
2274        gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
2275     {
2276       gimple *stmt = gsi_stmt (si);
2277       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2278 
2279       if (stmt_info)
2280         /* Free stmt_vec_info.  */
2281         free_stmt_vec_info (stmt);
2282 
2283       /* Reset region marker.  */
2284       gimple_set_uid (stmt, -1);
2285     }
2286 
2287   BB_VINFO_BB (bb_vinfo)->aux = NULL;
2288   free (bb_vinfo);
2289 }
2290 
2291 
2292 /* Analyze statements contained in SLP tree node after recursively analyzing
2293    the subtree. Return TRUE if the operations are supported.  */
2294 
2295 static bool
vect_slp_analyze_node_operations(slp_tree node)2296 vect_slp_analyze_node_operations (slp_tree node)
2297 {
2298   bool dummy;
2299   int i, j;
2300   gimple *stmt;
2301   slp_tree child;
2302 
2303   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2304     return true;
2305 
2306   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2307     if (!vect_slp_analyze_node_operations (child))
2308       return false;
2309 
2310   bool res = true;
2311   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2312     {
2313       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2314       gcc_assert (stmt_info);
2315       gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2316 
2317       /* Push SLP node def-type to stmt operands.  */
2318       FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2319 	if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2320 	  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[i]))
2321 	    = SLP_TREE_DEF_TYPE (child);
2322       res = vect_analyze_stmt (stmt, &dummy, node);
2323       /* Restore def-types.  */
2324       FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2325 	if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2326 	  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[i]))
2327 	    = vect_internal_def;
2328       if (! res)
2329 	break;
2330     }
2331 
2332   return res;
2333 }
2334 
2335 
2336 /* Analyze statements in SLP instances of the basic block.  Return TRUE if the
2337    operations are supported. */
2338 
2339 bool
vect_slp_analyze_operations(vec<slp_instance> slp_instances,void * data)2340 vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
2341 {
2342   slp_instance instance;
2343   int i;
2344 
2345   if (dump_enabled_p ())
2346     dump_printf_loc (MSG_NOTE, vect_location,
2347 		     "=== vect_slp_analyze_operations ===\n");
2348 
2349   for (i = 0; slp_instances.iterate (i, &instance); )
2350     {
2351       if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance)))
2352         {
2353 	  dump_printf_loc (MSG_NOTE, vect_location,
2354 			   "removing SLP instance operations starting from: ");
2355 	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2356 			    SLP_TREE_SCALAR_STMTS
2357 			      (SLP_INSTANCE_TREE (instance))[0], 0);
2358 	  vect_free_slp_instance (instance);
2359           slp_instances.ordered_remove (i);
2360 	}
2361       else
2362 	{
2363 	  /* Compute the costs of the SLP instance.  */
2364 	  vect_analyze_slp_cost (instance, data);
2365 	  i++;
2366 	}
2367     }
2368 
2369   if (!slp_instances.length ())
2370     return false;
2371 
2372   return true;
2373 }
2374 
2375 
2376 /* Compute the scalar cost of the SLP node NODE and its children
2377    and return it.  Do not account defs that are marked in LIFE and
2378    update LIFE according to uses of NODE.  */
2379 
2380 static unsigned
vect_bb_slp_scalar_cost(basic_block bb,slp_tree node,vec<bool,va_heap> * life)2381 vect_bb_slp_scalar_cost (basic_block bb,
2382 			 slp_tree node, vec<bool, va_heap> *life)
2383 {
2384   unsigned scalar_cost = 0;
2385   unsigned i;
2386   gimple *stmt;
2387   slp_tree child;
2388 
2389   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2390     {
2391       unsigned stmt_cost;
2392       ssa_op_iter op_iter;
2393       def_operand_p def_p;
2394       stmt_vec_info stmt_info;
2395 
2396       if ((*life)[i])
2397 	continue;
2398 
2399       /* If there is a non-vectorized use of the defs then the scalar
2400          stmt is kept live in which case we do not account it or any
2401 	 required defs in the SLP children in the scalar cost.  This
2402 	 way we make the vectorization more costly when compared to
2403 	 the scalar cost.  */
2404       FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2405 	{
2406 	  imm_use_iterator use_iter;
2407 	  gimple *use_stmt;
2408 	  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2409 	    if (!is_gimple_debug (use_stmt)
2410 		&& (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2411 					     use_stmt)
2412 		    || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
2413 	      {
2414 		(*life)[i] = true;
2415 		BREAK_FROM_IMM_USE_STMT (use_iter);
2416 	      }
2417 	}
2418       if ((*life)[i])
2419 	continue;
2420 
2421       /* Count scalar stmts only once.  */
2422       if (gimple_visited_p (stmt))
2423 	continue;
2424       gimple_set_visited (stmt, true);
2425 
2426       stmt_info = vinfo_for_stmt (stmt);
2427       if (STMT_VINFO_DATA_REF (stmt_info))
2428         {
2429           if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2430             stmt_cost = vect_get_stmt_cost (scalar_load);
2431           else
2432             stmt_cost = vect_get_stmt_cost (scalar_store);
2433         }
2434       else
2435         stmt_cost = vect_get_stmt_cost (scalar_stmt);
2436 
2437       scalar_cost += stmt_cost;
2438     }
2439 
2440   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2441     if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2442       scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
2443 
2444   return scalar_cost;
2445 }
2446 
2447 /* Check if vectorization of the basic block is profitable.  */
2448 
2449 static bool
vect_bb_vectorization_profitable_p(bb_vec_info bb_vinfo)2450 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2451 {
2452   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2453   slp_instance instance;
2454   int i;
2455   unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2456   unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2457 
2458   /* Calculate scalar cost.  */
2459   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2460     {
2461       auto_vec<bool, 20> life;
2462       life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2463       scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2464 					      SLP_INSTANCE_TREE (instance),
2465 					      &life);
2466     }
2467 
2468   /* Unset visited flag.  */
2469   for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2470        gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2471     gimple_set_visited  (gsi_stmt (gsi), false);
2472 
2473   /* Complete the target-specific cost calculation.  */
2474   finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2475 	       &vec_inside_cost, &vec_epilogue_cost);
2476 
2477   vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2478 
2479   if (dump_enabled_p ())
2480     {
2481       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2482       dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
2483 		   vec_inside_cost);
2484       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
2485       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
2486       dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
2487     }
2488 
2489   /* Vectorization is profitable if its cost is more than the cost of scalar
2490      version.  Note that we err on the vector side for equal cost because
2491      the cost estimate is otherwise quite pessimistic (constant uses are
2492      free on the scalar side but cost a load on the vector side for
2493      example).  */
2494   if (vec_outside_cost + vec_inside_cost > scalar_cost)
2495     return false;
2496 
2497   return true;
2498 }
2499 
2500 /* Check if the basic block can be vectorized.  Returns a bb_vec_info
2501    if so and sets fatal to true if failure is independent of
2502    current_vector_size.  */
2503 
2504 static bb_vec_info
vect_slp_analyze_bb_1(gimple_stmt_iterator region_begin,gimple_stmt_iterator region_end,vec<data_reference_p> datarefs,int n_stmts,bool & fatal)2505 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2506 		       gimple_stmt_iterator region_end,
2507 		       vec<data_reference_p> datarefs, int n_stmts,
2508 		       bool &fatal)
2509 {
2510   bb_vec_info bb_vinfo;
2511   slp_instance instance;
2512   int i;
2513   int min_vf = 2;
2514 
2515   /* The first group of checks is independent of the vector size.  */
2516   fatal = true;
2517 
2518   if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2519     {
2520       if (dump_enabled_p ())
2521 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2522 			 "not vectorized: too many instructions in "
2523 			 "basic block.\n");
2524       free_data_refs (datarefs);
2525       return NULL;
2526     }
2527 
2528   bb_vinfo = new_bb_vec_info (region_begin, region_end);
2529   if (!bb_vinfo)
2530     return NULL;
2531 
2532   BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2533 
2534   /* Analyze the data references.  */
2535 
2536   if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2537     {
2538       if (dump_enabled_p ())
2539         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2540 			 "not vectorized: unhandled data-ref in basic "
2541 			 "block.\n");
2542 
2543       destroy_bb_vec_info (bb_vinfo);
2544       return NULL;
2545     }
2546 
2547   if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2548     {
2549       if (dump_enabled_p ())
2550         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2551 			 "not vectorized: not enough data-refs in "
2552 			 "basic block.\n");
2553 
2554       destroy_bb_vec_info (bb_vinfo);
2555       return NULL;
2556     }
2557 
2558   if (!vect_analyze_data_ref_accesses (bb_vinfo))
2559     {
2560      if (dump_enabled_p ())
2561        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2562 			"not vectorized: unhandled data access in "
2563 			"basic block.\n");
2564 
2565       destroy_bb_vec_info (bb_vinfo);
2566       return NULL;
2567     }
2568 
2569   /* If there are no grouped stores in the region there is no need
2570      to continue with pattern recog as vect_analyze_slp will fail
2571      anyway.  */
2572   if (bb_vinfo->grouped_stores.is_empty ())
2573     {
2574       if (dump_enabled_p ())
2575 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2576 			 "not vectorized: no grouped stores in "
2577 			 "basic block.\n");
2578 
2579       destroy_bb_vec_info (bb_vinfo);
2580       return NULL;
2581     }
2582 
2583   /* While the rest of the analysis below depends on it in some way.  */
2584   fatal = false;
2585 
2586   vect_pattern_recog (bb_vinfo);
2587 
2588   /* Check the SLP opportunities in the basic block, analyze and build SLP
2589      trees.  */
2590   if (!vect_analyze_slp (bb_vinfo, n_stmts))
2591     {
2592       if (dump_enabled_p ())
2593 	{
2594 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2595 			   "Failed to SLP the basic block.\n");
2596 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2597 			   "not vectorized: failed to find SLP opportunities "
2598 			   "in basic block.\n");
2599 	}
2600 
2601       destroy_bb_vec_info (bb_vinfo);
2602       return NULL;
2603     }
2604 
2605   /* Analyze and verify the alignment of data references and the
2606      dependence in the SLP instances.  */
2607   for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2608     {
2609       if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2610 	  || ! vect_slp_analyze_instance_dependence (instance))
2611 	{
2612 	  dump_printf_loc (MSG_NOTE, vect_location,
2613 			   "removing SLP instance operations starting from: ");
2614 	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2615 			    SLP_TREE_SCALAR_STMTS
2616 			      (SLP_INSTANCE_TREE (instance))[0], 0);
2617 	  vect_free_slp_instance (instance);
2618 	  BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2619 	  continue;
2620 	}
2621 
2622       /* Mark all the statements that we want to vectorize as pure SLP and
2623 	 relevant.  */
2624       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2625       vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2626 
2627       i++;
2628     }
2629   if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2630     {
2631       destroy_bb_vec_info (bb_vinfo);
2632       return NULL;
2633     }
2634 
2635   if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
2636 				    BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
2637     {
2638       if (dump_enabled_p ())
2639         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2640 			 "not vectorized: bad operation in basic block.\n");
2641 
2642       destroy_bb_vec_info (bb_vinfo);
2643       return NULL;
2644     }
2645 
2646   /* Cost model: check if the vectorization is worthwhile.  */
2647   if (!unlimited_cost_model (NULL)
2648       && !vect_bb_vectorization_profitable_p (bb_vinfo))
2649     {
2650       if (dump_enabled_p ())
2651         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2652 			 "not vectorized: vectorization is not "
2653 			 "profitable.\n");
2654 
2655       destroy_bb_vec_info (bb_vinfo);
2656       return NULL;
2657     }
2658 
2659   if (dump_enabled_p ())
2660     dump_printf_loc (MSG_NOTE, vect_location,
2661 		     "Basic block will be vectorized using SLP\n");
2662 
2663   return bb_vinfo;
2664 }
2665 
2666 
2667 /* Main entry for the BB vectorizer.  Analyze and transform BB, returns
2668    true if anything in the basic-block was vectorized.  */
2669 
2670 bool
vect_slp_bb(basic_block bb)2671 vect_slp_bb (basic_block bb)
2672 {
2673   bb_vec_info bb_vinfo;
2674   gimple_stmt_iterator gsi;
2675   unsigned int vector_sizes;
2676   bool any_vectorized = false;
2677 
2678   if (dump_enabled_p ())
2679     dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2680 
2681   /* Autodetect first vector size we try.  */
2682   current_vector_size = 0;
2683   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2684 
2685   gsi = gsi_start_bb (bb);
2686 
2687   while (1)
2688     {
2689       if (gsi_end_p (gsi))
2690 	break;
2691 
2692       gimple_stmt_iterator region_begin = gsi;
2693       vec<data_reference_p> datarefs = vNULL;
2694       int insns = 0;
2695 
2696       for (; !gsi_end_p (gsi); gsi_next (&gsi))
2697 	{
2698 	  gimple *stmt = gsi_stmt (gsi);
2699 	  if (is_gimple_debug (stmt))
2700 	    continue;
2701 	  insns++;
2702 
2703 	  if (gimple_location (stmt) != UNKNOWN_LOCATION)
2704 	    vect_location = gimple_location (stmt);
2705 
2706 	  if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
2707 	    break;
2708 	}
2709 
2710       /* Skip leading unhandled stmts.  */
2711       if (gsi_stmt (region_begin) == gsi_stmt (gsi))
2712 	{
2713 	  gsi_next (&gsi);
2714 	  continue;
2715 	}
2716 
2717       gimple_stmt_iterator region_end = gsi;
2718 
2719       bool vectorized = false;
2720       bool fatal = false;
2721       bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
2722 					datarefs, insns, fatal);
2723       if (bb_vinfo
2724 	  && dbg_cnt (vect_slp))
2725 	{
2726 	  if (dump_enabled_p ())
2727 	    dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
2728 
2729 	  vect_schedule_slp (bb_vinfo);
2730 
2731 	  if (dump_enabled_p ())
2732 	    dump_printf_loc (MSG_NOTE, vect_location,
2733 			     "basic block part vectorized\n");
2734 
2735 	  destroy_bb_vec_info (bb_vinfo);
2736 
2737 	  vectorized = true;
2738 	}
2739       else
2740 	destroy_bb_vec_info (bb_vinfo);
2741 
2742       any_vectorized |= vectorized;
2743 
2744       vector_sizes &= ~current_vector_size;
2745       if (vectorized
2746 	  || vector_sizes == 0
2747 	  || current_vector_size == 0
2748 	  /* If vect_slp_analyze_bb_1 signaled that analysis for all
2749 	     vector sizes will fail do not bother iterating.  */
2750 	  || fatal)
2751 	{
2752 	  if (gsi_end_p (region_end))
2753 	    break;
2754 
2755 	  /* Skip the unhandled stmt.  */
2756 	  gsi_next (&gsi);
2757 
2758 	  /* And reset vector sizes.  */
2759 	  current_vector_size = 0;
2760 	  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2761 	}
2762       else
2763 	{
2764 	  /* Try the next biggest vector size.  */
2765 	  current_vector_size = 1 << floor_log2 (vector_sizes);
2766 	  if (dump_enabled_p ())
2767 	    dump_printf_loc (MSG_NOTE, vect_location,
2768 			     "***** Re-trying analysis with "
2769 			     "vector size %d\n", current_vector_size);
2770 
2771 	  /* Start over.  */
2772 	  gsi = region_begin;
2773 	}
2774     }
2775 
2776   return any_vectorized;
2777 }
2778 
2779 
2780 /* Return 1 if vector type of boolean constant which is OPNUM
2781    operand in statement STMT is a boolean vector.  */
2782 
2783 static bool
vect_mask_constant_operand_p(gimple * stmt,int opnum)2784 vect_mask_constant_operand_p (gimple *stmt, int opnum)
2785 {
2786   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2787   enum tree_code code = gimple_expr_code (stmt);
2788   tree op, vectype;
2789   gimple *def_stmt;
2790   enum vect_def_type dt;
2791 
2792   /* For comparison and COND_EXPR type is chosen depending
2793      on the other comparison operand.  */
2794   if (TREE_CODE_CLASS (code) == tcc_comparison)
2795     {
2796       if (opnum)
2797 	op = gimple_assign_rhs1 (stmt);
2798       else
2799 	op = gimple_assign_rhs2 (stmt);
2800 
2801       if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
2802 			       &dt, &vectype))
2803 	gcc_unreachable ();
2804 
2805       return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
2806     }
2807 
2808   if (code == COND_EXPR)
2809     {
2810       tree cond = gimple_assign_rhs1 (stmt);
2811 
2812       if (TREE_CODE (cond) == SSA_NAME)
2813 	return false;
2814 
2815       if (opnum)
2816 	op = TREE_OPERAND (cond, 1);
2817       else
2818 	op = TREE_OPERAND (cond, 0);
2819 
2820       if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
2821 			       &dt, &vectype))
2822 	gcc_unreachable ();
2823 
2824       return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
2825     }
2826 
2827   return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
2828 }
2829 
2830 
2831 /* For constant and loop invariant defs of SLP_NODE this function returns
2832    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2833    OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2834    scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
2835    REDUC_INDEX is the index of the reduction operand in the statements, unless
2836    it is -1.  */
2837 
2838 static void
vect_get_constant_vectors(tree op,slp_tree slp_node,vec<tree> * vec_oprnds,unsigned int op_num,unsigned int number_of_vectors,int reduc_index)2839 vect_get_constant_vectors (tree op, slp_tree slp_node,
2840                            vec<tree> *vec_oprnds,
2841 			   unsigned int op_num, unsigned int number_of_vectors,
2842                            int reduc_index)
2843 {
2844   vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2845   gimple *stmt = stmts[0];
2846   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2847   unsigned nunits;
2848   tree vec_cst;
2849   tree *elts;
2850   unsigned j, number_of_places_left_in_vector;
2851   tree vector_type;
2852   tree vop;
2853   int group_size = stmts.length ();
2854   unsigned int vec_num, i;
2855   unsigned number_of_copies = 1;
2856   vec<tree> voprnds;
2857   voprnds.create (number_of_vectors);
2858   bool constant_p, is_store;
2859   tree neutral_op = NULL;
2860   enum tree_code code = gimple_expr_code (stmt);
2861   gimple *def_stmt;
2862   struct loop *loop;
2863   gimple_seq ctor_seq = NULL;
2864 
2865   /* Check if vector type is a boolean vector.  */
2866   if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
2867       && vect_mask_constant_operand_p (stmt, op_num))
2868     vector_type
2869       = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
2870   else
2871     vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2872   nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2873 
2874   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2875       && reduc_index != -1)
2876     {
2877       op_num = reduc_index;
2878       op = gimple_op (stmt, op_num + 1);
2879       /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2880          we need either neutral operands or the original operands.  See
2881          get_initial_def_for_reduction() for details.  */
2882       switch (code)
2883         {
2884           case WIDEN_SUM_EXPR:
2885           case DOT_PROD_EXPR:
2886 	  case SAD_EXPR:
2887           case PLUS_EXPR:
2888           case MINUS_EXPR:
2889           case BIT_IOR_EXPR:
2890           case BIT_XOR_EXPR:
2891              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2892                neutral_op = build_real (TREE_TYPE (op), dconst0);
2893              else
2894                neutral_op = build_int_cst (TREE_TYPE (op), 0);
2895 
2896              break;
2897 
2898           case MULT_EXPR:
2899              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2900                neutral_op = build_real (TREE_TYPE (op), dconst1);
2901              else
2902                neutral_op = build_int_cst (TREE_TYPE (op), 1);
2903 
2904              break;
2905 
2906           case BIT_AND_EXPR:
2907             neutral_op = build_int_cst (TREE_TYPE (op), -1);
2908             break;
2909 
2910 	  /* For MIN/MAX we don't have an easy neutral operand but
2911 	     the initial values can be used fine here.  Only for
2912 	     a reduction chain we have to force a neutral element.  */
2913 	  case MAX_EXPR:
2914 	  case MIN_EXPR:
2915 	    if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
2916 	      neutral_op = NULL;
2917 	    else
2918 	      {
2919 		def_stmt = SSA_NAME_DEF_STMT (op);
2920 		loop = (gimple_bb (stmt))->loop_father;
2921 		neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2922 						    loop_preheader_edge (loop));
2923 	      }
2924 	    break;
2925 
2926           default:
2927 	    gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo));
2928             neutral_op = NULL;
2929         }
2930     }
2931 
2932   if (STMT_VINFO_DATA_REF (stmt_vinfo))
2933     {
2934       is_store = true;
2935       op = gimple_assign_rhs1 (stmt);
2936     }
2937   else
2938     is_store = false;
2939 
2940   gcc_assert (op);
2941 
2942   if (CONSTANT_CLASS_P (op))
2943     constant_p = true;
2944   else
2945     constant_p = false;
2946 
2947   /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2948      created vectors. It is greater than 1 if unrolling is performed.
2949 
2950      For example, we have two scalar operands, s1 and s2 (e.g., group of
2951      strided accesses of size two), while NUNITS is four (i.e., four scalars
2952      of this type can be packed in a vector).  The output vector will contain
2953      two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
2954      will be 2).
2955 
2956      If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2957      containing the operands.
2958 
2959      For example, NUNITS is four as before, and the group size is 8
2960      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
2961      {s5, s6, s7, s8}.  */
2962 
2963   number_of_copies = nunits * number_of_vectors / group_size;
2964 
2965   number_of_places_left_in_vector = nunits;
2966   elts = XALLOCAVEC (tree, nunits);
2967   bool place_after_defs = false;
2968   for (j = 0; j < number_of_copies; j++)
2969     {
2970       for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2971         {
2972           if (is_store)
2973             op = gimple_assign_rhs1 (stmt);
2974           else
2975 	    {
2976 	      switch (code)
2977 		{
2978 		  case COND_EXPR:
2979 		    {
2980 		      tree cond = gimple_assign_rhs1 (stmt);
2981 		      if (TREE_CODE (cond) == SSA_NAME)
2982 			op = gimple_op (stmt, op_num + 1);
2983 		      else if (op_num == 0 || op_num == 1)
2984 			op = TREE_OPERAND (cond, op_num);
2985 		      else
2986 			{
2987 			  if (op_num == 2)
2988 			    op = gimple_assign_rhs2 (stmt);
2989 			  else
2990 			    op = gimple_assign_rhs3 (stmt);
2991 			}
2992 		    }
2993 		    break;
2994 
2995 		  case CALL_EXPR:
2996 		    op = gimple_call_arg (stmt, op_num);
2997 		    break;
2998 
2999 		  case LSHIFT_EXPR:
3000 		  case RSHIFT_EXPR:
3001 		  case LROTATE_EXPR:
3002 		  case RROTATE_EXPR:
3003 		    op = gimple_op (stmt, op_num + 1);
3004 		    /* Unlike the other binary operators, shifts/rotates have
3005 		       the shift count being int, instead of the same type as
3006 		       the lhs, so make sure the scalar is the right type if
3007 		       we are dealing with vectors of
3008 		       long long/long/short/char.  */
3009 		    if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3010 		      op = fold_convert (TREE_TYPE (vector_type), op);
3011 		    break;
3012 
3013 		  default:
3014 		    op = gimple_op (stmt, op_num + 1);
3015 		    break;
3016 		}
3017 	    }
3018 
3019           if (reduc_index != -1)
3020             {
3021               loop = (gimple_bb (stmt))->loop_father;
3022               def_stmt = SSA_NAME_DEF_STMT (op);
3023 
3024               gcc_assert (loop);
3025 
3026               /* Get the def before the loop.  In reduction chain we have only
3027                  one initial value.  */
3028               if ((j != (number_of_copies - 1)
3029                    || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
3030                        && i != 0))
3031                   && neutral_op)
3032                 op = neutral_op;
3033               else
3034                 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
3035                                             loop_preheader_edge (loop));
3036             }
3037 
3038           /* Create 'vect_ = {op0,op1,...,opn}'.  */
3039           number_of_places_left_in_vector--;
3040 	  tree orig_op = op;
3041 	  if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3042 	    {
3043 	      if (CONSTANT_CLASS_P (op))
3044 		{
3045 		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3046 		    {
3047 		      /* Can't use VIEW_CONVERT_EXPR for booleans because
3048 			 of possibly different sizes of scalar value and
3049 			 vector element.  */
3050 		      if (integer_zerop (op))
3051 			op = build_int_cst (TREE_TYPE (vector_type), 0);
3052 		      else if (integer_onep (op))
3053 			op = build_all_ones_cst (TREE_TYPE (vector_type));
3054 		      else
3055 			gcc_unreachable ();
3056 		    }
3057 		  else
3058 		    op = fold_unary (VIEW_CONVERT_EXPR,
3059 				     TREE_TYPE (vector_type), op);
3060 		  gcc_assert (op && CONSTANT_CLASS_P (op));
3061 		}
3062 	      else
3063 		{
3064 		  tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3065 		  gimple *init_stmt;
3066 		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3067 		    {
3068 		      tree true_val
3069 			= build_all_ones_cst (TREE_TYPE (vector_type));
3070 		      tree false_val
3071 			= build_zero_cst (TREE_TYPE (vector_type));
3072 		      gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3073 		      init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3074 						       op, true_val,
3075 						       false_val);
3076 		    }
3077 		  else
3078 		    {
3079 		      op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3080 				   op);
3081 		      init_stmt
3082 			= gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3083 					       op);
3084 		    }
3085 		  gimple_seq_add_stmt (&ctor_seq, init_stmt);
3086 		  op = new_temp;
3087 		}
3088 	    }
3089 	  elts[number_of_places_left_in_vector] = op;
3090 	  if (!CONSTANT_CLASS_P (op))
3091 	    constant_p = false;
3092 	  if (TREE_CODE (orig_op) == SSA_NAME
3093 	      && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3094 	      && STMT_VINFO_BB_VINFO (stmt_vinfo)
3095 	      && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3096 		  == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3097 	    place_after_defs = true;
3098 
3099           if (number_of_places_left_in_vector == 0)
3100             {
3101               number_of_places_left_in_vector = nunits;
3102 
3103 	      if (constant_p)
3104 		vec_cst = build_vector (vector_type, elts);
3105 	      else
3106 		{
3107 		  vec<constructor_elt, va_gc> *v;
3108 		  unsigned k;
3109 		  vec_alloc (v, nunits);
3110 		  for (k = 0; k < nunits; ++k)
3111 		    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
3112 		  vec_cst = build_constructor (vector_type, v);
3113 		}
3114 	      tree init;
3115 	      gimple_stmt_iterator gsi;
3116 	      if (place_after_defs)
3117 		{
3118 		  gsi = gsi_for_stmt
3119 		          (vect_find_last_scalar_stmt_in_slp (slp_node));
3120 		  init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3121 		}
3122 	      else
3123 		init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
3124 	      if (ctor_seq != NULL)
3125 		{
3126 		  gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3127 		  gsi_insert_seq_before_without_update (&gsi, ctor_seq,
3128 							GSI_SAME_STMT);
3129 		  ctor_seq = NULL;
3130 		}
3131 	      voprnds.quick_push (init);
3132 	      place_after_defs = false;
3133             }
3134         }
3135     }
3136 
3137   /* Since the vectors are created in the reverse order, we should invert
3138      them.  */
3139   vec_num = voprnds.length ();
3140   for (j = vec_num; j != 0; j--)
3141     {
3142       vop = voprnds[j - 1];
3143       vec_oprnds->quick_push (vop);
3144     }
3145 
3146   voprnds.release ();
3147 
3148   /* In case that VF is greater than the unrolling factor needed for the SLP
3149      group of stmts, NUMBER_OF_VECTORS to be created is greater than
3150      NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3151      to replicate the vectors.  */
3152   while (number_of_vectors > vec_oprnds->length ())
3153     {
3154       tree neutral_vec = NULL;
3155 
3156       if (neutral_op)
3157         {
3158           if (!neutral_vec)
3159 	    neutral_vec = build_vector_from_val (vector_type, neutral_op);
3160 
3161           vec_oprnds->quick_push (neutral_vec);
3162         }
3163       else
3164         {
3165           for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3166             vec_oprnds->quick_push (vop);
3167         }
3168     }
3169 }
3170 
3171 
3172 /* Get vectorized definitions from SLP_NODE that contains corresponding
3173    vectorized def-stmts.  */
3174 
3175 static void
vect_get_slp_vect_defs(slp_tree slp_node,vec<tree> * vec_oprnds)3176 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3177 {
3178   tree vec_oprnd;
3179   gimple *vec_def_stmt;
3180   unsigned int i;
3181 
3182   gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3183 
3184   FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
3185     {
3186       gcc_assert (vec_def_stmt);
3187       vec_oprnd = gimple_get_lhs (vec_def_stmt);
3188       vec_oprnds->quick_push (vec_oprnd);
3189     }
3190 }
3191 
3192 
3193 /* Get vectorized definitions for SLP_NODE.
3194    If the scalar definitions are loop invariants or constants, collect them and
3195    call vect_get_constant_vectors() to create vector stmts.
3196    Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3197    must be stored in the corresponding child of SLP_NODE, and we call
3198    vect_get_slp_vect_defs () to retrieve them.  */
3199 
3200 void
vect_get_slp_defs(vec<tree> ops,slp_tree slp_node,vec<vec<tree>> * vec_oprnds,int reduc_index)3201 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3202 		   vec<vec<tree> > *vec_oprnds, int reduc_index)
3203 {
3204   gimple *first_stmt;
3205   int number_of_vects = 0, i;
3206   unsigned int child_index = 0;
3207   HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3208   slp_tree child = NULL;
3209   vec<tree> vec_defs;
3210   tree oprnd;
3211   bool vectorized_defs;
3212 
3213   first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3214   FOR_EACH_VEC_ELT (ops, i, oprnd)
3215     {
3216       /* For each operand we check if it has vectorized definitions in a child
3217 	 node or we need to create them (for invariants and constants).  We
3218 	 check if the LHS of the first stmt of the next child matches OPRND.
3219 	 If it does, we found the correct child.  Otherwise, we call
3220 	 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
3221 	 to check this child node for the next operand.  */
3222       vectorized_defs = false;
3223       if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3224         {
3225           child = SLP_TREE_CHILDREN (slp_node)[child_index];
3226 
3227 	  /* We have to check both pattern and original def, if available.  */
3228 	  if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3229 	    {
3230 	      gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
3231 	      gimple *related
3232 		= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
3233 
3234 	      if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
3235 		  || (related
3236 		      && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
3237 		{
3238 		  /* The number of vector defs is determined by the number of
3239 		     vector statements in the node from which we get those
3240 		     statements.  */
3241 		  number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3242 		  vectorized_defs = true;
3243 		  child_index++;
3244 		}
3245 	    }
3246 	  else
3247 	    child_index++;
3248         }
3249 
3250       if (!vectorized_defs)
3251         {
3252           if (i == 0)
3253             {
3254               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3255               /* Number of vector stmts was calculated according to LHS in
3256                  vect_schedule_slp_instance (), fix it by replacing LHS with
3257                  RHS, if necessary.  See vect_get_smallest_scalar_type () for
3258                  details.  */
3259               vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3260                                              &rhs_size_unit);
3261               if (rhs_size_unit != lhs_size_unit)
3262                 {
3263                   number_of_vects *= rhs_size_unit;
3264                   number_of_vects /= lhs_size_unit;
3265                 }
3266             }
3267         }
3268 
3269       /* Allocate memory for vectorized defs.  */
3270       vec_defs = vNULL;
3271       vec_defs.create (number_of_vects);
3272 
3273       /* For reduction defs we call vect_get_constant_vectors (), since we are
3274          looking for initial loop invariant values.  */
3275       if (vectorized_defs && reduc_index == -1)
3276         /* The defs are already vectorized.  */
3277 	vect_get_slp_vect_defs (child, &vec_defs);
3278       else
3279         /* Build vectors from scalar defs.  */
3280 	vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3281                                    number_of_vects, reduc_index);
3282 
3283       vec_oprnds->quick_push (vec_defs);
3284 
3285       /* For reductions, we only need initial values.  */
3286       if (reduc_index != -1)
3287         return;
3288     }
3289 }
3290 
3291 
3292 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
3293    building a vector of type MASK_TYPE from it) and two input vectors placed in
3294    DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
3295    shifting by STRIDE elements of DR_CHAIN for every copy.
3296    (STRIDE is the number of vectorized stmts for NODE divided by the number of
3297    copies).
3298    VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
3299    the created stmts must be inserted.  */
3300 
3301 static inline void
vect_create_mask_and_perm(gimple * stmt,tree mask,int first_vec_indx,int second_vec_indx,gimple_stmt_iterator * gsi,slp_tree node,tree vectype,vec<tree> dr_chain,int ncopies,int vect_stmts_counter)3302 vect_create_mask_and_perm (gimple *stmt,
3303                            tree mask, int first_vec_indx, int second_vec_indx,
3304                            gimple_stmt_iterator *gsi, slp_tree node,
3305                            tree vectype, vec<tree> dr_chain,
3306                            int ncopies, int vect_stmts_counter)
3307 {
3308   tree perm_dest;
3309   gimple *perm_stmt = NULL;
3310   int i, stride_in, stride_out;
3311   tree first_vec, second_vec, data_ref;
3312 
3313   stride_out = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
3314   stride_in = dr_chain.length () / ncopies;
3315 
3316   /* Initialize the vect stmts of NODE to properly insert the generated
3317      stmts later.  */
3318   for (i = SLP_TREE_VEC_STMTS (node).length ();
3319        i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3320     SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3321 
3322   perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
3323   for (i = 0; i < ncopies; i++)
3324     {
3325       first_vec = dr_chain[first_vec_indx];
3326       second_vec = dr_chain[second_vec_indx];
3327 
3328       /* Generate the permute statement if necessary.  */
3329       if (mask)
3330 	{
3331 	  perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3332 					   first_vec, second_vec, mask);
3333 	  data_ref = make_ssa_name (perm_dest, perm_stmt);
3334 	  gimple_set_lhs (perm_stmt, data_ref);
3335 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3336 	}
3337       else
3338 	/* If mask was NULL_TREE generate the requested identity transform.  */
3339 	perm_stmt = SSA_NAME_DEF_STMT (first_vec);
3340 
3341       /* Store the vector statement in NODE.  */
3342       SLP_TREE_VEC_STMTS (node)[stride_out * i + vect_stmts_counter]
3343 	= perm_stmt;
3344 
3345       first_vec_indx += stride_in;
3346       second_vec_indx += stride_in;
3347     }
3348 }
3349 
3350 
3351 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3352    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3353    permute statements for the SLP node NODE of the SLP instance
3354    SLP_NODE_INSTANCE.  */
3355 
3356 bool
vect_transform_slp_perm_load(slp_tree node,vec<tree> dr_chain,gimple_stmt_iterator * gsi,int vf,slp_instance slp_node_instance,bool analyze_only)3357 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3358                               gimple_stmt_iterator *gsi, int vf,
3359                               slp_instance slp_node_instance, bool analyze_only)
3360 {
3361   gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3362   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3363   tree mask_element_type = NULL_TREE, mask_type;
3364   int nunits, vec_index = 0;
3365   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3366   int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3367   int unroll_factor, mask_element, ncopies;
3368   unsigned char *mask;
3369   machine_mode mode;
3370 
3371   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3372     return false;
3373 
3374   stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3375 
3376   mode = TYPE_MODE (vectype);
3377 
3378   /* The generic VEC_PERM_EXPR code always uses an integral type of the
3379      same size as the vector element being permuted.  */
3380   mask_element_type = lang_hooks.types.type_for_mode
3381 		(int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
3382   mask_type = get_vectype_for_scalar_type (mask_element_type);
3383   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3384   mask = XALLOCAVEC (unsigned char, nunits);
3385   unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
3386 
3387   /* Number of copies is determined by the final vectorization factor
3388      relatively to SLP_NODE_INSTANCE unrolling factor.  */
3389   ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
3390 
3391   /* Generate permutation masks for every NODE. Number of masks for each NODE
3392      is equal to GROUP_SIZE.
3393      E.g., we have a group of three nodes with three loads from the same
3394      location in each node, and the vector size is 4. I.e., we have a
3395      a0b0c0a1b1c1... sequence and we need to create the following vectors:
3396      for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3397      for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3398      ...
3399 
3400      The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3401      The last mask is illegal since we assume two operands for permute
3402      operation, and the mask element values can't be outside that range.
3403      Hence, the last mask must be converted into {2,5,5,5}.
3404      For the first two permutations we need the first and the second input
3405      vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3406      we need the second and the third vectors: {b1,c1,a2,b2} and
3407      {c2,a3,b3,c3}.  */
3408 
3409   int vect_stmts_counter = 0;
3410   int index = 0;
3411   int first_vec_index = -1;
3412   int second_vec_index = -1;
3413   bool noop_p = true;
3414 
3415   for (int j = 0; j < unroll_factor; j++)
3416     {
3417       for (int k = 0; k < group_size; k++)
3418 	{
3419 	  int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
3420 		   + j * STMT_VINFO_GROUP_SIZE (stmt_info));
3421 	  vec_index = i / nunits;
3422 	  mask_element = i % nunits;
3423 	  if (vec_index == first_vec_index
3424 	      || first_vec_index == -1)
3425 	    {
3426 	      first_vec_index = vec_index;
3427 	    }
3428 	  else if (vec_index == second_vec_index
3429 		   || second_vec_index == -1)
3430 	    {
3431 	      second_vec_index = vec_index;
3432 	      mask_element += nunits;
3433 	    }
3434 	  else
3435 	    {
3436 	      if (dump_enabled_p ())
3437 		{
3438 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3439 				   "permutation requires at "
3440 				   "least three vectors ");
3441 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3442 				    stmt, 0);
3443 		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3444 		}
3445 	      return false;
3446 	    }
3447 
3448 	  gcc_assert (mask_element >= 0
3449 		      && mask_element < 2 * nunits);
3450 	  if (mask_element != index)
3451 	    noop_p = false;
3452 	  mask[index++] = mask_element;
3453 
3454 	  if (index == nunits)
3455 	    {
3456 	      if (! noop_p
3457 		  && ! can_vec_perm_p (mode, false, mask))
3458 		{
3459 		  if (dump_enabled_p ())
3460 		    {
3461 		      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3462 				       vect_location,
3463 				       "unsupported vect permute { ");
3464 		      for (i = 0; i < nunits; ++i)
3465 			dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
3466 		      dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3467 		    }
3468 		  return false;
3469 		}
3470 
3471 	      if (!analyze_only)
3472 		{
3473 		  tree mask_vec = NULL_TREE;
3474 
3475 		  if (! noop_p)
3476 		    {
3477 		      tree *mask_elts = XALLOCAVEC (tree, nunits);
3478 		      for (int l = 0; l < nunits; ++l)
3479 			mask_elts[l] = build_int_cst (mask_element_type,
3480 						      mask[l]);
3481 		      mask_vec = build_vector (mask_type, mask_elts);
3482 		    }
3483 
3484 		  if (second_vec_index == -1)
3485 		    second_vec_index = first_vec_index;
3486 		  vect_create_mask_and_perm (stmt, mask_vec, first_vec_index,
3487 					     second_vec_index,
3488 					     gsi, node, vectype, dr_chain,
3489 					     ncopies, vect_stmts_counter++);
3490 		}
3491 
3492 	      index = 0;
3493 	      first_vec_index = -1;
3494 	      second_vec_index = -1;
3495 	      noop_p = true;
3496 	    }
3497 	}
3498     }
3499 
3500   return true;
3501 }
3502 
3503 
3504 
3505 /* Vectorize SLP instance tree in postorder.  */
3506 
3507 static bool
vect_schedule_slp_instance(slp_tree node,slp_instance instance,unsigned int vectorization_factor)3508 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3509                             unsigned int vectorization_factor)
3510 {
3511   gimple *stmt;
3512   bool grouped_store, is_store;
3513   gimple_stmt_iterator si;
3514   stmt_vec_info stmt_info;
3515   unsigned int vec_stmts_size, nunits, group_size;
3516   tree vectype;
3517   int i, j;
3518   slp_tree child;
3519 
3520   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3521     return false;
3522 
3523   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3524     vect_schedule_slp_instance (child, instance, vectorization_factor);
3525 
3526   /* Push SLP node def-type to stmts.  */
3527   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3528     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3529       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3530 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
3531 
3532   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3533   stmt_info = vinfo_for_stmt (stmt);
3534 
3535   /* VECTYPE is the type of the destination.  */
3536   vectype = STMT_VINFO_VECTYPE (stmt_info);
3537   nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3538   group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3539 
3540   /* For each SLP instance calculate number of vector stmts to be created
3541      for the scalar stmts in each node of the SLP tree.  Number of vector
3542      elements in one vector iteration is the number of scalar elements in
3543      one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3544      size.
3545      Unless this is a SLP reduction in which case the number of vector
3546      stmts is equal to the number of vector stmts of the children.  */
3547   if (GROUP_FIRST_ELEMENT (stmt_info)
3548       && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
3549     vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
3550   else
3551     vec_stmts_size = (vectorization_factor * group_size) / nunits;
3552 
3553   if (!SLP_TREE_VEC_STMTS (node).exists ())
3554     {
3555       SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3556       SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3557     }
3558 
3559   if (dump_enabled_p ())
3560     {
3561       dump_printf_loc (MSG_NOTE,vect_location,
3562 		       "------>vectorizing SLP node starting from: ");
3563       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3564       dump_printf (MSG_NOTE, "\n");
3565     }
3566 
3567   /* Vectorized stmts go before the last scalar stmt which is where
3568      all uses are ready.  */
3569   si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
3570 
3571   /* Mark the first element of the reduction chain as reduction to properly
3572      transform the node.  In the analysis phase only the last element of the
3573      chain is marked as reduction.  */
3574   if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3575       && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3576     {
3577       STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3578       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3579     }
3580 
3581   /* Handle two-operation SLP nodes by vectorizing the group with
3582      both operations and then performing a merge.  */
3583   if (SLP_TREE_TWO_OPERATORS (node))
3584     {
3585       enum tree_code code0 = gimple_assign_rhs_code (stmt);
3586       enum tree_code ocode = ERROR_MARK;
3587       gimple *ostmt;
3588       unsigned char *mask = XALLOCAVEC (unsigned char, group_size);
3589       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3590 	if (gimple_assign_rhs_code (ostmt) != code0)
3591 	  {
3592 	    mask[i] = 1;
3593 	    ocode = gimple_assign_rhs_code (ostmt);
3594 	  }
3595 	else
3596 	  mask[i] = 0;
3597       if (ocode != ERROR_MARK)
3598 	{
3599 	  vec<gimple *> v0;
3600 	  vec<gimple *> v1;
3601 	  unsigned j;
3602 	  tree tmask = NULL_TREE;
3603 	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3604 	  v0 = SLP_TREE_VEC_STMTS (node).copy ();
3605 	  SLP_TREE_VEC_STMTS (node).truncate (0);
3606 	  gimple_assign_set_rhs_code (stmt, ocode);
3607 	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3608 	  gimple_assign_set_rhs_code (stmt, code0);
3609 	  v1 = SLP_TREE_VEC_STMTS (node).copy ();
3610 	  SLP_TREE_VEC_STMTS (node).truncate (0);
3611 	  tree meltype = build_nonstandard_integer_type
3612 	      (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))), 1);
3613 	  tree mvectype = get_same_sized_vectype (meltype, vectype);
3614 	  unsigned k = 0, l;
3615 	  for (j = 0; j < v0.length (); ++j)
3616 	    {
3617 	      tree *melts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (vectype));
3618 	      for (l = 0; l < TYPE_VECTOR_SUBPARTS (vectype); ++l)
3619 		{
3620 		  if (k >= group_size)
3621 		    k = 0;
3622 		  melts[l] = build_int_cst
3623 		      (meltype, mask[k++] * TYPE_VECTOR_SUBPARTS (vectype) + l);
3624 		}
3625 	      tmask = build_vector (mvectype, melts);
3626 
3627 	      /* ???  Not all targets support a VEC_PERM_EXPR with a
3628 	         constant mask that would translate to a vec_merge RTX
3629 		 (with their vec_perm_const_ok).  We can either not
3630 		 vectorize in that case or let veclower do its job.
3631 		 Unfortunately that isn't too great and at least for
3632 		 plus/minus we'd eventually like to match targets
3633 		 vector addsub instructions.  */
3634 	      gimple *vstmt;
3635 	      vstmt = gimple_build_assign (make_ssa_name (vectype),
3636 					   VEC_PERM_EXPR,
3637 					   gimple_assign_lhs (v0[j]),
3638 					   gimple_assign_lhs (v1[j]), tmask);
3639 	      vect_finish_stmt_generation (stmt, vstmt, &si);
3640 	      SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3641 	    }
3642 	  v0.release ();
3643 	  v1.release ();
3644 	  return false;
3645 	}
3646     }
3647   is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3648 
3649   /* Restore stmt def-types.  */
3650   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3651     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3652       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3653 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
3654 
3655   return is_store;
3656 }
3657 
3658 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3659    For loop vectorization this is done in vectorizable_call, but for SLP
3660    it needs to be deferred until end of vect_schedule_slp, because multiple
3661    SLP instances may refer to the same scalar stmt.  */
3662 
3663 static void
vect_remove_slp_scalar_calls(slp_tree node)3664 vect_remove_slp_scalar_calls (slp_tree node)
3665 {
3666   gimple *stmt, *new_stmt;
3667   gimple_stmt_iterator gsi;
3668   int i;
3669   slp_tree child;
3670   tree lhs;
3671   stmt_vec_info stmt_info;
3672 
3673   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3674     return;
3675 
3676   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3677     vect_remove_slp_scalar_calls (child);
3678 
3679   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3680     {
3681       if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3682 	continue;
3683       stmt_info = vinfo_for_stmt (stmt);
3684       if (stmt_info == NULL
3685 	  || is_pattern_stmt_p (stmt_info)
3686 	  || !PURE_SLP_STMT (stmt_info))
3687 	continue;
3688       lhs = gimple_call_lhs (stmt);
3689       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3690       set_vinfo_for_stmt (new_stmt, stmt_info);
3691       set_vinfo_for_stmt (stmt, NULL);
3692       STMT_VINFO_STMT (stmt_info) = new_stmt;
3693       gsi = gsi_for_stmt (stmt);
3694       gsi_replace (&gsi, new_stmt, false);
3695       SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3696     }
3697 }
3698 
3699 /* Generate vector code for all SLP instances in the loop/basic block.  */
3700 
3701 bool
vect_schedule_slp(vec_info * vinfo)3702 vect_schedule_slp (vec_info *vinfo)
3703 {
3704   vec<slp_instance> slp_instances;
3705   slp_instance instance;
3706   unsigned int i, vf;
3707   bool is_store = false;
3708 
3709   slp_instances = vinfo->slp_instances;
3710   if (is_a <loop_vec_info> (vinfo))
3711     vf = as_a <loop_vec_info> (vinfo)->vectorization_factor;
3712   else
3713     vf = 1;
3714 
3715   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3716     {
3717       /* Schedule the tree of INSTANCE.  */
3718       is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3719                                              instance, vf);
3720       if (dump_enabled_p ())
3721 	dump_printf_loc (MSG_NOTE, vect_location,
3722                          "vectorizing stmts using SLP.\n");
3723     }
3724 
3725   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3726     {
3727       slp_tree root = SLP_INSTANCE_TREE (instance);
3728       gimple *store;
3729       unsigned int j;
3730       gimple_stmt_iterator gsi;
3731 
3732       /* Remove scalar call stmts.  Do not do this for basic-block
3733 	 vectorization as not all uses may be vectorized.
3734 	 ???  Why should this be necessary?  DCE should be able to
3735 	 remove the stmts itself.
3736 	 ???  For BB vectorization we can as well remove scalar
3737 	 stmts starting from the SLP tree root if they have no
3738 	 uses.  */
3739       if (is_a <loop_vec_info> (vinfo))
3740 	vect_remove_slp_scalar_calls (root);
3741 
3742       for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3743                   && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3744         {
3745           if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3746             break;
3747 
3748          if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3749            store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3750           /* Free the attached stmt_vec_info and remove the stmt.  */
3751           gsi = gsi_for_stmt (store);
3752 	  unlink_stmt_vdef (store);
3753           gsi_remove (&gsi, true);
3754 	  release_defs (store);
3755           free_stmt_vec_info (store);
3756         }
3757     }
3758 
3759   return is_store;
3760 }
3761