1 /* Analysis Utilities for Loop Vectorization.
2    Copyright (C) 2006-2020 Free Software Foundation, Inc.
3    Contributed by Dorit Nuzman <dorit@il.ibm.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h"		/* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
49 #include "tree-vector-builder.h"
50 #include "vec-perm-indices.h"
51 
52 /* Return true if we have a useful VR_RANGE range for VAR, storing it
53    in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */
54 
55 static bool
56 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
57 {
58   value_range_kind vr_type = get_range_info (var, min_value, max_value);
59   wide_int nonzero = get_nonzero_bits (var);
60   signop sgn = TYPE_SIGN (TREE_TYPE (var));
61   if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
62 					 nonzero, sgn) == VR_RANGE)
63     {
64       if (dump_enabled_p ())
65 	{
66 	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
67 	  dump_printf (MSG_NOTE, " has range [");
68 	  dump_hex (MSG_NOTE, *min_value);
69 	  dump_printf (MSG_NOTE, ", ");
70 	  dump_hex (MSG_NOTE, *max_value);
71 	  dump_printf (MSG_NOTE, "]\n");
72 	}
73       return true;
74     }
75   else
76     {
77       if (dump_enabled_p ())
78 	{
79 	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
80 	  dump_printf (MSG_NOTE, " has no range info\n");
81 	}
82       return false;
83     }
84 }
85 
86 /* Report that we've found an instance of the pattern called NAME in
87    statement STMT.  */
88 
89 static void
90 vect_pattern_detected (const char *name, gimple *stmt)
91 {
92   if (dump_enabled_p ())
93     dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
94 }
95 
96 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
97    return the pattern statement's stmt_vec_info.  Set its vector type to
98    VECTYPE if it doesn't have one already.  */
99 
100 static stmt_vec_info
101 vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
102 			tree vectype)
103 {
104   vec_info *vinfo = orig_stmt_info->vinfo;
105   stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
106   if (pattern_stmt_info == NULL)
107     pattern_stmt_info = orig_stmt_info->vinfo->add_stmt (pattern_stmt);
108   gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
109 
110   pattern_stmt_info->pattern_stmt_p = true;
111   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
112   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
113     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
114   if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
115     {
116       gcc_assert (VECTOR_BOOLEAN_TYPE_P (vectype)
117 		  == vect_use_mask_type_p (orig_stmt_info));
118       STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
119       pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
120     }
121   return pattern_stmt_info;
122 }
123 
124 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
125    Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
126    have one already.  */
127 
128 static void
129 vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
130 		       tree vectype)
131 {
132   STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
133   STMT_VINFO_RELATED_STMT (orig_stmt_info)
134     = vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
135 }
136 
137 /* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
138    is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
139    be different from the vector type of the final pattern statement.
140    If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
141    from which it was derived.  */
142 
143 static inline void
144 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
145 			tree vectype = NULL_TREE,
146 			tree scalar_type_for_mask = NULL_TREE)
147 {
148   gcc_assert (!scalar_type_for_mask
149 	      == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
150   vec_info *vinfo = stmt_info->vinfo;
151   if (vectype)
152     {
153       stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
154       STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
155       if (scalar_type_for_mask)
156 	new_stmt_info->mask_precision
157 	  = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
158     }
159   gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
160 				      new_stmt);
161 }
162 
163 /* The caller wants to perform new operations on vect_external variable
164    VAR, so that the result of the operations would also be vect_external.
165    Return the edge on which the operations can be performed, if one exists.
166    Return null if the operations should instead be treated as part of
167    the pattern that needs them.  */
168 
169 static edge
170 vect_get_external_def_edge (vec_info *vinfo, tree var)
171 {
172   edge e = NULL;
173   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
174     {
175       e = loop_preheader_edge (loop_vinfo->loop);
176       if (!SSA_NAME_IS_DEFAULT_DEF (var))
177 	{
178 	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
179 	  if (bb == NULL
180 	      || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
181 	    e = NULL;
182 	}
183     }
184   return e;
185 }
186 
187 /* Return true if the target supports a vector version of CODE,
188    where CODE is known to map to a direct optab.  ITYPE specifies
189    the type of (some of) the scalar inputs and OTYPE specifies the
190    type of the scalar result.
191 
192    If CODE allows the inputs and outputs to have different types
193    (such as for WIDEN_SUM_EXPR), it is the input mode rather
194    than the output mode that determines the appropriate target pattern.
195    Operand 0 of the target pattern then specifies the mode that the output
196    must have.
197 
198    When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
199    Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
200    is nonnull.  */
201 
202 static bool
203 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
204 				 tree itype, tree *vecotype_out,
205 				 tree *vecitype_out = NULL)
206 {
207   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
208   if (!vecitype)
209     return false;
210 
211   tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
212   if (!vecotype)
213     return false;
214 
215   optab optab = optab_for_tree_code (code, vecitype, optab_default);
216   if (!optab)
217     return false;
218 
219   insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
220   if (icode == CODE_FOR_nothing
221       || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
222     return false;
223 
224   *vecotype_out = vecotype;
225   if (vecitype_out)
226     *vecitype_out = vecitype;
227   return true;
228 }
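
/* For example, the dot-product recognizer later in this file checks for
   target support of DOT_PROD_EXPR roughly as follows (an illustrative
   sketch of that existing call, not new functionality):

     tree half_vectype;
     if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR,
					   half_type, type_out, &half_vectype))
       return NULL;

   where TYPE is the scalar type of the accumulator and HALF_TYPE the
   narrower scalar type of the multiplication inputs.  */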
229 
230 /* Round bit precision PRECISION up to a full element.  */
231 
232 static unsigned int
233 vect_element_precision (unsigned int precision)
234 {
235   precision = 1 << ceil_log2 (precision);
236   return MAX (precision, BITS_PER_UNIT);
237 }
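
/* For instance, vect_element_precision (17) is 32 and
   vect_element_precision (3) is BITS_PER_UNIT (normally 8), since the
   precision is rounded up to a power of two of at least one byte.  */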
238 
239 /* If OP is defined by a statement that's being considered for vectorization,
240    return information about that statement, otherwise return NULL.  */
241 
242 static stmt_vec_info
243 vect_get_internal_def (vec_info *vinfo, tree op)
244 {
245   stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
246   if (def_stmt_info
247       && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
248     return def_stmt_info;
249   return NULL;
250 }
251 
252 /* Check whether NAME, an ssa-name used in STMT_VINFO, is the result of
253    a type conversion (promotion or demotion), such that:
254      DEF_STMT: NAME = NOP (name0)
255    storing the details in *ORIG_TYPE, *DEF_STMT and *PROMOTION.  If
256    CHECK_SIGN is TRUE, require both types to have the same signedness.  */
257 
258 static bool
259 type_conversion_p (tree name, stmt_vec_info stmt_vinfo, bool check_sign,
260 		   tree *orig_type, gimple **def_stmt, bool *promotion)
261 {
262   tree type = TREE_TYPE (name);
263   tree oprnd0;
264   enum vect_def_type dt;
265 
266   stmt_vec_info def_stmt_info;
267   if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, &def_stmt_info,
268 			   def_stmt))
269     return false;
270 
271   if (dt != vect_internal_def
272       && dt != vect_external_def && dt != vect_constant_def)
273     return false;
274 
275   if (!*def_stmt)
276     return false;
277 
278   if (!is_gimple_assign (*def_stmt))
279     return false;
280 
281   if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
282     return false;
283 
284   oprnd0 = gimple_assign_rhs1 (*def_stmt);
285 
286   *orig_type = TREE_TYPE (oprnd0);
287   if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
288       || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
289     return false;
290 
291   if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
292     *promotion = true;
293   else
294     *promotion = false;
295 
296   if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
297     return false;
298 
299   return true;
300 }
301 
302 /* Holds information about an input operand after some sign changes
303    and type promotions have been peeled away.  */
304 class vect_unpromoted_value {
305 public:
306   vect_unpromoted_value ();
307 
308   void set_op (tree, vect_def_type, stmt_vec_info = NULL);
309 
310   /* The value obtained after peeling away zero or more casts.  */
311   tree op;
312 
313   /* The type of OP.  */
314   tree type;
315 
316   /* The definition type of OP.  */
317   vect_def_type dt;
318 
319   /* If OP is the result of peeling at least one cast, and if the cast
320      of OP itself is a vectorizable statement, CASTER identifies that
321      statement, otherwise it is null.  */
322   stmt_vec_info caster;
323 };
324 
325 inline vect_unpromoted_value::vect_unpromoted_value ()
326   : op (NULL_TREE),
327     type (NULL_TREE),
328     dt (vect_uninitialized_def),
329     caster (NULL)
330 {
331 }
332 
333 /* Set the operand to OP_IN, its definition type to DT_IN, and the
334    statement that casts it to CASTER_IN.  */
335 
336 inline void
337 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
338 			       stmt_vec_info caster_in)
339 {
340   op = op_in;
341   type = TREE_TYPE (op);
342   dt = dt_in;
343   caster = caster_in;
344 }
345 
346 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
347    to reach some vectorizable inner operand OP', continuing as long as it
348    is possible to convert OP' back to OP using a possible sign change
349    followed by a possible promotion P.  Return this OP', or null if OP is
350    not a vectorizable SSA name.  If there is a promotion P, describe its
351    input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
352    is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
353    have more than one user.
354 
355    A successful return means that it is possible to go from OP' to OP
356    via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
357    whereas the cast from UNPROM to OP might be a promotion, a sign
358    change, or a nop.
359 
360    E.g. say we have:
361 
362        signed short *ptr = ...;
363        signed short C = *ptr;
364        unsigned short B = (unsigned short) C;    // sign change
365        signed int A = (signed int) B;            // unsigned promotion
366        ...possible other uses of A...
367        unsigned int OP = (unsigned int) A;       // sign change
368 
369    In this case it's possible to go directly from C to OP using:
370 
371        OP = (unsigned int) (unsigned short) C;
372 	    +------------+ +--------------+
373 	       promotion      sign change
374 
375    so OP' would be C.  The input to the promotion is B, so UNPROM
376    would describe B.  */
377 
378 static tree
379 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
380 				      vect_unpromoted_value *unprom,
381 				      bool *single_use_p = NULL)
382 {
383   tree res = NULL_TREE;
384   tree op_type = TREE_TYPE (op);
385   unsigned int orig_precision = TYPE_PRECISION (op_type);
386   unsigned int min_precision = orig_precision;
387   stmt_vec_info caster = NULL;
388   while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
389     {
390       /* See whether OP is simple enough to vectorize.  */
391       stmt_vec_info def_stmt_info;
392       gimple *def_stmt;
393       vect_def_type dt;
394       if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
395 	break;
396 
397       /* If OP is the input of a demotion, skip over it to see whether
398 	 OP is itself the result of a promotion.  If so, the combined
399 	 effect of the promotion and the demotion might fit the required
400 	 pattern, otherwise neither operation fits.
401 
402 	 This copes with cases such as the result of an arithmetic
403 	 operation being truncated before being stored, and where that
404 	 arithmetic operation has been recognized as an over-widened one.  */
405       if (TYPE_PRECISION (op_type) <= min_precision)
406 	{
407 	  /* Use OP as the UNPROM described above if we haven't yet
408 	     found a promotion, or if using the new input preserves the
409 	     sign of the previous promotion.  */
410 	  if (!res
411 	      || TYPE_PRECISION (unprom->type) == orig_precision
412 	      || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
413 	    {
414 	      unprom->set_op (op, dt, caster);
415 	      min_precision = TYPE_PRECISION (op_type);
416 	    }
417 	  /* Stop if we've already seen a promotion and if this
418 	     conversion does more than change the sign.  */
419 	  else if (TYPE_PRECISION (op_type)
420 		   != TYPE_PRECISION (unprom->type))
421 	    break;
422 
423 	  /* The sequence now extends to OP.  */
424 	  res = op;
425 	}
426 
427       /* See whether OP is defined by a cast.  Record it as CASTER if
428 	 the cast is potentially vectorizable.  */
429       if (!def_stmt)
430 	break;
431       caster = def_stmt_info;
432 
433       /* Ignore pattern statements, since we don't link uses for them.  */
434       if (caster
435 	  && single_use_p
436 	  && !STMT_VINFO_RELATED_STMT (caster)
437 	  && !has_single_use (res))
438 	*single_use_p = false;
439 
440       gassign *assign = dyn_cast <gassign *> (def_stmt);
441       if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
442 	break;
443 
444       /* Continue with the input to the cast.  */
445       op = gimple_assign_rhs1 (def_stmt);
446       op_type = TREE_TYPE (op);
447     }
448   return res;
449 }
450 
451 /* OP is an integer operand to an operation that returns TYPE, and we
452    want to treat the operation as a widening one.  So far we can treat
453    it as widening from *COMMON_TYPE.
454 
455    Return true if OP is suitable for such a widening operation,
456    either widening from *COMMON_TYPE or from some supertype of it.
457    Update *COMMON_TYPE to the supertype in the latter case.
458 
459    SHIFT_P is true if OP is a shift amount.  */
460 
461 static bool
462 vect_joust_widened_integer (tree type, bool shift_p, tree op,
463 			    tree *common_type)
464 {
465   /* Calculate the minimum precision required by OP, without changing
466      the sign of either operand.  */
467   unsigned int precision;
468   if (shift_p)
469     {
470       if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
471 	return false;
472       precision = TREE_INT_CST_LOW (op);
473     }
474   else
475     {
476       precision = wi::min_precision (wi::to_widest (op),
477 				     TYPE_SIGN (*common_type));
478       if (precision * 2 > TYPE_PRECISION (type))
479 	return false;
480     }
481 
482   /* If OP requires a wider type, switch to that type.  The checks
483      above ensure that this is still narrower than the result.  */
484   precision = vect_element_precision (precision);
485   if (TYPE_PRECISION (*common_type) < precision)
486     *common_type = build_nonstandard_integer_type
487       (precision, TYPE_UNSIGNED (*common_type));
488   return true;
489 }
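
/* As an illustrative example (not drawn from the callers above): with TYPE
   a 32-bit integer, *COMMON_TYPE unsigned char and OP the constant 1000,
   OP needs 10 bits, which vect_element_precision rounds up to 16, so the
   function succeeds and widens *COMMON_TYPE to a 16-bit unsigned type.  */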
490 
491 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
492    is narrower than TYPE, storing the supertype in *COMMON_TYPE if so.  */
493 
494 static bool
495 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
496 {
497   if (types_compatible_p (*common_type, new_type))
498     return true;
499 
500   /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
501   if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
502       && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
503     return true;
504 
505   /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
506   if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
507       && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
508     {
509       *common_type = new_type;
510       return true;
511     }
512 
513   /* We have mismatched signs, with the signed type being
514      no wider than the unsigned type.  In this case we need
515      a wider signed type.  */
516   unsigned int precision = MAX (TYPE_PRECISION (*common_type),
517 				TYPE_PRECISION (new_type));
518   precision *= 2;
519   if (precision * 2 > TYPE_PRECISION (type))
520     return false;
521 
522   *common_type = build_nonstandard_integer_type (precision, false);
523   return true;
524 }
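
/* For example (illustrative only): if TYPE is a 32-bit integer, *COMMON_TYPE
   is unsigned char and NEW_TYPE is signed char, neither input type can hold
   all values of the other, so the mismatched-sign case applies and
   *COMMON_TYPE becomes a 16-bit signed type, which is still narrower
   than TYPE.  */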
525 
526 /* Check whether STMT_INFO can be viewed as a tree of integer operations
527    in which each node either performs CODE or WIDENED_CODE, and where
528    each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
529    specifies the maximum number of leaf operands.  SHIFT_P says whether
530    CODE and WIDENED_CODE are some sort of shift.
531 
532    If STMT_INFO is such a tree, return the number of leaf operands
533    and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
534    to a type that (a) is narrower than the result of STMT_INFO and
535    (b) can hold all leaf operand values.
536 
537    Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
538    exists.  */
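/* For instance, given the scalar code below (an illustrative example, not
   taken from this file), calling this function on S3 with CODE == MULT_EXPR,
   WIDENED_CODE == WIDEN_MULT_EXPR and MAX_NOPS == 2 returns 2, describes
   'a' and 'b' in UNPROM[0] and UNPROM[1], and sets *COMMON_TYPE to
   signed char:

     signed char a, b;
     int ax = (int) a;		// S1
     int bx = (int) b;		// S2
     int res = ax * bx;		// S3
   */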
539 
540 static unsigned int
541 vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
542 		      tree_code widened_code, bool shift_p,
543 		      unsigned int max_nops,
544 		      vect_unpromoted_value *unprom, tree *common_type)
545 {
546   /* Check for an integer operation with the right code.  */
547   vec_info *vinfo = stmt_info->vinfo;
548   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
549   if (!assign)
550     return 0;
551 
552   tree_code rhs_code = gimple_assign_rhs_code (assign);
553   if (rhs_code != code && rhs_code != widened_code)
554     return 0;
555 
556   tree type = gimple_expr_type (assign);
557   if (!INTEGRAL_TYPE_P (type))
558     return 0;
559 
560   /* Assume that both operands will be leaf operands.  */
561   max_nops -= 2;
562 
563   /* Check the operands.  */
564   unsigned int next_op = 0;
565   for (unsigned int i = 0; i < 2; ++i)
566     {
567       vect_unpromoted_value *this_unprom = &unprom[next_op];
568       unsigned int nops = 1;
569       tree op = gimple_op (assign, i + 1);
570       if (i == 1 && TREE_CODE (op) == INTEGER_CST)
571 	{
572 	  /* We already have a common type from earlier operands.
573 	     Update it to account for OP.  */
574 	  this_unprom->set_op (op, vect_constant_def);
575 	  if (!vect_joust_widened_integer (type, shift_p, op, common_type))
576 	    return 0;
577 	}
578       else
579 	{
580 	  /* Only allow shifts by constants.  */
581 	  if (shift_p && i == 1)
582 	    return 0;
583 
584 	  if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
585 						     this_unprom))
586 	    return 0;
587 
588 	  if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
589 	    {
590 	      /* The operand isn't widened.  If STMT_INFO has the code
591 		 for an unwidened operation, recursively check whether
592 		 this operand is a node of the tree.  */
593 	      if (rhs_code != code
594 		  || max_nops == 0
595 		  || this_unprom->dt != vect_internal_def)
596 		return 0;
597 
598 	      /* Give back the leaf slot allocated above now that we're
599 		 not treating this as a leaf operand.  */
600 	      max_nops += 1;
601 
602 	      /* Recursively process the definition of the operand.  */
603 	      stmt_vec_info def_stmt_info
604 		= vinfo->lookup_def (this_unprom->op);
605 	      nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
606 					   shift_p, max_nops, this_unprom,
607 					   common_type);
608 	      if (nops == 0)
609 		return 0;
610 
611 	      max_nops -= nops;
612 	    }
613 	  else
614 	    {
615 	      /* Make sure that the operand is narrower than the result.  */
616 	      if (TYPE_PRECISION (this_unprom->type) * 2
617 		  > TYPE_PRECISION (type))
618 		return 0;
619 
620 	      /* Update COMMON_TYPE for the new operand.  */
621 	      if (i == 0)
622 		*common_type = this_unprom->type;
623 	      else if (!vect_joust_widened_type (type, this_unprom->type,
624 						 common_type))
625 		return 0;
626 	    }
627 	}
628       next_op += nops;
629     }
630   return next_op;
631 }
632 
633 /* Helper to return a new temporary SSA name of type TYPE for pattern STMT.
634    If STMT is NULL, the caller must set SSA_NAME_DEF_STMT for the result.  */
635 
636 static tree
637 vect_recog_temp_ssa_var (tree type, gimple *stmt)
638 {
639   return make_temp_ssa_name (type, stmt, "patt");
640 }
641 
642 /* STMT2_INFO describes a type conversion that could be split into STMT1
643    followed by a version of STMT2_INFO that takes NEW_RHS as its first
644    input.  Try to do this using pattern statements, returning true on
645    success.  */
646 
647 static bool
648 vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
649 		      gimple *stmt1, tree vectype)
650 {
651   vec_info *vinfo = stmt2_info->vinfo;
652   if (is_pattern_stmt_p (stmt2_info))
653     {
654       /* STMT2_INFO is part of a pattern.  Get the statement to which
655 	 the pattern is attached.  */
656       stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
657       vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);
658 
659       if (dump_enabled_p ())
660 	dump_printf_loc (MSG_NOTE, vect_location,
661 			 "Splitting pattern statement: %G", stmt2_info->stmt);
662 
663       /* Since STMT2_INFO is a pattern statement, we can change it
664 	 in-situ without worrying about changing the code for the
665 	 containing block.  */
666       gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
667 
668       if (dump_enabled_p ())
669 	{
670 	  dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
671 	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
672 			   stmt2_info->stmt);
673 	}
674 
675       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
676       if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
677 	/* STMT2_INFO is the actual pattern statement.  Add STMT1
678 	   to the end of the definition sequence.  */
679 	gimple_seq_add_stmt_without_update (def_seq, stmt1);
680       else
681 	{
682 	  /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
683 	     before it.  */
684 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
685 	  gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
686 	}
687       return true;
688     }
689   else
690     {
691       /* STMT2_INFO doesn't yet have a pattern.  Try to create a
692 	 two-statement pattern now.  */
693       gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
694       tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
695       tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
696       if (!lhs_vectype)
697 	return false;
698 
699       if (dump_enabled_p ())
700 	dump_printf_loc (MSG_NOTE, vect_location,
701 			 "Splitting statement: %G", stmt2_info->stmt);
702 
703       /* Add STMT1 as a singleton pattern definition sequence.  */
704       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
705       vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
706       gimple_seq_add_stmt_without_update (def_seq, stmt1);
707 
708       /* Build the second of the two pattern statements.  */
709       tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
710       gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
711       vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);
712 
713       if (dump_enabled_p ())
714 	{
715 	  dump_printf_loc (MSG_NOTE, vect_location,
716 			   "into pattern statements: %G", stmt1);
717 	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
718 	}
719 
720       return true;
721     }
722 }
723 
724 /* Convert UNPROM to TYPE and return the result, adding new statements
725    to STMT_INFO's pattern definition statements if no better way is
726    available.  VECTYPE is the vector form of TYPE.  */
727 
728 static tree
729 vect_convert_input (stmt_vec_info stmt_info, tree type,
730 		    vect_unpromoted_value *unprom, tree vectype)
731 {
732   vec_info *vinfo = stmt_info->vinfo;
733 
734   /* Check for a no-op conversion.  */
735   if (types_compatible_p (type, TREE_TYPE (unprom->op)))
736     return unprom->op;
737 
738   /* Allow the caller to create constant vect_unpromoted_values.  */
739   if (TREE_CODE (unprom->op) == INTEGER_CST)
740     return wide_int_to_tree (type, wi::to_widest (unprom->op));
741 
742   tree input = unprom->op;
743   if (unprom->caster)
744     {
745       tree lhs = gimple_get_lhs (unprom->caster->stmt);
746       tree lhs_type = TREE_TYPE (lhs);
747 
748       /* If the result of the existing cast is the right width, use it
749 	 instead of the source of the cast.  */
750       if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
751 	input = lhs;
752       /* If the precision we want is between the source and result
753 	 precisions of the existing cast, try splitting the cast into
754 	 two and tapping into a mid-way point.  */
755       else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
756 	       && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
757 	{
758 	  /* In order to preserve the semantics of the original cast,
759 	     give the mid-way point the same signedness as the input value.
760 
761 	     It would be possible to use a signed type here instead if
762 	     TYPE is signed and UNPROM->TYPE is unsigned, but that would
763 	     make the sign of the midtype sensitive to the order in
764 	     which we process the statements, since the signedness of
765 	     TYPE is the signedness required by just one of possibly
766 	     many users.  Also, unsigned promotions are usually as cheap
767 	     as or cheaper than signed ones, so it's better to keep an
768 	     unsigned promotion.  */
769 	  tree midtype = build_nonstandard_integer_type
770 	    (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
771 	  tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
772 	  if (vec_midtype)
773 	    {
774 	      input = vect_recog_temp_ssa_var (midtype, NULL);
775 	      gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
776 						       unprom->op);
777 	      if (!vect_split_statement (unprom->caster, input, new_stmt,
778 					 vec_midtype))
779 		append_pattern_def_seq (stmt_info, new_stmt, vec_midtype);
780 	    }
781 	}
782 
783       /* See if we can reuse an existing result.  */
784       if (types_compatible_p (type, TREE_TYPE (input)))
785 	return input;
786     }
787 
788   /* We need a new conversion statement.  */
789   tree new_op = vect_recog_temp_ssa_var (type, NULL);
790   gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
791 
792   /* If OP is an external value, see if we can insert the new statement
793      on an incoming edge.  */
794   if (input == unprom->op && unprom->dt == vect_external_def)
795     if (edge e = vect_get_external_def_edge (stmt_info->vinfo, input))
796       {
797 	basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
798 	gcc_assert (!new_bb);
799 	return new_op;
800       }
801 
802   /* As a (common) last resort, add the statement to the pattern itself.  */
803   append_pattern_def_seq (stmt_info, new_stmt, vectype);
804   return new_op;
805 }
806 
807 /* Invoke vect_convert_input for N elements of UNPROM and store the
808    result in the corresponding elements of RESULT.  */
809 
810 static void
811 vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
812 		     tree *result, tree type, vect_unpromoted_value *unprom,
813 		     tree vectype)
814 {
815   for (unsigned int i = 0; i < n; ++i)
816     {
817       unsigned int j;
818       for (j = 0; j < i; ++j)
819 	if (unprom[j].op == unprom[i].op)
820 	  break;
821       if (j < i)
822 	result[i] = result[j];
823       else
824 	result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
825     }
826 }
827 
828 /* The caller has created a (possibly empty) sequence of pattern definition
829    statements followed by a single statement PATTERN_STMT.  Cast the result
830    of this final statement to TYPE.  If a new statement is needed, add
831    PATTERN_STMT to the end of STMT_INFO's pattern definition statements
832    and return the new statement, otherwise return PATTERN_STMT as-is.
833    VECITYPE is the vector form of PATTERN_STMT's result type.  */
834 
835 static gimple *
836 vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
837 		     tree vecitype)
838 {
839   tree lhs = gimple_get_lhs (pattern_stmt);
840   if (!types_compatible_p (type, TREE_TYPE (lhs)))
841     {
842       append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
843       tree cast_var = vect_recog_temp_ssa_var (type, NULL);
844       pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
845     }
846   return pattern_stmt;
847 }
848 
849 /* Return true if STMT_INFO describes a reduction for which reassociation
850    is allowed.  If STMT_INFO is part of a group, assume that it's part of
851    a reduction chain and optimistically assume that all statements
852    except the last allow reassociation.
853    Also require it to have code CODE and to be a reduction
854    in the outermost loop.  When returning true, store the operands in
855    *OP0_OUT and *OP1_OUT.  */
856 
857 static bool
858 vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
859 				tree *op0_out, tree *op1_out)
860 {
861   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
862   if (!loop_info)
863     return false;
864 
865   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
866   if (!assign || gimple_assign_rhs_code (assign) != code)
867     return false;
868 
869   /* We don't allow changing the order of the computation in the inner-loop
870      when doing outer-loop vectorization.  */
871   class loop *loop = LOOP_VINFO_LOOP (loop_info);
872   if (loop && nested_in_vect_loop_p (loop, stmt_info))
873     return false;
874 
875   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
876     {
877       if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
878 				       code))
879 	return false;
880     }
881   else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
882     return false;
883 
884   *op0_out = gimple_assign_rhs1 (assign);
885   *op1_out = gimple_assign_rhs2 (assign);
886   if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
887     std::swap (*op0_out, *op1_out);
888   return true;
889 }
890 
891 /* Function vect_recog_dot_prod_pattern
892 
893    Try to find the following pattern:
894 
895      type x_t, y_t;
896      TYPE1 prod;
897      TYPE2 sum = init;
898    loop:
899      sum_0 = phi <init, sum_1>
900      S1  x_t = ...
901      S2  y_t = ...
902      S3  x_T = (TYPE1) x_t;
903      S4  y_T = (TYPE1) y_t;
904      S5  prod = x_T * y_T;
905      [S6  prod = (TYPE2) prod;  #optional]
906      S7  sum_1 = prod + sum_0;
907 
908    where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
909    the same size as 'TYPE1' or bigger.  This is a special case of a reduction
910    computation.
911 
912    Input:
913 
914    * STMT_VINFO: The stmt from which the pattern search begins.  In the
915    example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
916    will be detected.
917 
918    Output:
919 
920    * TYPE_OUT: The type of the output  of this pattern.
921 
922    * Return value: A new stmt that will be used to replace the sequence of
923    stmts that constitute the pattern. In this case it will be:
924         DOT_PROD_EXPR <x_t, y_t, sum_0>
925 
926    Note: The dot-prod idiom is a widening reduction pattern that is
927          vectorized without preserving all the intermediate results. It
928          produces only N/2 (widened) results (by summing up pairs of
929          intermediate results) rather than all N results.  Therefore, we
930          cannot allow this pattern when we want to get all the results and in
931          the correct order (as is the case when this computation is in an
932          inner-loop nested in an outer-loop that is being vectorized).  */
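/* A minimal scalar loop of the kind this recognizer matches (an
   illustrative sketch, not taken from the testsuite):

     short x[256], y[256];
     int sum = 0;
     for (int i = 0; i < 256; ++i)
       sum += (int) x[i] * (int) y[i];	// S3-S5 and S7 above

   Here 'type' is short and TYPE1 and TYPE2 are both int, so the loop body
   becomes DOT_PROD_EXPR <x[i], y[i], sum> when the target supports it.  */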
933 
934 static gimple *
935 vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
936 {
937   tree oprnd0, oprnd1;
938   gimple *last_stmt = stmt_vinfo->stmt;
939   vec_info *vinfo = stmt_vinfo->vinfo;
940   tree type, half_type;
941   gimple *pattern_stmt;
942   tree var;
943 
944   /* Look for the following pattern
945           DX = (TYPE1) X;
946           DY = (TYPE1) Y;
947           DPROD = DX * DY;
948           DDPROD = (TYPE2) DPROD;
949           sum_1 = DDPROD + sum_0;
950      In which
951      - DX is double the size of X
952      - DY is double the size of Y
953      - DX, DY, DPROD all have the same type
954      - sum is the same size as DPROD or bigger
955      - sum has been recognized as a reduction variable.
956 
957      This is equivalent to:
958        DPROD = X w* Y;          #widen mult
959        sum_1 = DPROD w+ sum_0;  #widen summation
960      or
961        DPROD = X w* Y;          #widen mult
962        sum_1 = DPROD + sum_0;   #summation
963    */
964 
965   /* Starting from LAST_STMT, follow the defs of its uses in search
966      of the above pattern.  */
967 
968   if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
969 				       &oprnd0, &oprnd1))
970     return NULL;
971 
972   type = gimple_expr_type (last_stmt);
973 
974   vect_unpromoted_value unprom_mult;
975   oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
976 
977   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
978      we know that oprnd1 is the reduction variable (defined by a loop-header
979      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
980      It remains to check that oprnd0 is defined by a (widen_)mult_expr.  */
981   if (!oprnd0)
982     return NULL;
983 
984   stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
985   if (!mult_vinfo)
986     return NULL;
987 
988   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a phi
989      inside the loop (in case we are analyzing an outer-loop).  */
990   vect_unpromoted_value unprom0[2];
991   if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
992 			     false, 2, unprom0, &half_type))
993     return NULL;
994 
995   /* If there are two widening operations, make sure they agree on
996      the sign of the extension.  */
997   if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
998       && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
999     return NULL;
1000 
1001   vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1002 
1003   tree half_vectype;
1004   if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1005 					type_out, &half_vectype))
1006     return NULL;
1007 
1008   /* Get the inputs in the appropriate types.  */
1009   tree mult_oprnd[2];
1010   vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
1011 		       unprom0, half_vectype);
1012 
1013   var = vect_recog_temp_ssa_var (type, NULL);
1014   pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1015 				      mult_oprnd[0], mult_oprnd[1], oprnd1);
1016 
1017   return pattern_stmt;
1018 }
1019 
1020 
1021 /* Function vect_recog_sad_pattern
1022 
1023    Try to find the following Sum of Absolute Difference (SAD) pattern:
1024 
1025      type x_t, y_t;
1026      signed TYPE1 diff, abs_diff;
1027      TYPE2 sum = init;
1028    loop:
1029      sum_0 = phi <init, sum_1>
1030      S1  x_t = ...
1031      S2  y_t = ...
1032      S3  x_T = (TYPE1) x_t;
1033      S4  y_T = (TYPE1) y_t;
1034      S5  diff = x_T - y_T;
1035      S6  abs_diff = ABS_EXPR <diff>;
1036      [S7  abs_diff = (TYPE2) abs_diff;  #optional]
1037      S8  sum_1 = abs_diff + sum_0;
1038 
1039    where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
1040    the same size as 'TYPE1' or bigger.  This is a special case of a reduction
1041    computation.
1042 
1043    Input:
1044 
1045    * STMT_VINFO: The stmt from which the pattern search begins.  In the
1046    example, when this function is called with S8, the pattern
1047    {S3,S4,S5,S6,S7,S8} will be detected.
1048 
1049    Output:
1050 
1051    * TYPE_OUT: The type of the output of this pattern.
1052 
1053    * Return value: A new stmt that will be used to replace the sequence of
1054    stmts that constitute the pattern. In this case it will be:
1055         SAD_EXPR <x_t, y_t, sum_0>
1056   */
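/* A minimal scalar loop of the kind this recognizer matches (an
   illustrative sketch only):

     unsigned char x[256], y[256];
     int sad = 0;
     for (int i = 0; i < 256; ++i)
       {
	 int diff = (int) x[i] - (int) y[i];	// S3-S5
	 sad += diff < 0 ? -diff : diff;	// S6-S8, once folded to ABS_EXPR
       }

   giving SAD_EXPR <x[i], y[i], sad> with 'type' unsigned char and TYPE1 and
   TYPE2 both int, provided the target supports the operation.  */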
1057 
1058 static gimple *
1059 vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1060 {
1061   gimple *last_stmt = stmt_vinfo->stmt;
1062   vec_info *vinfo = stmt_vinfo->vinfo;
1063   tree half_type;
1064 
1065   /* Look for the following pattern
1066           DX = (TYPE1) X;
1067           DY = (TYPE1) Y;
1068           DDIFF = DX - DY;
1069           DAD = ABS_EXPR <DDIFF>;
1070           [DAD = (TYPE2) DAD;   #optional]
1071           sum_1 = DAD + sum_0;
1072      In which
1073      - DX is at least double the size of X
1074      - DY is at least double the size of Y
1075      - DX, DY, DDIFF, DAD all have the same type
1076      - sum is the same size as DAD or bigger
1077      - sum has been recognized as a reduction variable.
1078 
1079      This is equivalent to:
1080        DDIFF = X w- Y;          #widen sub
1081        DAD = ABS_EXPR <DDIFF>;
1082        sum_1 = DAD w+ sum_0;    #widen summation
1083      or
1084        DDIFF = X w- Y;          #widen sub
1085        DAD = ABS_EXPR <DDIFF>;
1086        sum_1 = DAD + sum_0;     #summation
1087    */
1088 
1089   /* Starting from LAST_STMT, follow the defs of its uses in search
1090      of the above pattern.  */
1091 
1092   tree plus_oprnd0, plus_oprnd1;
1093   if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1094 				       &plus_oprnd0, &plus_oprnd1))
1095     return NULL;
1096 
1097   tree sum_type = gimple_expr_type (last_stmt);
1098 
1099   /* Any non-truncating sequence of conversions is OK here, since
1100      with a successful match, the result of the ABS(U) is known to fit
1101      within the nonnegative range of the result type.  (It cannot be the
1102      negative of the minimum signed value due to the range of the widening
1103      MINUS_EXPR.)  */
1104   vect_unpromoted_value unprom_abs;
1105   plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1106 						      &unprom_abs);
1107 
1108   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
1109      we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1110      phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1111      Then check that plus_oprnd0 is defined by an abs_expr.  */
1112 
1113   if (!plus_oprnd0)
1114     return NULL;
1115 
1116   stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1117   if (!abs_stmt_vinfo)
1118     return NULL;
1119 
1120   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a phi
1121      inside the loop (in case we are analyzing an outer-loop).  */
1122   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1123   if (!abs_stmt
1124       || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1125 	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1126     return NULL;
1127 
1128   tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1129   tree abs_type = TREE_TYPE (abs_oprnd);
1130   if (TYPE_UNSIGNED (abs_type))
1131     return NULL;
1132 
1133   /* Peel off conversions from the ABS input.  This can involve sign
1134      changes (e.g. from an unsigned subtraction to a signed ABS input)
1135      or signed promotion, but it can't include unsigned promotion.
1136      (Note that ABS of an unsigned promotion should have been folded
1137      away before now anyway.)  */
1138   vect_unpromoted_value unprom_diff;
1139   abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1140 						    &unprom_diff);
1141   if (!abs_oprnd)
1142     return NULL;
1143   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1144       && TYPE_UNSIGNED (unprom_diff.type))
1145     return NULL;
1146 
1147   /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
1148   stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1149   if (!diff_stmt_vinfo)
1150     return NULL;
1151 
1152   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a phi
1153      inside the loop (in case we are analyzing an outer-loop).  */
1154   vect_unpromoted_value unprom[2];
1155   if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
1156 			     false, 2, unprom, &half_type))
1157     return NULL;
1158 
1159   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1160 
1161   tree half_vectype;
1162   if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1163 					type_out, &half_vectype))
1164     return NULL;
1165 
1166   /* Get the inputs to the SAD_EXPR in the appropriate types.  */
1167   tree sad_oprnd[2];
1168   vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
1169 		       unprom, half_vectype);
1170 
1171   tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1172   gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1173 					      sad_oprnd[1], plus_oprnd1);
1174 
1175   return pattern_stmt;
1176 }
1177 
1178 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1179    so that it can be treated as though it had the form:
1180 
1181       A_TYPE a;
1182       B_TYPE b;
1183       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
1184       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
1185     | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
1186     | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
1187     | RES_TYPE res = a_extend ORIG_CODE b_extend;
1188 
1189    Try to replace the pattern with:
1190 
1191       A_TYPE a;
1192       B_TYPE b;
1193       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
1194       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
1195     | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1196     | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op
1197 
1198    where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1199 
1200    SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
1201    name of the pattern being matched, for dump purposes.  */
1202 
1203 static gimple *
1204 vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
1205 			     tree_code orig_code, tree_code wide_code,
1206 			     bool shift_p, const char *name)
1207 {
1208   vec_info *vinfo = last_stmt_info->vinfo;
1209   gimple *last_stmt = last_stmt_info->stmt;
1210 
1211   vect_unpromoted_value unprom[2];
1212   tree half_type;
1213   if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
1214 			     shift_p, 2, unprom, &half_type))
1215     return NULL;
1216 
1217   /* Pattern detected.  */
1218   vect_pattern_detected (name, last_stmt);
1219 
1220   tree type = gimple_expr_type (last_stmt);
1221   tree itype = type;
1222   if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1223       || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1224     itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1225 					    TYPE_UNSIGNED (half_type));
1226 
1227   /* Check target support  */
1228   tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1229   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1230   enum tree_code dummy_code;
1231   int dummy_int;
1232   auto_vec<tree> dummy_vec;
1233   if (!vectype
1234       || !vecitype
1235       || !supportable_widening_operation (wide_code, last_stmt_info,
1236 					  vecitype, vectype,
1237 					  &dummy_code, &dummy_code,
1238 					  &dummy_int, &dummy_vec))
1239     return NULL;
1240 
1241   *type_out = get_vectype_for_scalar_type (vinfo, type);
1242   if (!*type_out)
1243     return NULL;
1244 
1245   tree oprnd[2];
1246   vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);
1247 
1248   tree var = vect_recog_temp_ssa_var (itype, NULL);
1249   gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1250 					      oprnd[0], oprnd[1]);
1251 
1252   return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
1253 }
1254 
1255 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1256    to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */
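/* A minimal scalar example (illustrative only): with 'short' inputs and an
   'int' result,

     short a[256], b[256];
     int c[256];
     for (int i = 0; i < 256; ++i)
       c[i] = (int) a[i] * (int) b[i];

   the multiplication becomes WIDEN_MULT_EXPR <a[i], b[i]> when the target
   provides a widening multiply for the corresponding vector modes.  */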
1257 
1258 static gimple *
1259 vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1260 {
1261   return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
1262 				      WIDEN_MULT_EXPR, false,
1263 				      "vect_recog_widen_mult_pattern");
1264 }
1265 
1266 /* Function vect_recog_pow_pattern
1267 
1268    Try to find the following pattern:
1269 
1270      x = POW (y, N);
1271 
1272    with POW being one of pow, powf, powi, powif and N being
1273    either 2 or 0.5.
1274 
1275    Input:
1276 
1277    * STMT_VINFO: The stmt from which the pattern search begins.
1278 
1279    Output:
1280 
1281    * TYPE_OUT: The type of the output of this pattern.
1282 
1283    * Return value: A new stmt that will be used to replace the sequence of
1284    stmts that constitute the pattern. In this case it will be:
1285         x = x * x
1286    or
1287 	x = sqrt (x)
1288 */
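/* For example (illustrative only), a call such as

     double y = pow (x, 2.0);

   is rewritten as a multiplication of x by itself, while

     double z = pow (x, 0.5);

   becomes an IFN_SQRT call when the target supports vector sqrt.  */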
1289 
1290 static gimple *
1291 vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1292 {
1293   vec_info *vinfo = stmt_vinfo->vinfo;
1294   gimple *last_stmt = stmt_vinfo->stmt;
1295   tree base, exp;
1296   gimple *stmt;
1297   tree var;
1298 
1299   if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1300     return NULL;
1301 
1302   switch (gimple_call_combined_fn (last_stmt))
1303     {
1304     CASE_CFN_POW:
1305     CASE_CFN_POWI:
1306       break;
1307 
1308     default:
1309       return NULL;
1310     }
1311 
1312   base = gimple_call_arg (last_stmt, 0);
1313   exp = gimple_call_arg (last_stmt, 1);
1314   if (TREE_CODE (exp) != REAL_CST
1315       && TREE_CODE (exp) != INTEGER_CST)
1316     {
1317       if (flag_unsafe_math_optimizations
1318 	  && TREE_CODE (base) == REAL_CST
1319 	  && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
1320 	{
1321 	  combined_fn log_cfn;
1322 	  built_in_function exp_bfn;
1323 	  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1324 	    {
1325 	    case BUILT_IN_POW:
1326 	      log_cfn = CFN_BUILT_IN_LOG;
1327 	      exp_bfn = BUILT_IN_EXP;
1328 	      break;
1329 	    case BUILT_IN_POWF:
1330 	      log_cfn = CFN_BUILT_IN_LOGF;
1331 	      exp_bfn = BUILT_IN_EXPF;
1332 	      break;
1333 	    case BUILT_IN_POWL:
1334 	      log_cfn = CFN_BUILT_IN_LOGL;
1335 	      exp_bfn = BUILT_IN_EXPL;
1336 	      break;
1337 	    default:
1338 	      return NULL;
1339 	    }
1340 	  tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1341 	  tree exp_decl = builtin_decl_implicit (exp_bfn);
1342 	  /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
1343 	     does that, but if C is a power of 2, we want to use
1344 	     exp2 (log2 (C) * x) in the non-vectorized version, but for
1345 	     vectorization we don't have vectorized exp2.  */
1346 	  if (logc
1347 	      && TREE_CODE (logc) == REAL_CST
1348 	      && exp_decl
1349 	      && lookup_attribute ("omp declare simd",
1350 				   DECL_ATTRIBUTES (exp_decl)))
1351 	    {
1352 	      cgraph_node *node = cgraph_node::get_create (exp_decl);
1353 	      if (node->simd_clones == NULL)
1354 		{
1355 		  if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1356 		      || node->definition)
1357 		    return NULL;
1358 		  expand_simd_clones (node);
1359 		  if (node->simd_clones == NULL)
1360 		    return NULL;
1361 		}
1362 	      *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1363 	      if (!*type_out)
1364 		return NULL;
1365 	      tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1366 	      gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1367 	      append_pattern_def_seq (stmt_vinfo, g);
1368 	      tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1369 	      g = gimple_build_call (exp_decl, 1, def);
1370 	      gimple_call_set_lhs (g, res);
1371 	      return g;
1372 	    }
1373 	}
1374 
1375       return NULL;
1376     }
1377 
1378   /* We now have a pow or powi builtin function call with a constant
1379      exponent.  */
1380 
1381   /* Catch squaring.  */
1382   if ((tree_fits_shwi_p (exp)
1383        && tree_to_shwi (exp) == 2)
1384       || (TREE_CODE (exp) == REAL_CST
1385           && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1386     {
1387       if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
1388 					    TREE_TYPE (base), type_out))
1389 	return NULL;
1390 
1391       var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1392       stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1393       return stmt;
1394     }
1395 
1396   /* Catch square root.  */
1397   if (TREE_CODE (exp) == REAL_CST
1398       && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1399     {
1400       *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1401       if (*type_out
1402 	  && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1403 					     OPTIMIZE_FOR_SPEED))
1404 	{
1405 	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1406 	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1407 	  gimple_call_set_lhs (stmt, var);
1408 	  gimple_call_set_nothrow (stmt, true);
1409 	  return stmt;
1410 	}
1411     }
1412 
1413   return NULL;
1414 }
1415 
1416 
1417 /* Function vect_recog_widen_sum_pattern
1418 
1419    Try to find the following pattern:
1420 
1421      type x_t;
1422      TYPE x_T, sum = init;
1423    loop:
1424      sum_0 = phi <init, sum_1>
1425      S1  x_t = *p;
1426      S2  x_T = (TYPE) x_t;
1427      S3  sum_1 = x_T + sum_0;
1428 
1429    where type 'TYPE' is at least double the size of type 'type', i.e. we're
1430    summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1431    a special case of a reduction computation.
1432 
1433    Input:
1434 
1435    * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1436    when this function is called with S3, the pattern {S2,S3} will be detected.
1437 
1438    Output:
1439 
1440    * TYPE_OUT: The type of the output of this pattern.
1441 
1442    * Return value: A new stmt that will be used to replace the sequence of
1443    stmts that constitute the pattern. In this case it will be:
1444         WIDEN_SUM_EXPR <x_t, sum_0>
1445 
1446    Note: The widening-sum idiom is a widening reduction pattern that is
1447 	 vectorized without preserving all the intermediate results. It
1448          produces only N/2 (widened) results (by summing up pairs of
1449 	 intermediate results) rather than all N results.  Therefore, we
1450 	 cannot allow this pattern when we want to get all the results and in
1451 	 the correct order (as is the case when this computation is in an
1452 	 inner-loop nested in an outer-loop that is being vectorized).  */
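/* A minimal scalar loop of this form (illustrative only):

     unsigned short x[256];
     unsigned int sum = 0;
     for (int i = 0; i < 256; ++i)
       sum += x[i];			// S2 and S3 above

   which becomes WIDEN_SUM_EXPR <x[i], sum> when the target supports it.  */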
1453 
1454 static gimple *
1455 vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1456 {
1457   gimple *last_stmt = stmt_vinfo->stmt;
1458   tree oprnd0, oprnd1;
1459   vec_info *vinfo = stmt_vinfo->vinfo;
1460   tree type;
1461   gimple *pattern_stmt;
1462   tree var;
1463 
1464   /* Look for the following pattern
1465           DX = (TYPE) X;
1466           sum_1 = DX + sum_0;
1467      In which DX is at least double the size of X, and sum_1 has been
1468      recognized as a reduction variable.
1469    */
1470 
1471   /* Starting from LAST_STMT, follow the defs of its uses in search
1472      of the above pattern.  */
1473 
1474   if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1475 				       &oprnd0, &oprnd1))
1476     return NULL;
1477 
1478   type = gimple_expr_type (last_stmt);
1479 
1480   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
1481      we know that oprnd1 is the reduction variable (defined by a loop-header
1482      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1483      Left to check that oprnd0 is defined by a cast from type 'type' to type
1484      'TYPE'.  */
1485 
1486   vect_unpromoted_value unprom0;
1487   if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1488       || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1489     return NULL;
1490 
1491   vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1492 
1493   if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
1494 					unprom0.type, type_out))
1495     return NULL;
1496 
1497   var = vect_recog_temp_ssa_var (type, NULL);
1498   pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1499 
1500   return pattern_stmt;
1501 }
1502 
1503 /* Recognize cases in which an operation is performed in one type WTYPE
1504    but could be done more efficiently in a narrower type NTYPE.  For example,
1505    if we have:
1506 
1507      ATYPE a;  // narrower than NTYPE
1508      BTYPE b;  // narrower than NTYPE
1509      WTYPE aw = (WTYPE) a;
1510      WTYPE bw = (WTYPE) b;
1511      WTYPE res = aw + bw;  // only uses of aw and bw
1512 
1513    then it would be more efficient to do:
1514 
1515      NTYPE an = (NTYPE) a;
1516      NTYPE bn = (NTYPE) b;
1517      NTYPE resn = an + bn;
1518      WTYPE res = (WTYPE) resn;
1519 
1520    Other situations include things like:
1521 
1522      ATYPE a;  // NTYPE or narrower
1523      WTYPE aw = (WTYPE) a;
1524      WTYPE res = aw + b;
1525 
1526    when only "(NTYPE) res" is significant.  In that case it's more efficient
1527    to truncate "b" and do the operation on NTYPE instead:
1528 
1529      NTYPE an = (NTYPE) a;
1530      NTYPE bn = (NTYPE) b;  // truncation
1531      NTYPE resn = an + bn;
1532      WTYPE res = (WTYPE) resn;
1533 
1534    All users of "res" should then use "resn" instead, making the final
1535    statement dead (not marked as relevant).  The final statement is still
1536    needed to maintain the type correctness of the IR.
1537 
1538    vect_determine_precisions has already determined the minimum
1539    precision of the operation and the minimum precision required
1540    by users of the result.  */
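
/* A minimal illustrative sketch (an assumed example, not part of the
   original comment):

     void
     add_bytes (unsigned char *restrict r, const unsigned char *restrict a,
		const unsigned char *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
	 r[i] = a[i] + b[i];
     }

   The usual C promotions perform the addition in "int", but only the low
   8 bits of the result are stored, so the addition can be demoted to a
   narrower type as described above.  */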
1541 
1542 static gimple *
1543 vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1544 {
1545   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1546   if (!last_stmt)
1547     return NULL;
1548 
1549   /* See whether we have found that this operation can be done on a
1550      narrower type without changing its semantics.  */
1551   unsigned int new_precision = last_stmt_info->operation_precision;
1552   if (!new_precision)
1553     return NULL;
1554 
1555   vec_info *vinfo = last_stmt_info->vinfo;
1556   tree lhs = gimple_assign_lhs (last_stmt);
1557   tree type = TREE_TYPE (lhs);
1558   tree_code code = gimple_assign_rhs_code (last_stmt);
1559 
1560   /* Keep the first operand of a COND_EXPR as-is: only the other two
1561      operands are interesting.  */
1562   unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1563 
1564   /* Check the operands.  */
1565   unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1566   auto_vec <vect_unpromoted_value, 3> unprom (nops);
1567   unprom.quick_grow (nops);
1568   unsigned int min_precision = 0;
1569   bool single_use_p = false;
1570   for (unsigned int i = 0; i < nops; ++i)
1571     {
1572       tree op = gimple_op (last_stmt, first_op + i);
1573       if (TREE_CODE (op) == INTEGER_CST)
1574 	unprom[i].set_op (op, vect_constant_def);
1575       else if (TREE_CODE (op) == SSA_NAME)
1576 	{
1577 	  bool op_single_use_p = true;
1578 	  if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1579 						     &op_single_use_p))
1580 	    return NULL;
1581 	  /* If:
1582 
1583 	     (1) N bits of the result are needed;
1584 	     (2) all inputs are widened from M<N bits; and
1585 	     (3) one operand OP is a single-use SSA name
1586 
1587 	     we can shift the M->N widening from OP to the output
1588 	     without changing the number or type of extensions involved.
1589 	     This then reduces the number of copies of STMT_INFO.
1590 
1591 	     If instead of (3) more than one operand is a single-use SSA name,
1592 	     shifting the extension to the output is even more of a win.
1593 
1594 	     If instead:
1595 
1596 	     (1) N bits of the result are needed;
1597 	     (2) one operand OP2 is widened from M2<N bits;
1598 	     (3) another operand OP1 is widened from M1<M2 bits; and
1599 	     (4) both OP1 and OP2 are single-use
1600 
1601 	     the choice is between:
1602 
1603 	     (a) truncating OP2 to M1, doing the operation on M1,
1604 		 and then widening the result to N
1605 
1606 	     (b) widening OP1 to M2, doing the operation on M2, and then
1607 		 widening the result to N
1608 
1609 	     Both shift the M2->N widening of the inputs to the output.
1610 	     (a) additionally shifts the M1->M2 widening to the output;
1611 	     it requires fewer copies of STMT_INFO but requires an extra
1612 	     M2->M1 truncation.
1613 
1614 	     Which is better will depend on the complexity and cost of
1615 	     STMT_INFO, which is hard to predict at this stage.  However,
1616 	     a clear tie-breaker in favor of (b) is the fact that the
1617 	     truncation in (a) increases the length of the operation chain.
1618 
1619 	     If instead of (4) only one of OP1 or OP2 is single-use,
1620 	     (b) is still a win over doing the operation in N bits:
1621 	     it still shifts the M2->N widening on the single-use operand
1622 	     to the output and reduces the number of STMT_INFO copies.
1623 
1624 	     If neither operand is single-use then operating on fewer than
1625 	     N bits might lead to more extensions overall.  Whether it does
1626 	     or not depends on global information about the vectorization
1627 	     region, and whether that's a good trade-off would again
1628 	     depend on the complexity and cost of the statements involved,
1629 	     as well as things like register pressure that are not normally
1630 	     modelled at this stage.  We therefore ignore these cases
1631 	     and just optimize the clear single-use wins above.
1632 
1633 	     Thus we take the maximum precision of the unpromoted operands
1634 	     and record whether any operand is single-use.  */
1635 	  if (unprom[i].dt == vect_internal_def)
1636 	    {
1637 	      min_precision = MAX (min_precision,
1638 				   TYPE_PRECISION (unprom[i].type));
1639 	      single_use_p |= op_single_use_p;
1640 	    }
1641 	}
1642     }
1643 
1644   /* Although the operation could be done in operation_precision, we have
1645      to balance that against introducing extra truncations or extensions.
1646      Calculate the minimum precision that can be handled efficiently.
1647 
1648      The loop above determined that the operation could be handled
1649      efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1650      extension from the inputs to the output without introducing more
1651      instructions, and would reduce the number of instructions required
1652      for STMT_INFO itself.
1653 
1654      vect_determine_precisions has also determined that the result only
1655      needs min_output_precision bits.  Truncating by a factor of N times
1656      requires a tree of N - 1 instructions, so if TYPE is N times wider
1657      than min_output_precision, doing the operation in TYPE and truncating
1658      the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1659      In contrast:
1660 
1661      - truncating the input to a unary operation and doing the operation
1662        in the new type requires at most N - 1 + 1 = N instructions per
1663        output vector
1664 
1665      - doing the same for a binary operation requires at most
1666        (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1667 
1668      Both unary and binary operations require fewer instructions than
1669      this if the operands were extended from a suitable truncated form.
1670      Thus there is usually nothing to lose by doing operations in
1671      min_output_precision bits, but there can be something to gain.  */
1672   if (!single_use_p)
1673     min_precision = last_stmt_info->min_output_precision;
1674   else
1675     min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1676 
1677   /* Apply the minimum efficient precision we just calculated.  */
1678   if (new_precision < min_precision)
1679     new_precision = min_precision;
1680   new_precision = vect_element_precision (new_precision);
1681   if (new_precision >= TYPE_PRECISION (type))
1682     return NULL;
1683 
1684   vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1685 
1686   *type_out = get_vectype_for_scalar_type (vinfo, type);
1687   if (!*type_out)
1688     return NULL;
1689 
1690   /* We've found a viable pattern.  Get the new type of the operation.  */
1691   bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1692   tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1693 
1694   /* If we're truncating an operation, we need to make sure that we
1695      don't introduce new undefined overflow.  The codes tested here are
1696      a subset of those accepted by vect_truncatable_operation_p.  */
1697   tree op_type = new_type;
1698   if (TYPE_OVERFLOW_UNDEFINED (new_type)
1699       && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
1700     op_type = build_nonstandard_integer_type (new_precision, true);
1701 
1702   /* We specifically don't check here whether the target supports the
1703      new operation, since it might be something that a later pattern
1704      wants to rewrite anyway.  If targets have a minimum element size
1705      for some optabs, we should pattern-match smaller ops to larger ops
1706      where beneficial.  */
1707   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1708   tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
1709   if (!new_vectype || !op_vectype)
1710     return NULL;
1711 
1712   if (dump_enabled_p ())
1713     dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1714 		     type, new_type);
1715 
1716   /* Calculate the rhs operands for an operation on OP_TYPE.  */
1717   tree ops[3] = {};
1718   for (unsigned int i = 1; i < first_op; ++i)
1719     ops[i - 1] = gimple_op (last_stmt, i);
1720   vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
1721 		       op_type, &unprom[0], op_vectype);
1722 
1723   /* Use the operation to produce a result of type OP_TYPE.  */
1724   tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
1725   gimple *pattern_stmt = gimple_build_assign (new_var, code,
1726 					      ops[0], ops[1], ops[2]);
1727   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1728 
1729   if (dump_enabled_p ())
1730     dump_printf_loc (MSG_NOTE, vect_location,
1731 		     "created pattern stmt: %G", pattern_stmt);
1732 
1733   /* Convert back to the original signedness, if OP_TYPE is different
1734      from NEW_TYPE.  */
1735   if (op_type != new_type)
1736     pattern_stmt = vect_convert_output (last_stmt_info, new_type,
1737 					pattern_stmt, op_vectype);
1738 
1739   /* Promote the result to the original type.  */
1740   pattern_stmt = vect_convert_output (last_stmt_info, type,
1741 				      pattern_stmt, new_vectype);
1742 
1743   return pattern_stmt;
1744 }
1745 
1746 /* Recognize the following patterns:
1747 
1748      ATYPE a;  // narrower than TYPE
1749      BTYPE b;  // narrower than TYPE
1750 
1751    1) Multiply high with scaling
1752      TYPE res = ((TYPE) a * (TYPE) b) >> c;
1753    2) ... or also with rounding
1754      TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
1755 
1756    where only the bottom half of res is used.  */
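
/* For illustration only (an assumed example), pattern 1 corresponds to
   scalar code such as

     void
     mulh (short *restrict r, const short *restrict a,
	   const short *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
	 r[i] = ((int) a[i] * (int) b[i]) >> 15;
     }

   which can use IFN_MULHS when the target provides a highpart-multiply
   instruction for the chosen vector mode; pattern 2 additionally rounds
   the result and maps to IFN_MULHRS.  */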
1757 
1758 static gimple *
1759 vect_recog_mulhs_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1760 {
1761   /* Check for a right shift.  */
1762   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1763   if (!last_stmt
1764       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
1765     return NULL;
1766   vec_info *vinfo = last_stmt_info->vinfo;
1767 
1768   /* Check that the shift result is wider than the users of the
1769      result need (i.e. that narrowing would be a natural choice).  */
1770   tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1771   unsigned int target_precision
1772     = vect_element_precision (last_stmt_info->min_output_precision);
1773   if (!INTEGRAL_TYPE_P (lhs_type)
1774       || target_precision >= TYPE_PRECISION (lhs_type))
1775     return NULL;
1776 
1777   /* Look through any change in sign on the outer shift input.  */
1778   vect_unpromoted_value unprom_rshift_input;
1779   tree rshift_input = vect_look_through_possible_promotion
1780     (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
1781   if (!rshift_input
1782       || TYPE_PRECISION (TREE_TYPE (rshift_input))
1783 	   != TYPE_PRECISION (lhs_type))
1784     return NULL;
1785 
1786   /* Get the definition of the shift input.  */
1787   stmt_vec_info rshift_input_stmt_info
1788     = vect_get_internal_def (vinfo, rshift_input);
1789   if (!rshift_input_stmt_info)
1790     return NULL;
1791   gassign *rshift_input_stmt
1792     = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
1793   if (!rshift_input_stmt)
1794     return NULL;
1795 
1796   stmt_vec_info mulh_stmt_info;
1797   tree scale_term;
1798   internal_fn ifn;
1799   unsigned int expect_offset;
1800 
1801   /* Check for the presence of the rounding term.  */
1802   if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
1803     {
1804       /* Check that the outer shift was by 1.  */
1805       if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
1806 	return NULL;
1807 
1808       /* Check that the second operand of the PLUS_EXPR is 1.  */
1809       if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
1810 	return NULL;
1811 
1812       /* Look through any change in sign on the addition input.  */
1813       vect_unpromoted_value unprom_plus_input;
1814       tree plus_input = vect_look_through_possible_promotion
1815 	(vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
1816       if (!plus_input
1817 	   || TYPE_PRECISION (TREE_TYPE (plus_input))
1818 		!= TYPE_PRECISION (TREE_TYPE (rshift_input)))
1819 	return NULL;
1820 
1821       /* Get the definition of the multiply-high-scale part.  */
1822       stmt_vec_info plus_input_stmt_info
1823 	= vect_get_internal_def (vinfo, plus_input);
1824       if (!plus_input_stmt_info)
1825 	return NULL;
1826       gassign *plus_input_stmt
1827 	= dyn_cast <gassign *> (plus_input_stmt_info->stmt);
1828       if (!plus_input_stmt
1829 	  || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
1830 	return NULL;
1831 
1832       /* Look through any change in sign on the scaling input.  */
1833       vect_unpromoted_value unprom_scale_input;
1834       tree scale_input = vect_look_through_possible_promotion
1835 	(vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
1836       if (!scale_input
1837 	  || TYPE_PRECISION (TREE_TYPE (scale_input))
1838 	       != TYPE_PRECISION (TREE_TYPE (plus_input)))
1839 	return NULL;
1840 
1841       /* Get the definition of the multiply-high part.  */
1842       mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
1843       if (!mulh_stmt_info)
1844 	return NULL;
1845 
1846       /* Get the scaling term.  */
1847       scale_term = gimple_assign_rhs2 (plus_input_stmt);
1848 
1849       expect_offset = target_precision + 2;
1850       ifn = IFN_MULHRS;
1851     }
1852   else
1853     {
1854       mulh_stmt_info = rshift_input_stmt_info;
1855       scale_term = gimple_assign_rhs2 (last_stmt);
1856 
1857       expect_offset = target_precision + 1;
1858       ifn = IFN_MULHS;
1859     }
1860 
1861   /* Check that the scaling factor is correct.  */
1862   if (TREE_CODE (scale_term) != INTEGER_CST
1863       || wi::to_widest (scale_term) + expect_offset
1864 	   != TYPE_PRECISION (lhs_type))
1865     return NULL;
1866 
1867   /* Check whether the scaling input term can be seen as two widened
1868      inputs multiplied together.  */
1869   vect_unpromoted_value unprom_mult[2];
1870   tree new_type;
1871   unsigned int nops
1872     = vect_widened_op_tree (mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
1873 			    false, 2, unprom_mult, &new_type);
1874   if (nops != 2)
1875     return NULL;
1876 
1877   vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
1878 
1879   /* Adjust output precision.  */
1880   if (TYPE_PRECISION (new_type) < target_precision)
1881     new_type = build_nonstandard_integer_type
1882       (target_precision, TYPE_UNSIGNED (new_type));
1883 
1884   /* Check for target support.  */
1885   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1886   if (!new_vectype
1887       || !direct_internal_fn_supported_p
1888 	    (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
1889     return NULL;
1890 
1891   /* The IR requires a valid vector type for the cast result, even though
1892      it's likely to be discarded.  */
1893   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
1894   if (!*type_out)
1895     return NULL;
1896 
1897   /* Generate the IFN_MULHRS call.  */
1898   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1899   tree new_ops[2];
1900   vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
1901 		       unprom_mult, new_vectype);
1902   gcall *mulhrs_stmt
1903     = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
1904   gimple_call_set_lhs (mulhrs_stmt, new_var);
1905   gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
1906 
1907   if (dump_enabled_p ())
1908     dump_printf_loc (MSG_NOTE, vect_location,
1909 		     "created pattern stmt: %G", mulhrs_stmt);
1910 
1911   return vect_convert_output (last_stmt_info, lhs_type,
1912 			      mulhrs_stmt, new_vectype);
1913 }
1914 
1915 /* Recognize the patterns:
1916 
1917 	    ATYPE a;  // narrower than TYPE
1918 	    BTYPE b;  // narrower than TYPE
1919 	(1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1920      or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1921 
1922    where only the bottom half of avg is used.  Try to transform them into:
1923 
1924 	(1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1925      or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1926 
1927   followed by:
1928 
1929 	    TYPE avg = (TYPE) avg';
1930 
1931   where NTYPE is no wider than half of TYPE.  Since only the bottom half
1932   of avg is used, all or part of the cast of avg' should become redundant.
1933 
1934   If there is no target support available, generate code to distribute the
1935   right shift over the addition and add a carry term.  */
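
/* As a concrete (illustrative) example, pattern (2) corresponds to scalar
   code such as

     void
     avg_round (unsigned char *restrict r, const unsigned char *restrict a,
		const unsigned char *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
	 r[i] = (a[i] + b[i] + 1) >> 1;
     }

   which becomes IFN_AVG_CEIL on 8-bit elements when the target supports
   it, and otherwise uses the shift-and-carry fallback described above.  */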
1936 
1937 static gimple *
1938 vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1939 {
1940   /* Check for a shift right by one bit.  */
1941   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1942   vec_info *vinfo = last_stmt_info->vinfo;
1943   if (!last_stmt
1944       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1945       || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1946     return NULL;
1947 
1948   /* Check that the shift result is wider than the users of the
1949      result need (i.e. that narrowing would be a natural choice).  */
1950   tree lhs = gimple_assign_lhs (last_stmt);
1951   tree type = TREE_TYPE (lhs);
1952   unsigned int target_precision
1953     = vect_element_precision (last_stmt_info->min_output_precision);
1954   if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1955     return NULL;
1956 
1957   /* Look through any change in sign on the shift input.  */
1958   tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1959   vect_unpromoted_value unprom_plus;
1960   rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
1961 						     &unprom_plus);
1962   if (!rshift_rhs
1963       || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
1964     return NULL;
1965 
1966   /* Get the definition of the shift input.  */
1967   stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1968   if (!plus_stmt_info)
1969     return NULL;
1970 
1971   /* Check whether the shift input can be seen as a tree of additions on
1972      2 or 3 widened inputs.
1973 
1974      Note that the pattern should be a win even if the result of one or
1975      more additions is reused elsewhere: if the pattern matches, we'd be
1976      replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s.  */
1977   internal_fn ifn = IFN_AVG_FLOOR;
1978   vect_unpromoted_value unprom[3];
1979   tree new_type;
1980   unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
1981 					    PLUS_EXPR, false, 3,
1982 					    unprom, &new_type);
1983   if (nops == 0)
1984     return NULL;
1985   if (nops == 3)
1986     {
1987       /* Check that one operand is 1.  */
1988       unsigned int i;
1989       for (i = 0; i < 3; ++i)
1990 	if (integer_onep (unprom[i].op))
1991 	  break;
1992       if (i == 3)
1993 	return NULL;
1994       /* Throw away the 1 operand and keep the other two.  */
1995       if (i < 2)
1996 	unprom[i] = unprom[2];
1997       ifn = IFN_AVG_CEIL;
1998     }
1999 
2000   vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
2001 
2002   /* We know that:
2003 
2004      (a) the operation can be viewed as:
2005 
2006 	   TYPE widened0 = (TYPE) UNPROM[0];
2007 	   TYPE widened1 = (TYPE) UNPROM[1];
2008 	   TYPE tmp1 = widened0 + widened1 {+ 1};
2009 	   TYPE tmp2 = tmp1 >> 1;   // LAST_STMT_INFO
2010 
2011      (b) the first two statements are equivalent to:
2012 
2013 	   TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2014 	   TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2015 
2016      (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2017 	 where sensible;
2018 
2019      (d) all the operations can be performed correctly at twice the width of
2020 	 NEW_TYPE, due to the nature of the average operation; and
2021 
2022      (e) users of the result of the right shift need only TARGET_PRECISION
2023 	 bits, where TARGET_PRECISION is no more than half of TYPE's
2024 	 precision.
2025 
2026      Under these circumstances, the only situation in which NEW_TYPE
2027      could be narrower than TARGET_PRECISION is if widened0, widened1
2028      and an addition result are all used more than once.  Thus we can
2029      treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2030      as "free", whereas widening the result of the average instruction
2031      from NEW_TYPE to TARGET_PRECISION would be a new operation.  It's
2032      therefore better not to go narrower than TARGET_PRECISION.  */
2033   if (TYPE_PRECISION (new_type) < target_precision)
2034     new_type = build_nonstandard_integer_type (target_precision,
2035 					       TYPE_UNSIGNED (new_type));
2036 
2037   /* Check for target support.  */
2038   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2039   if (!new_vectype)
2040     return NULL;
2041 
2042   bool fallback_p = false;
2043 
2044   if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2045     ;
2046   else if (TYPE_UNSIGNED (new_type)
2047 	   && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2048 	   && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2049 	   && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2050 	   && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2051     fallback_p = true;
2052   else
2053     return NULL;
2054 
2055   /* The IR requires a valid vector type for the cast result, even though
2056      it's likely to be discarded.  */
2057   *type_out = get_vectype_for_scalar_type (vinfo, type);
2058   if (!*type_out)
2059     return NULL;
2060 
2061   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2062   tree new_ops[2];
2063   vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
2064 		       unprom, new_vectype);
2065 
2066   if (fallback_p)
2067     {
2068       /* As a fallback, generate code for the following sequence:
2069 
2070 	 shifted_op0 = new_ops[0] >> 1;
2071 	 shifted_op1 = new_ops[1] >> 1;
2072 	 sum_of_shifted = shifted_op0 + shifted_op1;
2073 	 unmasked_carry = new_ops[0] and/or new_ops[1];
2074 	 carry = unmasked_carry & 1;
2075 	 new_var = sum_of_shifted + carry;
2076       */
2077 
2078       tree one_cst = build_one_cst (new_type);
2079       gassign *g;
2080 
2081       tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2082       g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2083       append_pattern_def_seq (last_stmt_info, g, new_vectype);
2084 
2085       tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2086       g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2087       append_pattern_def_seq (last_stmt_info, g, new_vectype);
2088 
2089       tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2090       g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2091 			       shifted_op0, shifted_op1);
2092       append_pattern_def_seq (last_stmt_info, g, new_vectype);
2093 
2094       tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2095       tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2096       g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2097       append_pattern_def_seq (last_stmt_info, g, new_vectype);
2098 
2099       tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2100       g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2101       append_pattern_def_seq (last_stmt_info, g, new_vectype);
2102 
2103       g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2104       return vect_convert_output (last_stmt_info, type, g, new_vectype);
2105     }
2106 
2107   /* Generate the IFN_AVG* call.  */
2108   gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2109 						    new_ops[1]);
2110   gimple_call_set_lhs (average_stmt, new_var);
2111   gimple_set_location (average_stmt, gimple_location (last_stmt));
2112 
2113   if (dump_enabled_p ())
2114     dump_printf_loc (MSG_NOTE, vect_location,
2115 		     "created pattern stmt: %G", average_stmt);
2116 
2117   return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
2118 }
2119 
2120 /* Recognize cases in which the input to a cast is wider than its
2121    output, and the input is fed by a widening operation.  Fold this
2122    by removing the unnecessary intermediate widening.  E.g.:
2123 
2124      unsigned char a;
2125      unsigned int b = (unsigned int) a;
2126      unsigned short c = (unsigned short) b;
2127 
2128    -->
2129 
2130      unsigned short c = (unsigned short) a;
2131 
2132    Although this is rare in input IR, it is an expected side-effect
2133    of the over-widening pattern above.
2134 
2135    This is also beneficial for integer-to-float conversions, if the
2136    widened integer has more bits than the float, and if the unwidened
2137    input doesn't.  */
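
/* The integer-to-float case corresponds (as an assumed illustration) to
   code such as

     float
     widen_then_float (unsigned int a)
     {
       unsigned long long w = a;
       return (float) w;
     }

   Converting the 64-bit intermediate W to float gives the same value as
   converting A directly, so the intermediate widening can be skipped.  */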
2138 
2139 static gimple *
2140 vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
2141 {
2142   /* Check for a cast, including an integer-to-float conversion.  */
2143   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2144   if (!last_stmt)
2145     return NULL;
2146   tree_code code = gimple_assign_rhs_code (last_stmt);
2147   if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2148     return NULL;
2149 
2150   /* Make sure that the lhs is a scalar with a natural bitsize.  */
2151   tree lhs = gimple_assign_lhs (last_stmt);
2152   if (!lhs)
2153     return NULL;
2154   tree lhs_type = TREE_TYPE (lhs);
2155   scalar_mode lhs_mode;
2156   if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2157       || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2158     return NULL;
2159 
2160   /* Check for a narrowing operation (from a vector point of view).  */
2161   tree rhs = gimple_assign_rhs1 (last_stmt);
2162   tree rhs_type = TREE_TYPE (rhs);
2163   if (!INTEGRAL_TYPE_P (rhs_type)
2164       || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2165       || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2166     return NULL;
2167 
2168   /* Try to find an unpromoted input.  */
2169   vec_info *vinfo = last_stmt_info->vinfo;
2170   vect_unpromoted_value unprom;
2171   if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2172       || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2173     return NULL;
2174 
2175   /* If the bits above RHS_TYPE matter, make sure that they're the
2176      same when extending from UNPROM as they are when extending from RHS.  */
2177   if (!INTEGRAL_TYPE_P (lhs_type)
2178       && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2179     return NULL;
2180 
2181   /* We can get the same result by casting UNPROM directly, to avoid
2182      the unnecessary widening and narrowing.  */
2183   vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2184 
2185   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2186   if (!*type_out)
2187     return NULL;
2188 
2189   tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2190   gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2191   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2192 
2193   return pattern_stmt;
2194 }
2195 
2196 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2197    to WIDEN_LSHIFT_EXPR.  See vect_recog_widen_op_pattern for details.  */
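
/* For illustration (an assumed example), a loop such as

     void
     shift_widen (unsigned short *restrict r,
		  const unsigned char *restrict a, int n)
     {
       for (int i = 0; i < n; i++)
	 r[i] = a[i] << 4;
     }

   shifts a widened 8-bit input and stores a 16-bit result, which targets
   with widening shift instructions can handle as WIDEN_LSHIFT_EXPR.  */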
2198 
2199 static gimple *
2200 vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
2201 {
2202   return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
2203 				      WIDEN_LSHIFT_EXPR, true,
2204 				      "vect_recog_widen_shift_pattern");
2205 }
2206 
2207 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
2208 
2209    type a_t, b_t, c_t;
2210 
2211    S0 a_t = b_t r<< c_t;
2212 
2213   Input/Output:
2214 
2215   * STMT_VINFO: The stmt from which the pattern search begins,
2216     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
2217     with a sequence:
2218 
2219    S1 d_t = -c_t;
2220    S2 e_t = d_t & (B - 1);
2221    S3 f_t = b_t << c_t;
2222    S4 g_t = b_t >> e_t;
2223    S0 a_t = f_t | g_t;
2224 
2225     where B is element bitsize of type.
2226 
2227   Output:
2228 
2229   * TYPE_OUT: The type of the output of this pattern.
2230 
2231   * Return value: A new stmt that will be used to replace the rotate
2232     S0 stmt.  */
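
/* The expansion above is the usual open-coded rotate idiom.  As an
   illustrative scalar sketch (assumed example) for 32-bit elements:

     unsigned int
     rotl (unsigned int b_t, unsigned int c_t)
     {
       unsigned int e_t = (-c_t) & 31;
       return (b_t << c_t) | (b_t >> e_t);
     }

   Masking the negated amount with B - 1 keeps the right-shift count in
   range even when c_t is zero.  */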
2233 
2234 static gimple *
2235 vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2236 {
2237   gimple *last_stmt = stmt_vinfo->stmt;
2238   tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2239   gimple *pattern_stmt, *def_stmt;
2240   enum tree_code rhs_code;
2241   vec_info *vinfo = stmt_vinfo->vinfo;
2242   enum vect_def_type dt;
2243   optab optab1, optab2;
2244   edge ext_def = NULL;
2245   bool bswap16_p = false;
2246 
2247   if (is_gimple_assign (last_stmt))
2248     {
2249       rhs_code = gimple_assign_rhs_code (last_stmt);
2250       switch (rhs_code)
2251 	{
2252 	case LROTATE_EXPR:
2253 	case RROTATE_EXPR:
2254 	  break;
2255 	default:
2256 	  return NULL;
2257 	}
2258 
2259       lhs = gimple_assign_lhs (last_stmt);
2260       oprnd0 = gimple_assign_rhs1 (last_stmt);
2261       type = TREE_TYPE (oprnd0);
2262       oprnd1 = gimple_assign_rhs2 (last_stmt);
2263     }
2264   else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2265     {
2266       /* __builtin_bswap16 (x) is another form of x r>> 8.
2267 	 The vectorizer has bswap support, but only if the argument isn't
2268 	 promoted.  */
2269       lhs = gimple_call_lhs (last_stmt);
2270       oprnd0 = gimple_call_arg (last_stmt, 0);
2271       type = TREE_TYPE (oprnd0);
2272       if (!lhs
2273 	  || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2274 	  || TYPE_PRECISION (type) <= 16
2275 	  || TREE_CODE (oprnd0) != SSA_NAME
2276 	  || BITS_PER_UNIT != 8
2277 	  || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
2278 	return NULL;
2279 
2280       stmt_vec_info def_stmt_info;
2281       if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2282 	return NULL;
2283 
2284       if (dt != vect_internal_def)
2285 	return NULL;
2286 
2287       if (gimple_assign_cast_p (def_stmt))
2288 	{
2289 	  def = gimple_assign_rhs1 (def_stmt);
2290 	  if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2291 	      && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2292 	    oprnd0 = def;
2293 	}
2294 
2295       type = TREE_TYPE (lhs);
2296       vectype = get_vectype_for_scalar_type (vinfo, type);
2297       if (vectype == NULL_TREE)
2298 	return NULL;
2299 
2300       if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2301 	{
2302 	  /* The encoding uses one stepped pattern for each byte in the
2303 	     16-bit word.  */
2304 	  vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2305 	  for (unsigned i = 0; i < 3; ++i)
2306 	    for (unsigned j = 0; j < 2; ++j)
2307 	      elts.quick_push ((i + 1) * 2 - j - 1);
2308 
2309 	  vec_perm_indices indices (elts, 1,
2310 				    TYPE_VECTOR_SUBPARTS (char_vectype));
2311 	  if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2312 	    {
2313 	      /* vectorizable_bswap can handle the __builtin_bswap16 if we
2314 		 undo the argument promotion.  */
2315 	      if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2316 		{
2317 		  def = vect_recog_temp_ssa_var (type, NULL);
2318 		  def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2319 		  append_pattern_def_seq (stmt_vinfo, def_stmt);
2320 		  oprnd0 = def;
2321 		}
2322 
2323 	      /* Pattern detected.  */
2324 	      vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2325 
2326 	      *type_out = vectype;
2327 
2328 	      /* Pattern supported.  Create a stmt to be used to replace the
2329 		 pattern, with the unpromoted argument.  */
2330 	      var = vect_recog_temp_ssa_var (type, NULL);
2331 	      pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
2332 						1, oprnd0);
2333 	      gimple_call_set_lhs (pattern_stmt, var);
2334 	      gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
2335 				      gimple_call_fntype (last_stmt));
2336 	      return pattern_stmt;
2337 	    }
2338 	}
2339 
2340       oprnd1 = build_int_cst (integer_type_node, 8);
2341       rhs_code = LROTATE_EXPR;
2342       bswap16_p = true;
2343     }
2344   else
2345     return NULL;
2346 
2347   if (TREE_CODE (oprnd0) != SSA_NAME
2348       || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2349       || !INTEGRAL_TYPE_P (type)
2350       || !TYPE_UNSIGNED (type))
2351     return NULL;
2352 
2353   stmt_vec_info def_stmt_info;
2354   if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2355     return NULL;
2356 
2357   if (dt != vect_internal_def
2358       && dt != vect_constant_def
2359       && dt != vect_external_def)
2360     return NULL;
2361 
2362   vectype = get_vectype_for_scalar_type (vinfo, type);
2363   if (vectype == NULL_TREE)
2364     return NULL;
2365 
2366   /* If vector/vector or vector/scalar rotate is supported by the target,
2367      don't do anything here.  */
2368   optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2369   if (optab1
2370       && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2371     {
2372      use_rotate:
2373       if (bswap16_p)
2374 	{
2375 	  if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2376 	    {
2377 	      def = vect_recog_temp_ssa_var (type, NULL);
2378 	      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2379 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2380 	      oprnd0 = def;
2381 	    }
2382 
2383 	  /* Pattern detected.  */
2384 	  vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2385 
2386 	  *type_out = vectype;
2387 
2388 	  /* Pattern supported.  Create a stmt to be used to replace the
2389 	     pattern.  */
2390 	  var = vect_recog_temp_ssa_var (type, NULL);
2391 	  pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
2392 					      oprnd1);
2393 	  return pattern_stmt;
2394 	}
2395       return NULL;
2396     }
2397 
2398   if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2399     {
2400       optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2401       if (optab2
2402 	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2403 	goto use_rotate;
2404     }
2405 
2406   /* If vector/vector or vector/scalar shifts aren't supported by the target,
2407      don't do anything here either.  */
2408   optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2409   optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2410   if (!optab1
2411       || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2412       || !optab2
2413       || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2414     {
2415       if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2416 	return NULL;
2417       optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2418       optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2419       if (!optab1
2420 	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2421 	  || !optab2
2422 	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2423 	return NULL;
2424     }
2425 
2426   *type_out = vectype;
2427 
2428   if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2429     {
2430       def = vect_recog_temp_ssa_var (type, NULL);
2431       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2432       append_pattern_def_seq (stmt_vinfo, def_stmt);
2433       oprnd0 = def;
2434     }
2435 
2436   if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
2437     ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2438 
2439   def = NULL_TREE;
2440   scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2441   if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2442     def = oprnd1;
2443   else if (def_stmt && gimple_assign_cast_p (def_stmt))
2444     {
2445       tree rhs1 = gimple_assign_rhs1 (def_stmt);
2446       if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2447 	  && TYPE_PRECISION (TREE_TYPE (rhs1))
2448 	     == TYPE_PRECISION (type))
2449 	def = rhs1;
2450     }
2451 
2452   if (def == NULL_TREE)
2453     {
2454       def = vect_recog_temp_ssa_var (type, NULL);
2455       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2456       append_pattern_def_seq (stmt_vinfo, def_stmt);
2457     }
2458   stype = TREE_TYPE (def);
2459 
2460   if (TREE_CODE (def) == INTEGER_CST)
2461     {
2462       if (!tree_fits_uhwi_p (def)
2463 	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2464 	  || integer_zerop (def))
2465 	return NULL;
2466       def2 = build_int_cst (stype,
2467 			    GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2468     }
2469   else
2470     {
2471       tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
2472 
2473       if (vecstype == NULL_TREE)
2474 	return NULL;
2475       def2 = vect_recog_temp_ssa_var (stype, NULL);
2476       def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2477       if (ext_def)
2478 	{
2479 	  basic_block new_bb
2480 	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2481 	  gcc_assert (!new_bb);
2482 	}
2483       else
2484 	append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2485 
2486       def2 = vect_recog_temp_ssa_var (stype, NULL);
2487       tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
2488       def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2489 				      gimple_assign_lhs (def_stmt), mask);
2490       if (ext_def)
2491 	{
2492 	  basic_block new_bb
2493 	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2494 	  gcc_assert (!new_bb);
2495 	}
2496       else
2497 	append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2498     }
2499 
2500   var1 = vect_recog_temp_ssa_var (type, NULL);
2501   def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2502 					? LSHIFT_EXPR : RSHIFT_EXPR,
2503 				  oprnd0, def);
2504   append_pattern_def_seq (stmt_vinfo, def_stmt);
2505 
2506   var2 = vect_recog_temp_ssa_var (type, NULL);
2507   def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2508 					? RSHIFT_EXPR : LSHIFT_EXPR,
2509 				  oprnd0, def2);
2510   append_pattern_def_seq (stmt_vinfo, def_stmt);
2511 
2512   /* Pattern detected.  */
2513   vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2514 
2515   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
2516   var = vect_recog_temp_ssa_var (type, NULL);
2517   pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2518 
2519   return pattern_stmt;
2520 }
2521 
2522 /* Detect a vector-by-vector shift pattern that wouldn't otherwise be
2523    vectorized:
2524 
2525    type a_t;
2526    TYPE b_T, res_T;
2527 
2528    S1 a_t = ;
2529    S2 b_T = ;
2530    S3 res_T = b_T op a_t;
2531 
2532   where type 'TYPE' is a type with a different size than 'type',
2533   and op is <<, >> or rotate.
2534 
2535   Also detect cases:
2536 
2537    type a_t;
2538    TYPE b_T, c_T, res_T;
2539 
2540    S0 c_T = ;
2541    S1 a_t = (type) c_T;
2542    S2 b_T = ;
2543    S3 res_T = b_T op a_t;
2544 
2545   Input/Output:
2546 
2547   * STMT_VINFO: The stmt from which the pattern search begins,
2548     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
2549     with a shift/rotate which has the same type on both operands; in the
2550     second case it is simply b_T op c_T, in the first case a cast of a_t
2551     to TYPE is added in STMT_VINFO_PATTERN_DEF_SEQ.
2552 
2553   Output:
2554 
2555   * TYPE_OUT: The type of the output of this pattern.
2556 
2557   * Return value: A new stmt that will be used to replace the shift/rotate
2558     S3 stmt.  */
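
/* As an illustrative example (not from the original comment):

     void
     shift_each (unsigned int *restrict r, const unsigned int *restrict b,
		 const unsigned char *restrict c, int n)
     {
       for (int i = 0; i < n; i++)
	 r[i] = b[i] << c[i];
     }

   Here the shift amount is loaded in a narrower type than the shifted
   value; the pattern re-expresses the shift so that both operands have
   the same vector type.  */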
2559 
2560 static gimple *
2561 vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
2562 					tree *type_out)
2563 {
2564   gimple *last_stmt = stmt_vinfo->stmt;
2565   tree oprnd0, oprnd1, lhs, var;
2566   gimple *pattern_stmt;
2567   enum tree_code rhs_code;
2568   vec_info *vinfo = stmt_vinfo->vinfo;
2569 
2570   if (!is_gimple_assign (last_stmt))
2571     return NULL;
2572 
2573   rhs_code = gimple_assign_rhs_code (last_stmt);
2574   switch (rhs_code)
2575     {
2576     case LSHIFT_EXPR:
2577     case RSHIFT_EXPR:
2578     case LROTATE_EXPR:
2579     case RROTATE_EXPR:
2580       break;
2581     default:
2582       return NULL;
2583     }
2584 
2585   lhs = gimple_assign_lhs (last_stmt);
2586   oprnd0 = gimple_assign_rhs1 (last_stmt);
2587   oprnd1 = gimple_assign_rhs2 (last_stmt);
2588   if (TREE_CODE (oprnd0) != SSA_NAME
2589       || TREE_CODE (oprnd1) != SSA_NAME
2590       || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2591       || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2592       || TYPE_PRECISION (TREE_TYPE (lhs))
2593 	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2594     return NULL;
2595 
2596   stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2597   if (!def_vinfo)
2598     return NULL;
2599 
2600   *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
2601   if (*type_out == NULL_TREE)
2602     return NULL;
2603 
2604   tree def = NULL_TREE;
2605   gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2606   if (def_stmt && gimple_assign_cast_p (def_stmt))
2607     {
2608       tree rhs1 = gimple_assign_rhs1 (def_stmt);
2609       if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2610 	  && TYPE_PRECISION (TREE_TYPE (rhs1))
2611 	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2612 	{
2613 	  if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2614 	      >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2615 	    def = rhs1;
2616 	  else
2617 	    {
2618 	      tree mask
2619 		= build_low_bits_mask (TREE_TYPE (rhs1),
2620 				       TYPE_PRECISION (TREE_TYPE (oprnd1)));
2621 	      def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2622 	      def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2623 	      tree vecstype = get_vectype_for_scalar_type (vinfo,
2624 							   TREE_TYPE (rhs1));
2625 	      append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2626 	    }
2627 	}
2628     }
2629 
2630   if (def == NULL_TREE)
2631     {
2632       def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2633       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2634       append_pattern_def_seq (stmt_vinfo, def_stmt);
2635     }
2636 
2637   /* Pattern detected.  */
2638   vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2639 
2640   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
2641   var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2642   pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2643 
2644   return pattern_stmt;
2645 }
2646 
2647 /* Return true iff the target has a vector optab implementing the operation
2648    CODE on type VECTYPE.  */
2649 
2650 static bool
2651 target_has_vecop_for_code (tree_code code, tree vectype)
2652 {
2653   optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2654   return voptab
2655 	 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2656 }
2657 
2658 /* Verify that the target has optabs for VECTYPE to perform all the steps
2659    needed by the multiplication-by-immediate synthesis algorithm described by
2660    ALG and VAR.  If SYNTH_SHIFT_P is true ensure that vector addition is
2661    present.  Return true iff the target supports all the steps.  */
2662 
2663 static bool
2664 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2665 				 tree vectype, bool synth_shift_p)
2666 {
2667   if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2668     return false;
2669 
2670   bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2671   bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2672 
2673   if (var == negate_variant
2674       && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2675     return false;
2676 
2677   /* If we must synthesize shifts with additions make sure that vector
2678      addition is available.  */
2679   if ((var == add_variant || synth_shift_p) && !supports_vplus)
2680     return false;
2681 
2682   for (int i = 1; i < alg->ops; i++)
2683     {
2684       switch (alg->op[i])
2685 	{
2686 	case alg_shift:
2687 	  break;
2688 	case alg_add_t_m2:
2689 	case alg_add_t2_m:
2690 	case alg_add_factor:
2691 	  if (!supports_vplus)
2692 	    return false;
2693 	  break;
2694 	case alg_sub_t_m2:
2695 	case alg_sub_t2_m:
2696 	case alg_sub_factor:
2697 	  if (!supports_vminus)
2698 	    return false;
2699 	  break;
2700 	case alg_unknown:
2701 	case alg_m:
2702 	case alg_zero:
2703 	case alg_impossible:
2704 	  return false;
2705 	default:
2706 	  gcc_unreachable ();
2707 	}
2708     }
2709 
2710   return true;
2711 }
2712 
2713 /* Synthesize a left shift of OP by AMNT bits using a series of additions,
2714    putting the final result in DEST.  Append all statements but the last to
2715    VINFO's pattern definition sequence and return the last statement.  */
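
/* For instance (illustrative only), DEST = OP << 3 would be emitted as

     t1 = OP + OP;
     t2 = t1 + t1;
     DEST = t2 + t2;

   i.e. AMNT self-additions, doubling the running value each time.  */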
2716 
2717 static gimple *
2718 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2719 			   stmt_vec_info vinfo)
2720 {
2721   HOST_WIDE_INT i;
2722   tree itype = TREE_TYPE (op);
2723   tree prev_res = op;
2724   gcc_assert (amnt >= 0);
2725   for (i = 0; i < amnt; i++)
2726     {
2727       tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2728 		      : dest;
2729       gimple *stmt
2730         = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2731       prev_res = tmp_var;
2732       if (i < amnt - 1)
2733 	append_pattern_def_seq (vinfo, stmt);
2734       else
2735 	return stmt;
2736     }
2737   gcc_unreachable ();
2738   return NULL;
2739 }
2740 
2741 /* Helper for vect_synth_mult_by_constant.  Apply a binary operation
2742    CODE to operands OP1 and OP2, creating a new temporary SSA var in
2743    the process if necessary.  Append the resulting assignment statements
2744    to the sequence in STMT_VINFO.  Return the SSA variable that holds the
2745    result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
2746    left shifts using additions.  */
2747 
2748 static tree
2749 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2750 			     stmt_vec_info stmt_vinfo, bool synth_shift_p)
2751 {
2752   if (integer_zerop (op2)
2753       && (code == LSHIFT_EXPR
2754 	  || code == PLUS_EXPR))
2755     {
2756       gcc_assert (TREE_CODE (op1) == SSA_NAME);
2757       return op1;
2758     }
2759 
2760   gimple *stmt;
2761   tree itype = TREE_TYPE (op1);
2762   tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2763 
2764   if (code == LSHIFT_EXPR
2765       && synth_shift_p)
2766     {
2767       stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2768 					 stmt_vinfo);
2769       append_pattern_def_seq (stmt_vinfo, stmt);
2770       return tmp_var;
2771     }
2772 
2773   stmt = gimple_build_assign (tmp_var, code, op1, op2);
2774   append_pattern_def_seq (stmt_vinfo, stmt);
2775   return tmp_var;
2776 }
2777 
2778 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2779    and simple arithmetic operations to be vectorized.  Record the statements
2780    produced in STMT_VINFO and return the last statement in the sequence or
2781    NULL if it's not possible to synthesize such a multiplication.
2782    This function mirrors the behavior of expand_mult_const in expmed.c but
2783    works on tree-ssa form.  */
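
/* As an illustrative sketch (the exact sequence depends on what
   choose_mult_variant selects for the target and constant), OP * 9 might
   be synthesized as a single alg_add_t2_m step:

     t1 = OP << 3;
     res = t1 + OP;

   carried out in an unsigned copy of the type and converted back at the
   end when signed overflow must not be introduced.  */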
2784 
2785 static gimple *
2786 vect_synth_mult_by_constant (tree op, tree val,
2787 			     stmt_vec_info stmt_vinfo)
2788 {
2789   vec_info *vinfo = stmt_vinfo->vinfo;
2790   tree itype = TREE_TYPE (op);
2791   machine_mode mode = TYPE_MODE (itype);
2792   struct algorithm alg;
2793   mult_variant variant;
2794   if (!tree_fits_shwi_p (val))
2795     return NULL;
2796 
2797   /* Multiplication synthesis by shifts, adds and subs can introduce
2798      signed overflow where the original operation didn't.  Perform the
2799      operations on an unsigned type and cast back to avoid this.
2800      In the future we may want to relax this for synthesis algorithms
2801      that we can prove do not cause unexpected overflow.  */
2802   bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2803 
2804   tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2805 
2806   /* Targets that don't support vector shifts but support vector additions
2807      can synthesize shifts that way.  */
2808   bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
2809 
2810   HOST_WIDE_INT hwval = tree_to_shwi (val);
2811   /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2812      The vectorizer's benefit analysis will decide whether it's beneficial
2813      to do this.  */
2814   bool possible = choose_mult_variant (mode, hwval, &alg,
2815 					&variant, MAX_COST);
2816   if (!possible)
2817     return NULL;
2818 
2819   tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
2820 
2821   if (!vectype
2822       || !target_supports_mult_synth_alg (&alg, variant,
2823 					   vectype, synth_shift_p))
2824     return NULL;
2825 
2826   tree accumulator;
2827 
2828   /* Clear out the sequence of statements so we can populate it below.  */
2829   gimple *stmt = NULL;
2830 
2831   if (cast_to_unsigned_p)
2832     {
2833       tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2834       stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2835       append_pattern_def_seq (stmt_vinfo, stmt);
2836       op = tmp_op;
2837     }
2838 
2839   if (alg.op[0] == alg_zero)
2840     accumulator = build_int_cst (multtype, 0);
2841   else
2842     accumulator = op;
2843 
2844   bool needs_fixup = (variant == negate_variant)
2845 		      || (variant == add_variant);
2846 
2847   for (int i = 1; i < alg.ops; i++)
2848     {
2849       tree shft_log = build_int_cst (multtype, alg.log[i]);
2850       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2851       tree tmp_var = NULL_TREE;
2852 
2853       switch (alg.op[i])
2854 	{
2855 	case alg_shift:
2856 	  if (synth_shift_p)
2857 	    stmt
2858 	      = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2859 					    stmt_vinfo);
2860 	  else
2861 	    stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2862 					 shft_log);
2863 	  break;
2864 	case alg_add_t_m2:
2865 	  tmp_var
2866 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2867 					    stmt_vinfo, synth_shift_p);
2868 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2869 				       tmp_var);
2870 	  break;
2871 	case alg_sub_t_m2:
2872 	  tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2873 						  shft_log, stmt_vinfo,
2874 						  synth_shift_p);
2875 	  /* In some algorithms the first step involves zeroing the
2876 	     accumulator.  If subtracting from such an accumulator
2877 	     just emit the negation directly.  */
2878 	  if (integer_zerop (accumulator))
2879 	    stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2880 	  else
2881 	    stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2882 					tmp_var);
2883 	  break;
2884 	case alg_add_t2_m:
2885 	  tmp_var
2886 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2887 					   stmt_vinfo, synth_shift_p);
2888 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2889 	  break;
2890 	case alg_sub_t2_m:
2891 	  tmp_var
2892 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2893 					   stmt_vinfo, synth_shift_p);
2894 	  stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2895 	  break;
2896 	case alg_add_factor:
2897 	  tmp_var
2898 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2899 					    stmt_vinfo, synth_shift_p);
2900 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2901 				       tmp_var);
2902 	  break;
2903 	case alg_sub_factor:
2904 	  tmp_var
2905 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2906 					   stmt_vinfo, synth_shift_p);
2907 	  stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2908 				      accumulator);
2909 	  break;
2910 	default:
2911 	  gcc_unreachable ();
2912 	}
2913       /* We don't want to append the last stmt in the sequence to stmt_vinfo
2914 	 but rather return it directly.  */
2915 
2916       if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2917 	append_pattern_def_seq (stmt_vinfo, stmt);
2918       accumulator = accum_tmp;
2919     }
2920   if (variant == negate_variant)
2921     {
2922       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2923       stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2924       accumulator = accum_tmp;
2925       if (cast_to_unsigned_p)
2926 	append_pattern_def_seq (stmt_vinfo, stmt);
2927     }
2928   else if (variant == add_variant)
2929     {
2930       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2931       stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2932       accumulator = accum_tmp;
2933       if (cast_to_unsigned_p)
2934 	append_pattern_def_seq (stmt_vinfo, stmt);
2935     }
2936   /* Move back to a signed type if needed.  */
2937   if (cast_to_unsigned_p)
2938     {
2939       tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2940       stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2941     }
2942 
2943   return stmt;
2944 }
2945 
2946 /* Detect multiplication by constant and convert it into a sequence of
2947    shifts and additions, subtractions, negations.  We reuse the
2948    choose_mult_variant algorithms from expmed.c
2949 
2950    Input/Output:
2951 
2952    STMT_VINFO: The stmt from which the pattern search begins,
2953    i.e. the mult stmt.
2954 
2955  Output:
2956 
2957   * TYPE_OUT: The type of the output of this pattern.
2958 
2959   * Return value: A new stmt that will be used to replace
2960     the multiplication.  */
2961 
2962 static gimple *
2963 vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2964 {
2965   vec_info *vinfo = stmt_vinfo->vinfo;
2966   gimple *last_stmt = stmt_vinfo->stmt;
2967   tree oprnd0, oprnd1, vectype, itype;
2968   gimple *pattern_stmt;
2969 
2970   if (!is_gimple_assign (last_stmt))
2971     return NULL;
2972 
2973   if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2974     return NULL;
2975 
2976   oprnd0 = gimple_assign_rhs1 (last_stmt);
2977   oprnd1 = gimple_assign_rhs2 (last_stmt);
2978   itype = TREE_TYPE (oprnd0);
2979 
2980   if (TREE_CODE (oprnd0) != SSA_NAME
2981       || TREE_CODE (oprnd1) != INTEGER_CST
2982       || !INTEGRAL_TYPE_P (itype)
2983       || !type_has_mode_precision_p (itype))
2984     return NULL;
2985 
2986   vectype = get_vectype_for_scalar_type (vinfo, itype);
2987   if (vectype == NULL_TREE)
2988     return NULL;
2989 
2990   /* If the target can handle vectorized multiplication natively,
2991      don't attempt to optimize this.  */
2992   optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2993   if (mul_optab != unknown_optab)
2994     {
2995       machine_mode vec_mode = TYPE_MODE (vectype);
2996       int icode = (int) optab_handler (mul_optab, vec_mode);
2997       if (icode != CODE_FOR_nothing)
2998 	return NULL;
2999     }
3000 
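  /* The target has no usable vector multiply, so try to synthesize the
     multiplication from vector shifts, additions, subtractions and
     negations instead; e.g. x * 5 can become (x << 2) + x.  */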
3001   pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
3002   if (!pattern_stmt)
3003     return NULL;
3004 
3005   /* Pattern detected.  */
3006   vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3007 
3008   *type_out = vectype;
3009 
3010   return pattern_stmt;
3011 }
3012 
3013 /* Detect a signed division by a constant that wouldn't be
3014    otherwise vectorized:
3015 
3016    type a_t, b_t;
3017 
3018    S1 a_t = b_t / N;
3019 
3020   where type 'type' is an integral type and N is a constant.
3021 
3022   Similarly handle modulo by a constant:
3023 
3024    S4 a_t = b_t % N;
3025 
3026   Input/Output:
3027 
3028   * STMT_VINFO: The stmt from which the pattern search begins,
3029     i.e. the division stmt.  S1 is replaced, if N is a power
3030     of two constant and type is signed, by:
3031   S3  y_t = b_t < 0 ? N - 1 : 0;
3032   S2  x_t = b_t + y_t;
3033   S1' a_t = x_t >> log2 (N);
3034 
3035     S4 is replaced, if N is a power of two constant and
3036     type is signed, by (where *_T temporaries have unsigned type):
3037   S9  y_T = b_t < 0 ? -1U : 0U;
3038   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3039   S7  z_t = (type) z_T;
3040   S6  w_t = b_t + z_t;
3041   S5  x_t = w_t & (N - 1);
3042   S4' a_t = x_t - z_t;
3043 
3044   Output:
3045 
3046   * TYPE_OUT: The type of the output of this pattern.
3047 
3048   * Return value: A new stmt that will be used to replace the division
3049     S1 or modulo S4 stmt.  */
3050 
3051 static gimple *
3052 vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3053 {
3054   vec_info *vinfo = stmt_vinfo->vinfo;
3055   gimple *last_stmt = stmt_vinfo->stmt;
3056   tree oprnd0, oprnd1, vectype, itype, cond;
3057   gimple *pattern_stmt, *def_stmt;
3058   enum tree_code rhs_code;
3059   optab optab;
3060   tree q;
3061   int dummy_int, prec;
3062 
3063   if (!is_gimple_assign (last_stmt))
3064     return NULL;
3065 
3066   rhs_code = gimple_assign_rhs_code (last_stmt);
3067   switch (rhs_code)
3068     {
3069     case TRUNC_DIV_EXPR:
3070     case EXACT_DIV_EXPR:
3071     case TRUNC_MOD_EXPR:
3072       break;
3073     default:
3074       return NULL;
3075     }
3076 
3077   oprnd0 = gimple_assign_rhs1 (last_stmt);
3078   oprnd1 = gimple_assign_rhs2 (last_stmt);
3079   itype = TREE_TYPE (oprnd0);
3080   if (TREE_CODE (oprnd0) != SSA_NAME
3081       || TREE_CODE (oprnd1) != INTEGER_CST
3082       || TREE_CODE (itype) != INTEGER_TYPE
3083       || !type_has_mode_precision_p (itype))
3084     return NULL;
3085 
3086   scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3087   vectype = get_vectype_for_scalar_type (vinfo, itype);
3088   if (vectype == NULL_TREE)
3089     return NULL;
3090 
3091   if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3092     {
3093       /* If the target can handle vectorized division or modulo natively,
3094 	 don't attempt to optimize this, since native division is likely
3095 	 to give smaller code.  */
3096       optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3097       if (optab != unknown_optab)
3098 	{
3099 	  machine_mode vec_mode = TYPE_MODE (vectype);
3100 	  int icode = (int) optab_handler (optab, vec_mode);
3101 	  if (icode != CODE_FOR_nothing)
3102 	    return NULL;
3103 	}
3104     }
3105 
3106   prec = TYPE_PRECISION (itype);
3107   if (integer_pow2p (oprnd1))
3108     {
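      /* Only signed division by a positive power of two is handled here;
	 the sequences built below assume a signed type and a positive
	 divisor.  */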
3109       if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3110 	return NULL;
3111 
3112       /* Pattern detected.  */
3113       vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3114 
3115       *type_out = vectype;
3116 
3117       /* Check if the target supports this internal function.  */
3118       internal_fn ifn = IFN_DIV_POW2;
3119       if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3120 	{
3121 	  tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3122 
3123 	  tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3124 	  gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3125 	  gimple_call_set_lhs (div_stmt, var_div);
3126 
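	  /* For the modulo case reconstruct the remainder from the
	     quotient:  r = oprnd0 - (q << shift), where q is the result
	     of the IFN_DIV_POW2 call built above.  */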
3127 	  if (rhs_code == TRUNC_MOD_EXPR)
3128 	    {
3129 	      append_pattern_def_seq (stmt_vinfo, div_stmt);
3130 	      def_stmt
3131 		= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3132 				       LSHIFT_EXPR, var_div, shift);
3133 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
3134 	      pattern_stmt
3135 		= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3136 				       MINUS_EXPR, oprnd0,
3137 				       gimple_assign_lhs (def_stmt));
3138 	    }
3139 	  else
3140 	    pattern_stmt = div_stmt;
3141 	  gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3142 
3143 	  return pattern_stmt;
3144 	}
3145 
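      /* Otherwise open-code the adjustment; e.g. for TRUNC_DIV_EXPR with
	 N == 8 this becomes
	   y_t = b_t < 0 ? 7 : 0;
	   x_t = b_t + y_t;
	   a_t = x_t >> 3;
	 so that the arithmetic shift rounds towards zero as required.  */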
3146       cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3147 		     build_int_cst (itype, 0));
3148       if (rhs_code == TRUNC_DIV_EXPR
3149 	  || rhs_code == EXACT_DIV_EXPR)
3150 	{
3151 	  tree var = vect_recog_temp_ssa_var (itype, NULL);
3152 	  tree shift;
3153 	  def_stmt
3154 	    = gimple_build_assign (var, COND_EXPR, cond,
3155 				   fold_build2 (MINUS_EXPR, itype, oprnd1,
3156 						build_int_cst (itype, 1)),
3157 				   build_int_cst (itype, 0));
3158 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
3159 	  var = vect_recog_temp_ssa_var (itype, NULL);
3160 	  def_stmt
3161 	    = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3162 				   gimple_assign_lhs (def_stmt));
3163 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
3164 
3165 	  shift = build_int_cst (itype, tree_log2 (oprnd1));
3166 	  pattern_stmt
3167 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3168 				   RSHIFT_EXPR, var, shift);
3169 	}
3170       else
3171 	{
3172 	  tree signmask;
3173 	  if (compare_tree_int (oprnd1, 2) == 0)
3174 	    {
3175 	      signmask = vect_recog_temp_ssa_var (itype, NULL);
3176 	      def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3177 					      build_int_cst (itype, 1),
3178 					      build_int_cst (itype, 0));
3179 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
3180 	    }
3181 	  else
3182 	    {
3183 	      tree utype
3184 		= build_nonstandard_integer_type (prec, 1);
3185 	      tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3186 	      tree shift
3187 		= build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3188 					- tree_log2 (oprnd1));
3189 	      tree var = vect_recog_temp_ssa_var (utype, NULL);
3190 
3191 	      def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3192 					      build_int_cst (utype, -1),
3193 					      build_int_cst (utype, 0));
3194 	      append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
3195 	      var = vect_recog_temp_ssa_var (utype, NULL);
3196 	      def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3197 					      gimple_assign_lhs (def_stmt),
3198 					      shift);
3199 	      append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
3200 	      signmask = vect_recog_temp_ssa_var (itype, NULL);
3201 	      def_stmt
3202 		= gimple_build_assign (signmask, NOP_EXPR, var);
3203 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
3204 	    }
3205 	  def_stmt
3206 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3207 				   PLUS_EXPR, oprnd0, signmask);
3208 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
3209 	  def_stmt
3210 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3211 				   BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3212 				   fold_build2 (MINUS_EXPR, itype, oprnd1,
3213 						build_int_cst (itype, 1)));
3214 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
3215 
3216 	  pattern_stmt
3217 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3218 				   MINUS_EXPR, gimple_assign_lhs (def_stmt),
3219 				   signmask);
3220 	}
3221 
3222       return pattern_stmt;
3223     }
3224 
3225   if (prec > HOST_BITS_PER_WIDE_INT
3226       || integer_zerop (oprnd1))
3227     return NULL;
3228 
3229   if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3230     return NULL;
3231 
3232   if (TYPE_UNSIGNED (itype))
3233     {
3234       unsigned HOST_WIDE_INT mh, ml;
3235       int pre_shift, post_shift;
3236       unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3237 				  & GET_MODE_MASK (itype_mode));
3238       tree t1, t2, t3, t4;
3239 
3240       if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3241 	/* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
3242 	return NULL;
3243 
3244       /* Find a suitable multiplier and right shift count
3245 	 instead of multiplying with D.  */
3246       mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3247 
3248       /* If the suggested multiplier is more than SIZE bits, we can do better
3249 	 for even divisors, using an initial right shift.  */
3250       if (mh != 0 && (d & 1) == 0)
3251 	{
3252 	  pre_shift = ctz_or_zero (d);
3253 	  mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3254 				  &ml, &post_shift, &dummy_int);
3255 	  gcc_assert (!mh);
3256 	}
3257       else
3258 	pre_shift = 0;
3259 
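      /* If the suggested multiplier does not fit in PREC bits (MH set)
	 we need the longer add-and-halve sequence below, otherwise a
	 single highpart multiply followed by a shift is enough.  */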
3260       if (mh != 0)
3261 	{
3262 	  if (post_shift - 1 >= prec)
3263 	    return NULL;
3264 
3265 	  /* t1 = oprnd0 h* ml;
3266 	     t2 = oprnd0 - t1;
3267 	     t3 = t2 >> 1;
3268 	     t4 = t1 + t3;
3269 	     q = t4 >> (post_shift - 1);  */
3270 	  t1 = vect_recog_temp_ssa_var (itype, NULL);
3271 	  def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3272 					  build_int_cst (itype, ml));
3273 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
3274 
3275 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
3276 	  def_stmt
3277 	    = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3278 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
3279 
3280 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
3281 	  def_stmt
3282 	    = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3283 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
3284 
3285 	  t4 = vect_recog_temp_ssa_var (itype, NULL);
3286 	  def_stmt
3287 	    = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3288 
3289 	  if (post_shift != 1)
3290 	    {
3291 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
3292 
3293 	      q = vect_recog_temp_ssa_var (itype, NULL);
3294 	      pattern_stmt
3295 		= gimple_build_assign (q, RSHIFT_EXPR, t4,
3296 				       build_int_cst (itype, post_shift - 1));
3297 	    }
3298 	  else
3299 	    {
3300 	      q = t4;
3301 	      pattern_stmt = def_stmt;
3302 	    }
3303 	}
3304       else
3305 	{
3306 	  if (pre_shift >= prec || post_shift >= prec)
3307 	    return NULL;
3308 
3309 	  /* t1 = oprnd0 >> pre_shift;
3310 	     t2 = t1 h* ml;
3311 	     q = t2 >> post_shift;  */
3312 	  if (pre_shift)
3313 	    {
3314 	      t1 = vect_recog_temp_ssa_var (itype, NULL);
3315 	      def_stmt
3316 		= gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3317 				       build_int_cst (NULL, pre_shift));
3318 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
3319 	    }
3320 	  else
3321 	    t1 = oprnd0;
3322 
3323 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
3324 	  def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3325 					  build_int_cst (itype, ml));
3326 
3327 	  if (post_shift)
3328 	    {
3329 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
3330 
3331 	      q = vect_recog_temp_ssa_var (itype, NULL);
3332 	      def_stmt
3333 		= gimple_build_assign (q, RSHIFT_EXPR, t2,
3334 				       build_int_cst (itype, post_shift));
3335 	    }
3336 	  else
3337 	    q = t2;
3338 
3339 	  pattern_stmt = def_stmt;
3340 	}
3341     }
3342   else
3343     {
3344       unsigned HOST_WIDE_INT ml;
3345       int post_shift;
3346       HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
3347       unsigned HOST_WIDE_INT abs_d;
3348       bool add = false;
3349       tree t1, t2, t3, t4;
3350 
3351       /* Give up for -1.  */
3352       if (d == -1)
3353 	return NULL;
3354 
3355       /* Since d might be INT_MIN, we have to cast to
3356 	 unsigned HOST_WIDE_INT before negating to avoid
3357 	 undefined signed overflow.  */
3358       abs_d = (d >= 0
3359 	       ? (unsigned HOST_WIDE_INT) d
3360 	       : - (unsigned HOST_WIDE_INT) d);
3361 
3362       /* n rem d = n rem -d */
3363       if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3364 	{
3365 	  d = abs_d;
3366 	  oprnd1 = build_int_cst (itype, abs_d);
3367 	}
3368       if (HOST_BITS_PER_WIDE_INT >= prec
3369 	  && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3370 	/* This case is not handled correctly below.  */
3371 	return NULL;
3372 
3373       choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3374       if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3375 	{
3376 	  add = true;
3377 	  ml |= HOST_WIDE_INT_M1U << (prec - 1);
3378 	}
3379       if (post_shift >= prec)
3380 	return NULL;
3381 
3382       /* t1 = oprnd0 h* ml;  */
3383       t1 = vect_recog_temp_ssa_var (itype, NULL);
3384       def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3385 				      build_int_cst (itype, ml));
3386 
3387       if (add)
3388 	{
3389 	  /* t2 = t1 + oprnd0;  */
3390 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
3391 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
3392 	  def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3393 	}
3394       else
3395 	t2 = t1;
3396 
3397       if (post_shift)
3398 	{
3399 	  /* t3 = t2 >> post_shift;  */
3400 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
3401 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
3402 	  def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3403 					  build_int_cst (itype, post_shift));
3404 	}
3405       else
3406 	t3 = t2;
3407 
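      /* The highpart multiply computed a quotient rounded towards
	 negative infinity; adjust it towards zero by subtracting the
	 sign of oprnd0 (0 or -1).  Range information lets us fold that
	 sign to a constant where possible.  */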
3408       wide_int oprnd0_min, oprnd0_max;
3409       int msb = 1;
3410       if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
3411 	{
3412 	  if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
3413 	    msb = 0;
3414 	  else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
3415 	    msb = -1;
3416 	}
3417 
3418       if (msb == 0 && d >= 0)
3419 	{
3420 	  /* q = t3;  */
3421 	  q = t3;
3422 	  pattern_stmt = def_stmt;
3423 	}
3424       else
3425 	{
3426 	  /* t4 = oprnd0 >> (prec - 1);
3427 	     or if we know from VRP that oprnd0 >= 0
3428 	     t4 = 0;
3429 	     or if we know from VRP that oprnd0 < 0
3430 	     t4 = -1;  */
3431 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
3432 	  t4 = vect_recog_temp_ssa_var (itype, NULL);
3433 	  if (msb != 1)
3434 	    def_stmt = gimple_build_assign (t4, INTEGER_CST,
3435 					    build_int_cst (itype, msb));
3436 	  else
3437 	    def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3438 					    build_int_cst (itype, prec - 1));
3439 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
3440 
3441 	  /* q = t3 - t4;  or q = t4 - t3;  */
3442 	  q = vect_recog_temp_ssa_var (itype, NULL);
3443 	  pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3444 					      d < 0 ? t3 : t4);
3445 	}
3446     }
3447 
3448   if (rhs_code == TRUNC_MOD_EXPR)
3449     {
3450       tree r, t1;
3451 
3452       /* We divided.  Now finish by:
3453 	 t1 = q * oprnd1;
3454 	 r = oprnd0 - t1;  */
3455       append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3456 
3457       t1 = vect_recog_temp_ssa_var (itype, NULL);
3458       def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3459       append_pattern_def_seq (stmt_vinfo, def_stmt);
3460 
3461       r = vect_recog_temp_ssa_var (itype, NULL);
3462       pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3463     }
3464 
3465   /* Pattern detected.  */
3466   vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3467 
3468   *type_out = vectype;
3469   return pattern_stmt;
3470 }
3471 
3472 /* Function vect_recog_mixed_size_cond_pattern
3473 
3474    Try to find the following pattern:
3475 
3476      type x_t, y_t;
3477      TYPE a_T, b_T, c_T;
3478    loop:
3479      S1  a_T = x_t CMP y_t ? b_T : c_T;
3480 
3481    where type 'TYPE' is an integral type which has different size
3482    from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
3483    than 'type', the constants need to fit into an integer type
3484    with the same width as 'type') or results of conversion from 'type'.
3485 
3486    Input:
3487 
3488    * STMT_VINFO: The stmt from which the pattern search begins.
3489 
3490    Output:
3491 
3492    * TYPE_OUT: The type of the output of this pattern.
3493 
3494    * Return value: A new stmt that will be used to replace the pattern.
3495 	Additionally a def_stmt is added.
3496 
3497 	a_it = x_t CMP y_t ? b_it : c_it;
3498 	a_T = (TYPE) a_it;  */
3499 
3500 static gimple *
3501 vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3502 {
3503   vec_info *vinfo = stmt_vinfo->vinfo;
3504   gimple *last_stmt = stmt_vinfo->stmt;
3505   tree cond_expr, then_clause, else_clause;
3506   tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3507   gimple *pattern_stmt, *def_stmt;
3508   tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3509   gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3510   bool promotion;
3511   tree comp_scalar_type;
3512 
3513   if (!is_gimple_assign (last_stmt)
3514       || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3515       || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3516     return NULL;
3517 
3518   cond_expr = gimple_assign_rhs1 (last_stmt);
3519   then_clause = gimple_assign_rhs2 (last_stmt);
3520   else_clause = gimple_assign_rhs3 (last_stmt);
3521 
3522   if (!COMPARISON_CLASS_P (cond_expr))
3523     return NULL;
3524 
3525   comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3526   comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
3527   if (comp_vectype == NULL_TREE)
3528     return NULL;
3529 
3530   type = gimple_expr_type (last_stmt);
3531   if (types_compatible_p (type, comp_scalar_type)
3532       || ((TREE_CODE (then_clause) != INTEGER_CST
3533 	   || TREE_CODE (else_clause) != INTEGER_CST)
3534 	  && !INTEGRAL_TYPE_P (comp_scalar_type))
3535       || !INTEGRAL_TYPE_P (type))
3536     return NULL;
3537 
3538   if ((TREE_CODE (then_clause) != INTEGER_CST
3539        && !type_conversion_p (then_clause, stmt_vinfo, false, &orig_type0,
3540 			      &def_stmt0, &promotion))
3541       || (TREE_CODE (else_clause) != INTEGER_CST
3542 	  && !type_conversion_p (else_clause, stmt_vinfo, false, &orig_type1,
3543 				 &def_stmt1, &promotion)))
3544     return NULL;
3545 
3546   if (orig_type0 && orig_type1
3547       && !types_compatible_p (orig_type0, orig_type1))
3548     return NULL;
3549 
3550   if (orig_type0)
3551     {
3552       if (!types_compatible_p (orig_type0, comp_scalar_type))
3553 	return NULL;
3554       then_clause = gimple_assign_rhs1 (def_stmt0);
3555       itype = orig_type0;
3556     }
3557 
3558   if (orig_type1)
3559     {
3560       if (!types_compatible_p (orig_type1, comp_scalar_type))
3561 	return NULL;
3562       else_clause = gimple_assign_rhs1 (def_stmt1);
3563       itype = orig_type1;
3564     }
3565 
3566 
3567   HOST_WIDE_INT cmp_mode_size
3568     = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3569 
3570   scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3571   if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3572     return NULL;
3573 
3574   vectype = get_vectype_for_scalar_type (vinfo, type);
3575   if (vectype == NULL_TREE)
3576     return NULL;
3577 
3578   if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3579     return NULL;
3580 
3581   if (itype == NULL_TREE)
3582     itype = build_nonstandard_integer_type (cmp_mode_size,
3583   					    TYPE_UNSIGNED (type));
3584 
3585   if (itype == NULL_TREE
3586       || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3587     return NULL;
3588 
3589   vecitype = get_vectype_for_scalar_type (vinfo, itype);
3590   if (vecitype == NULL_TREE)
3591     return NULL;
3592 
3593   if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3594     return NULL;
3595 
3596   if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3597     {
3598       if ((TREE_CODE (then_clause) == INTEGER_CST
3599 	   && !int_fits_type_p (then_clause, itype))
3600 	  || (TREE_CODE (else_clause) == INTEGER_CST
3601 	      && !int_fits_type_p (else_clause, itype)))
3602 	return NULL;
3603     }
3604 
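  /* Emit the COND_EXPR in ITYPE, whose vector type has the same number
     of lanes as the comparison, and widen or narrow the result to TYPE
     in a separate conversion statement.  */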
3605   def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3606 				  COND_EXPR, unshare_expr (cond_expr),
3607 				  fold_convert (itype, then_clause),
3608 				  fold_convert (itype, else_clause));
3609   pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3610 				      NOP_EXPR, gimple_assign_lhs (def_stmt));
3611 
3612   append_pattern_def_seq (stmt_vinfo, def_stmt, vecitype);
3613   *type_out = vectype;
3614 
3615   vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3616 
3617   return pattern_stmt;
3618 }
3619 
3620 
3621 /* Helper function of vect_recog_bool_pattern.  Called recursively, return
3622    true if bool VAR can and should be optimized that way.  Assume it shouldn't
3623    in case it's a result of a comparison which can be directly vectorized into
3624    a vector comparison.  Fills in STMTS with all stmts visited during the
3625    walk.  */
3626 
3627 static bool
3628 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3629 {
3630   tree rhs1;
3631   enum tree_code rhs_code;
3632 
3633   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3634   if (!def_stmt_info)
3635     return false;
3636 
3637   gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3638   if (!def_stmt)
3639     return false;
3640 
3641   if (stmts.contains (def_stmt))
3642     return true;
3643 
3644   rhs1 = gimple_assign_rhs1 (def_stmt);
3645   rhs_code = gimple_assign_rhs_code (def_stmt);
3646   switch (rhs_code)
3647     {
3648     case SSA_NAME:
3649       if (! check_bool_pattern (rhs1, vinfo, stmts))
3650 	return false;
3651       break;
3652 
3653     CASE_CONVERT:
3654       if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3655 	return false;
3656       if (! check_bool_pattern (rhs1, vinfo, stmts))
3657 	return false;
3658       break;
3659 
3660     case BIT_NOT_EXPR:
3661       if (! check_bool_pattern (rhs1, vinfo, stmts))
3662 	return false;
3663       break;
3664 
3665     case BIT_AND_EXPR:
3666     case BIT_IOR_EXPR:
3667     case BIT_XOR_EXPR:
3668       if (! check_bool_pattern (rhs1, vinfo, stmts)
3669 	  || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3670 	return false;
3671       break;
3672 
3673     default:
3674       if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3675 	{
3676 	  tree vecitype, comp_vectype;
3677 
3678 	  /* If the comparison can throw, then is_gimple_condexpr will be
3679 	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
3680 	  if (stmt_could_throw_p (cfun, def_stmt))
3681 	    return false;
3682 
3683 	  comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
3684 	  if (comp_vectype == NULL_TREE)
3685 	    return false;
3686 
3687 	  tree mask_type = get_mask_type_for_scalar_type (vinfo,
3688 							  TREE_TYPE (rhs1));
3689 	  if (mask_type
3690 	      && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3691 	    return false;
3692 
3693 	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3694 	    {
3695 	      scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3696 	      tree itype
3697 		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3698 	      vecitype = get_vectype_for_scalar_type (vinfo, itype);
3699 	      if (vecitype == NULL_TREE)
3700 		return false;
3701 	    }
3702 	  else
3703 	    vecitype = comp_vectype;
3704 	  if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3705 	    return false;
3706 	}
3707       else
3708 	return false;
3709       break;
3710     }
3711 
3712   bool res = stmts.add (def_stmt);
3713   /* We can't end up recursing when just visiting SSA defs but not PHIs.  */
3714   gcc_assert (!res);
3715 
3716   return true;
3717 }
3718 
3719 
3720 /* Helper function of adjust_bool_pattern.  Build a cast of VAR to TYPE,
3721    append the cast statement to STMT_INFO's pattern definition sequence
3722    and return the new SSA name.  */
3723 
3724 static tree
3725 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3726 {
3727   vec_info *vinfo = stmt_info->vinfo;
3728   gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3729 					   NOP_EXPR, var);
3730   append_pattern_def_seq (stmt_info, cast_stmt,
3731 			  get_vectype_for_scalar_type (vinfo, type));
3732   return gimple_assign_lhs (cast_stmt);
3733 }
3734 
3735 /* Helper function of vect_recog_bool_pattern.  Do the actual transformations.
3736    VAR is an SSA_NAME that should be transformed from bool to a wider integer
3737    type, OUT_TYPE is the desired final integer type of the whole pattern.
3738    STMT_INFO is the info of the pattern root and is where pattern stmts should
3739    be associated with.  DEFS is a map of pattern defs.  */
3740 
3741 static void
3742 adjust_bool_pattern (tree var, tree out_type,
3743 		     stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3744 {
3745   vec_info *vinfo = stmt_info->vinfo;
3746   gimple *stmt = SSA_NAME_DEF_STMT (var);
3747   enum tree_code rhs_code, def_rhs_code;
3748   tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3749   location_t loc;
3750   gimple *pattern_stmt, *def_stmt;
3751   tree trueval = NULL_TREE;
3752 
3753   rhs1 = gimple_assign_rhs1 (stmt);
3754   rhs2 = gimple_assign_rhs2 (stmt);
3755   rhs_code = gimple_assign_rhs_code (stmt);
3756   loc = gimple_location (stmt);
3757   switch (rhs_code)
3758     {
3759     case SSA_NAME:
3760     CASE_CONVERT:
3761       irhs1 = *defs.get (rhs1);
3762       itype = TREE_TYPE (irhs1);
3763       pattern_stmt
3764 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3765 			       SSA_NAME, irhs1);
3766       break;
3767 
3768     case BIT_NOT_EXPR:
3769       irhs1 = *defs.get (rhs1);
3770       itype = TREE_TYPE (irhs1);
3771       pattern_stmt
3772 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3773 			       BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3774       break;
3775 
3776     case BIT_AND_EXPR:
3777       /* Try to optimize x = y & (a < b ? 1 : 0); into
3778 	 x = (a < b ? y : 0);
3779 
3780 	 E.g. for:
3781 	   bool a_b, b_b, c_b;
3782 	   TYPE d_T;
3783 
3784 	   S1  a_b = x1 CMP1 y1;
3785 	   S2  b_b = x2 CMP2 y2;
3786 	   S3  c_b = a_b & b_b;
3787 	   S4  d_T = (TYPE) c_b;
3788 
3789 	 we would normally emit:
3790 
3791 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
3792 	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
3793 	   S3'  c_T = a_T & b_T;
3794 	   S4'  d_T = c_T;
3795 
3796 	 but we can save one stmt by using the
3797 	 result of one of the COND_EXPRs in the other COND_EXPR and leave
3798 	 BIT_AND_EXPR stmt out:
3799 
3800 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
3801 	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
3802 	   S4'  f_T = c_T;
3803 
3804 	 At least when VEC_COND_EXPR is implemented using masks
3805 	 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3806 	 computes the comparison masks and ands it, in one case with
3807 	 all ones vector, in the other case with a vector register.
3808 	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3809 	 often more expensive.  */
3810       def_stmt = SSA_NAME_DEF_STMT (rhs2);
3811       def_rhs_code = gimple_assign_rhs_code (def_stmt);
3812       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3813 	{
3814 	  irhs1 = *defs.get (rhs1);
3815 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3816 	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
3817 	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3818 	    {
3819 	      rhs_code = def_rhs_code;
3820 	      rhs1 = def_rhs1;
3821 	      rhs2 = gimple_assign_rhs2 (def_stmt);
3822 	      trueval = irhs1;
3823 	      goto do_compare;
3824 	    }
3825 	  else
3826 	    irhs2 = *defs.get (rhs2);
3827 	  goto and_ior_xor;
3828 	}
3829       def_stmt = SSA_NAME_DEF_STMT (rhs1);
3830       def_rhs_code = gimple_assign_rhs_code (def_stmt);
3831       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3832 	{
3833 	  irhs2 = *defs.get (rhs2);
3834 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3835 	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
3836 	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3837 	    {
3838 	      rhs_code = def_rhs_code;
3839 	      rhs1 = def_rhs1;
3840 	      rhs2 = gimple_assign_rhs2 (def_stmt);
3841 	      trueval = irhs2;
3842 	      goto do_compare;
3843 	    }
3844 	  else
3845 	    irhs1 = *defs.get (rhs1);
3846 	  goto and_ior_xor;
3847 	}
3848       /* FALLTHRU */
3849     case BIT_IOR_EXPR:
3850     case BIT_XOR_EXPR:
3851       irhs1 = *defs.get (rhs1);
3852       irhs2 = *defs.get (rhs2);
3853     and_ior_xor:
3854       if (TYPE_PRECISION (TREE_TYPE (irhs1))
3855 	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
3856 	{
3857 	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3858 	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3859 	  int out_prec = TYPE_PRECISION (out_type);
3860 	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3861 	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3862 					      stmt_info);
3863 	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3864 	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3865 					      stmt_info);
3866 	  else
3867 	    {
3868 	      irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3869 	      irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3870 	    }
3871 	}
3872       itype = TREE_TYPE (irhs1);
3873       pattern_stmt
3874 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3875 			       rhs_code, irhs1, irhs2);
3876       break;
3877 
3878     default:
3879     do_compare:
3880       gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3881       if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3882 	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3883 	  || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3884 		       GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3885 	{
3886 	  scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3887 	  itype
3888 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3889 	}
3890       else
3891 	itype = TREE_TYPE (rhs1);
3892       cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3893       if (trueval == NULL_TREE)
3894 	trueval = build_int_cst (itype, 1);
3895       else
3896 	gcc_checking_assert (useless_type_conversion_p (itype,
3897 							TREE_TYPE (trueval)));
3898       pattern_stmt
3899 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3900 			       COND_EXPR, cond_expr, trueval,
3901 			       build_int_cst (itype, 0));
3902       break;
3903     }
3904 
3905   gimple_set_location (pattern_stmt, loc);
3906   append_pattern_def_seq (stmt_info, pattern_stmt,
3907 			  get_vectype_for_scalar_type (vinfo, itype));
3908   defs.put (var, gimple_assign_lhs (pattern_stmt));
3909 }
3910 
3911 /* Comparison function to qsort a vector of gimple stmts after UID.  */
3912 
3913 static int
3914 sort_after_uid (const void *p1, const void *p2)
3915 {
3916   const gimple *stmt1 = *(const gimple * const *)p1;
3917   const gimple *stmt2 = *(const gimple * const *)p2;
3918   return gimple_uid (stmt1) - gimple_uid (stmt2);
3919 }
3920 
3921 /* Create pattern stmts for all stmts participating in the bool pattern
3922    specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3923    OUT_TYPE.  Return the def of the pattern root.  */
3924 
3925 static tree
3926 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3927 		   tree out_type, stmt_vec_info stmt_info)
3928 {
3929   /* Gather original stmts in the bool pattern in their order of appearance
3930      in the IL.  */
3931   auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3932   for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3933        i != bool_stmt_set.end (); ++i)
3934     bool_stmts.quick_push (*i);
3935   bool_stmts.qsort (sort_after_uid);
3936 
3937   /* Now process them in that order, producing pattern stmts.  */
3938   hash_map <tree, tree> defs;
3939   for (unsigned i = 0; i < bool_stmts.length (); ++i)
3940     adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3941 			 out_type, stmt_info, defs);
3942 
3943   /* Pop the last pattern seq stmt and install it as pattern root for STMT.  */
3944   gimple *pattern_stmt
3945     = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
3946   return gimple_assign_lhs (pattern_stmt);
3947 }
3948 
3949 /* Return the proper type for converting bool VAR into
3950    an integer value or NULL_TREE if no such type exists.
3951    The type is chosen so that the converted value has the
3952    same number of elements as VAR's vector type.  */
3953 
3954 static tree
3955 integer_type_for_mask (tree var, vec_info *vinfo)
3956 {
3957   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3958     return NULL_TREE;
3959 
3960   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3961   if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
3962     return NULL_TREE;
3963 
3964   return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
3965 }
3966 
3967 /* Function vect_recog_bool_pattern
3968 
3969    Try to find pattern like following:
3970 
3971      bool a_b, b_b, c_b, d_b, e_b;
3972      TYPE f_T;
3973    loop:
3974      S1  a_b = x1 CMP1 y1;
3975      S2  b_b = x2 CMP2 y2;
3976      S3  c_b = a_b & b_b;
3977      S4  d_b = x3 CMP3 y3;
3978      S5  e_b = c_b | d_b;
3979      S6  f_T = (TYPE) e_b;
3980 
3981    where type 'TYPE' is an integral type.  Or a similar pattern
3982    ending in
3983 
3984      S6  f_Y = e_b ? r_Y : s_Y;
3985 
3986    as results from if-conversion of a complex condition.
3987 
3988    Input:
3989 
3990    * STMT_VINFO: The stmt at the end from which the pattern
3991 		 search begins, i.e. cast of a bool to
3992 		 an integer type.
3993 
3994    Output:
3995 
3996    * TYPE_OUT: The type of the output of this pattern.
3997 
3998    * Return value: A new stmt that will be used to replace the pattern.
3999 
4000 	Assuming size of TYPE is the same as size of all comparisons
4001 	(otherwise some casts would be added where needed), for the above
4002 	sequence we create the related pattern stmts:
4003 	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
4004 	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
4005 	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
4006 	S5'  e_T = c_T | d_T;
4007 	S6'  f_T = e_T;
4008 
4009 	Instead of the above S3' we could emit:
4010 	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
4011 	S3'  c_T = a_T | b_T;
4012 	but the above is more efficient.  */
4013 
4014 static gimple *
4015 vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
4016 {
4017   gimple *last_stmt = stmt_vinfo->stmt;
4018   enum tree_code rhs_code;
4019   tree var, lhs, rhs, vectype;
4020   vec_info *vinfo = stmt_vinfo->vinfo;
4021   gimple *pattern_stmt;
4022 
4023   if (!is_gimple_assign (last_stmt))
4024     return NULL;
4025 
4026   var = gimple_assign_rhs1 (last_stmt);
4027   lhs = gimple_assign_lhs (last_stmt);
4028   rhs_code = gimple_assign_rhs_code (last_stmt);
4029 
4030   if (rhs_code == VIEW_CONVERT_EXPR)
4031     var = TREE_OPERAND (var, 0);
4032 
4033   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4034     return NULL;
4035 
4036   hash_set<gimple *> bool_stmts;
4037 
4038   if (CONVERT_EXPR_CODE_P (rhs_code)
4039       || rhs_code == VIEW_CONVERT_EXPR)
4040     {
4041       if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4042 	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
4043 	return NULL;
4044       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4045       if (vectype == NULL_TREE)
4046 	return NULL;
4047 
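      /* If the bool is defined by a chain of comparisons and logical ops
	 that check_bool_pattern accepts, rewrite the whole chain in the
	 wider integer type; otherwise fall back to a single COND_EXPR
	 keyed off the existing mask.  */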
4048       if (check_bool_pattern (var, vinfo, bool_stmts))
4049 	{
4050 	  rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), stmt_vinfo);
4051 	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4052 	  if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4053 	    pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4054 	  else
4055 	    pattern_stmt
4056 	      = gimple_build_assign (lhs, NOP_EXPR, rhs);
4057 	}
4058       else
4059 	{
4060 	  tree type = integer_type_for_mask (var, vinfo);
4061 	  tree cst0, cst1, tmp;
4062 
4063 	  if (!type)
4064 	    return NULL;
4065 
4066 	  /* We may use the COND_EXPR directly in the narrowed type to avoid
4067 	     emitting multiple COND_EXPRs whose results would then need to be
4068 	     packed, and instead perform a single COND_EXPR on the packed mask.
4069 	     In the widening case it is better to emit the COND_EXPR first and
4070 	     then extract the results.  */
4071 	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4072 	    type = TREE_TYPE (lhs);
4073 
4074 	  cst0 = build_int_cst (type, 0);
4075 	  cst1 = build_int_cst (type, 1);
4076 	  tmp = vect_recog_temp_ssa_var (type, NULL);
4077 	  pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4078 
4079 	  if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4080 	    {
4081 	      tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4082 	      append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
4083 
4084 	      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4085 	      pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4086 	    }
4087 	}
4088 
4089       *type_out = vectype;
4090       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4091 
4092       return pattern_stmt;
4093     }
4094   else if (rhs_code == COND_EXPR
4095 	   && TREE_CODE (var) == SSA_NAME)
4096     {
4097       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4098       if (vectype == NULL_TREE)
4099 	return NULL;
4100 
4101       /* Build a scalar type for the boolean result that when
4102          vectorized matches the vector type of the result in
4103 	 size and number of elements.  */
4104       unsigned prec
4105 	= vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4106 			       TYPE_VECTOR_SUBPARTS (vectype));
4107 
4108       tree type
4109 	= build_nonstandard_integer_type (prec,
4110 					  TYPE_UNSIGNED (TREE_TYPE (var)));
4111       if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4112 	return NULL;
4113 
4114       if (!check_bool_pattern (var, vinfo, bool_stmts))
4115 	return NULL;
4116 
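      /* Rewrite the bool chain to produce a value of TYPE and compare it
	 against zero as the new COND_EXPR condition.  */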
4117       rhs = adjust_bool_stmts (bool_stmts, type, stmt_vinfo);
4118 
4119       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4120       pattern_stmt
4121 	  = gimple_build_assign (lhs, COND_EXPR,
4122 				 build2 (NE_EXPR, boolean_type_node,
4123 					 rhs, build_int_cst (type, 0)),
4124 				 gimple_assign_rhs2 (last_stmt),
4125 				 gimple_assign_rhs3 (last_stmt));
4126       *type_out = vectype;
4127       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4128 
4129       return pattern_stmt;
4130     }
4131   else if (rhs_code == SSA_NAME
4132 	   && STMT_VINFO_DATA_REF (stmt_vinfo))
4133     {
4134       stmt_vec_info pattern_stmt_info;
4135       tree nunits_vectype;
4136       if (!vect_get_vector_types_for_stmt (stmt_vinfo, &vectype,
4137 					   &nunits_vectype)
4138 	  || !VECTOR_MODE_P (TYPE_MODE (vectype)))
4139 	return NULL;
4140 
4141       if (check_bool_pattern (var, vinfo, bool_stmts))
4142 	rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), stmt_vinfo);
4143       else
4144 	{
4145 	  tree type = integer_type_for_mask (var, vinfo);
4146 	  tree cst0, cst1, new_vectype;
4147 
4148 	  if (!type)
4149 	    return NULL;
4150 
4151 	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4152 	    type = TREE_TYPE (vectype);
4153 
4154 	  cst0 = build_int_cst (type, 0);
4155 	  cst1 = build_int_cst (type, 1);
4156 	  new_vectype = get_vectype_for_scalar_type (vinfo, type);
4157 
4158 	  rhs = vect_recog_temp_ssa_var (type, NULL);
4159 	  pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4160 	  append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
4161 	}
4162 
4163       lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4164       if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4165 	{
4166 	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4167 	  gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4168 	  append_pattern_def_seq (stmt_vinfo, cast_stmt);
4169 	  rhs = rhs2;
4170 	}
4171       pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4172       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4173       vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4174       *type_out = vectype;
4175       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4176 
4177       return pattern_stmt;
4178     }
4179   else
4180     return NULL;
4181 }
4182 
4183 
4184 /* A helper for vect_recog_mask_conversion_pattern.  Build
4185    conversion of MASK to a type suitable for masking VECTYPE.
4186    Built statement gets required vectype and is appended to
4187    a pattern sequence of STMT_VINFO.
4188 
4189    Return converted mask.  */
4190 
4191 static tree
4192 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4193 {
4194   gimple *stmt;
4195   tree masktype, tmp;
4196 
4197   masktype = truth_type_for (vectype);
4198   tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4199   stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4200   append_pattern_def_seq (stmt_vinfo, stmt, masktype, TREE_TYPE (vectype));
4201 
4202   return tmp;
4203 }
4204 
4205 
4206 /* Function vect_recog_mask_conversion_pattern
4207 
4208    Try to find statements which require boolean type
4209    conversion.  Additional conversion statements are
4210    added to handle such cases.  For example:
4211 
4212    bool m_1, m_2, m_3;
4213    int i_4, i_5;
4214    double d_6, d_7;
4215    char c_1, c_2, c_3;
4216 
4217    S1   m_1 = i_4 > i_5;
4218    S2   m_2 = d_6 < d_7;
4219    S3   m_3 = m_1 & m_2;
4220    S4   c_1 = m_3 ? c_2 : c_3;
4221 
4222    Will be transformed into:
4223 
4224    S1   m_1 = i_4 > i_5;
4225    S2   m_2 = d_6 < d_7;
4226    S3'' m_2' = (_Bool[bitsize=32])m_2
4227    S3'  m_3' = m_1 & m_2';
4228    S4'' m_3'' = (_Bool[bitsize=8])m_3'
4229    S4'  c_1' = m_3'' ? c_2 : c_3;  */
4230 
4231 static gimple *
4232 vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
4233 {
4234   gimple *last_stmt = stmt_vinfo->stmt;
4235   enum tree_code rhs_code;
4236   tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4237   tree vectype1, vectype2;
4238   stmt_vec_info pattern_stmt_info;
4239   vec_info *vinfo = stmt_vinfo->vinfo;
4240 
4241   /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion.  */
4242   if (is_gimple_call (last_stmt)
4243       && gimple_call_internal_p (last_stmt))
4244     {
4245       gcall *pattern_stmt;
4246 
4247       internal_fn ifn = gimple_call_internal_fn (last_stmt);
4248       int mask_argno = internal_fn_mask_index (ifn);
4249       if (mask_argno < 0)
4250 	return NULL;
4251 
4252       bool store_p = internal_store_fn_p (ifn);
4253       if (store_p)
4254 	{
4255 	  int rhs_index = internal_fn_stored_value_index (ifn);
4256 	  tree rhs = gimple_call_arg (last_stmt, rhs_index);
4257 	  vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4258 	}
4259       else
4260 	{
4261 	  lhs = gimple_call_lhs (last_stmt);
4262 	  vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4263 	}
4264 
4265       tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4266       tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
4267       if (!mask_arg_type)
4268 	return NULL;
4269       vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4270 
4271       if (!vectype1 || !vectype2
4272 	  || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4273 		       TYPE_VECTOR_SUBPARTS (vectype2)))
4274 	return NULL;
4275 
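      /* The mask and the loaded or stored data have different numbers of
	 lanes, so convert the mask and rebuild the call with the
	 converted mask argument.  */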
4276       tmp = build_mask_conversion (mask_arg, vectype1, stmt_vinfo);
4277 
4278       auto_vec<tree, 8> args;
4279       unsigned int nargs = gimple_call_num_args (last_stmt);
4280       args.safe_grow (nargs);
4281       for (unsigned int i = 0; i < nargs; ++i)
4282 	args[i] = ((int) i == mask_argno
4283 		   ? tmp
4284 		   : gimple_call_arg (last_stmt, i));
4285       pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4286 
4287       if (!store_p)
4288 	{
4289 	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4290 	  gimple_call_set_lhs (pattern_stmt, lhs);
4291 	}
4292       gimple_call_set_nothrow (pattern_stmt, true);
4293 
4294       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4295       if (STMT_VINFO_DATA_REF (stmt_vinfo))
4296 	vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4297 
4298       *type_out = vectype1;
4299       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4300 
4301       return pattern_stmt;
4302     }
4303 
4304   if (!is_gimple_assign (last_stmt))
4305     return NULL;
4306 
4307   gimple *pattern_stmt;
4308   lhs = gimple_assign_lhs (last_stmt);
4309   rhs1 = gimple_assign_rhs1 (last_stmt);
4310   rhs_code = gimple_assign_rhs_code (last_stmt);
4311 
4312   /* Check for cond expression requiring mask conversion.  */
4313   if (rhs_code == COND_EXPR)
4314     {
4315       vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4316 
4317       if (TREE_CODE (rhs1) == SSA_NAME)
4318 	{
4319 	  rhs1_type = integer_type_for_mask (rhs1, vinfo);
4320 	  if (!rhs1_type)
4321 	    return NULL;
4322 	}
4323       else if (COMPARISON_CLASS_P (rhs1))
4324 	{
4325 	  /* Check whether we're comparing scalar booleans and (if so)
4326 	     whether a better mask type exists than the mask associated
4327 	     with boolean-sized elements.  This avoids unnecessary packs
4328 	     and unpacks if the booleans are set from comparisons of
4329 	     wider types.  E.g. in:
4330 
4331 	       int x1, x2, x3, x4, y1, y2;
4332 	       ...
4333 	       bool b1 = (x1 == x2);
4334 	       bool b2 = (x3 == x4);
4335 	       ... = b1 == b2 ? y1 : y2;
4336 
4337 	     it is better for b1 and b2 to use the mask type associated
4338 	     with int elements rather than bool (byte) elements.  */
4339 	  rhs1_type = integer_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4340 	  if (!rhs1_type)
4341 	    rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4342 	}
4343       else
4344 	return NULL;
4345 
4346       vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4347 
4348       if (!vectype1 || !vectype2)
4349 	return NULL;
4350 
4351       /* Continue if a conversion is needed.  Also continue if we have
4352 	 a comparison whose vector type would normally be different from
4353 	 VECTYPE2 when considered in isolation.  In that case we'll
4354 	 replace the comparison with an SSA name (so that we can record
4355 	 its vector type) and behave as though the comparison was an SSA
4356 	 name from the outset.  */
4357       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4358 		    TYPE_VECTOR_SUBPARTS (vectype2))
4359 	  && (TREE_CODE (rhs1) == SSA_NAME
4360 	      || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4361 	return NULL;
4362 
4363       /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4364 	 in place; we can handle it in vectorizable_condition.  This avoids
4365 	 unnecessary promotion stmts and an increased vectorization factor.  */
4366       if (COMPARISON_CLASS_P (rhs1)
4367 	  && INTEGRAL_TYPE_P (rhs1_type)
4368 	  && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4369 		       TYPE_VECTOR_SUBPARTS (vectype2)))
4370 	{
4371 	  enum vect_def_type dt;
4372 	  if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4373 	      && dt == vect_external_def
4374 	      && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4375 	      && (dt == vect_external_def
4376 		  || dt == vect_constant_def))
4377 	    {
4378 	      tree wide_scalar_type = build_nonstandard_integer_type
4379 		(tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4380 		 TYPE_UNSIGNED (rhs1_type));
4381 	      tree vectype3 = get_vectype_for_scalar_type (vinfo,
4382 							   wide_scalar_type);
4383 	      if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4384 		return NULL;
4385 	    }
4386 	}
4387 
4388       /* If rhs1 is a comparison we need to move it into a
4389 	 separate statement.  */
4390       if (TREE_CODE (rhs1) != SSA_NAME)
4391 	{
4392 	  tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4393 	  pattern_stmt = gimple_build_assign (tmp, rhs1);
4394 	  rhs1 = tmp;
4395 	  append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2,
4396 				  rhs1_type);
4397 	}
4398 
4399       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4400 		    TYPE_VECTOR_SUBPARTS (vectype2)))
4401 	tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4402       else
4403 	tmp = rhs1;
4404 
4405       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4406       pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4407 					  gimple_assign_rhs2 (last_stmt),
4408 					  gimple_assign_rhs3 (last_stmt));
4409 
4410       *type_out = vectype1;
4411       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4412 
4413       return pattern_stmt;
4414     }
4415 
4416   /* Now check for binary boolean operations requiring conversion for
4417      one of operands.  */
4418   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4419     return NULL;
4420 
4421   if (rhs_code != BIT_IOR_EXPR
4422       && rhs_code != BIT_XOR_EXPR
4423       && rhs_code != BIT_AND_EXPR
4424       && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4425     return NULL;
4426 
4427   rhs2 = gimple_assign_rhs2 (last_stmt);
4428 
4429   rhs1_type = integer_type_for_mask (rhs1, vinfo);
4430   rhs2_type = integer_type_for_mask (rhs2, vinfo);
4431 
4432   if (!rhs1_type || !rhs2_type
4433       || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4434     return NULL;
4435 
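  /* Convert the operand whose mask is associated with the wider elements
     to the mask type associated with the narrower elements, so that both
     operands of the bitwise operation use the same mask type.  */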
4436   if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4437     {
4438       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4439       if (!vectype1)
4440 	return NULL;
4441       rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
4442     }
4443   else
4444     {
4445       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
4446       if (!vectype1)
4447 	return NULL;
4448       rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4449     }
4450 
4451   lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4452   pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4453 
4454   *type_out = vectype1;
4455   vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4456 
4457   return pattern_stmt;
4458 }
4459 
4460 /* STMT_INFO is a load or store.  If the load or store is conditional, return
4461    the boolean condition under which it occurs, otherwise return null.  */
4462 
4463 static tree
4464 vect_get_load_store_mask (stmt_vec_info stmt_info)
4465 {
4466   if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4467     {
4468       gcc_assert (gimple_assign_single_p (def_assign));
4469       return NULL_TREE;
4470     }
4471 
4472   if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4473     {
4474       internal_fn ifn = gimple_call_internal_fn (def_call);
4475       int mask_index = internal_fn_mask_index (ifn);
4476       return gimple_call_arg (def_call, mask_index);
4477     }
4478 
4479   gcc_unreachable ();
4480 }
4481 
4482 /* Return MASK if MASK is suitable for masking an operation on vectors
4483    of type VECTYPE, otherwise convert it into such a form and return
4484    the result.  Associate any conversion statements with STMT_INFO's
4485    pattern.  */
4486 
4487 static tree
4488 vect_convert_mask_for_vectype (tree mask, tree vectype,
4489 			       stmt_vec_info stmt_info, vec_info *vinfo)
4490 {
4491   tree mask_type = integer_type_for_mask (mask, vinfo);
4492   if (mask_type)
4493     {
4494       tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
4495       if (mask_vectype
4496 	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4497 		       TYPE_VECTOR_SUBPARTS (mask_vectype)))
4498 	mask = build_mask_conversion (mask, vectype, stmt_info);
4499     }
4500   return mask;
4501 }
4502 
4503 /* Return the equivalent of:
4504 
4505      fold_convert (TYPE, VALUE)
4506 
4507    with the expectation that the operation will be vectorized.
4508    If new statements are needed, add them as pattern statements
4509    to STMT_INFO.  */
4510 
4511 static tree
4512 vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
4513 {
4514   if (useless_type_conversion_p (type, TREE_TYPE (value)))
4515     return value;
4516 
4517   vec_info *vinfo = stmt_info->vinfo;
4518   tree new_value = vect_recog_temp_ssa_var (type, NULL);
4519   gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4520   append_pattern_def_seq (stmt_info, conversion,
4521 			  get_vectype_for_scalar_type (vinfo, type));
4522   return new_value;
4523 }
4524 
4525 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4526    internal function.  Return the final statement on success and set
4527    *TYPE_OUT to the vector type being loaded or stored.
4528 
4529    This function only handles gathers and scatters that were recognized
4530    as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */
4531 
4532 static gimple *
4533 vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
4534 {
4535   /* Currently we only support this for loop vectorization.  */
4536   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4537   if (!loop_vinfo)
4538     return NULL;
4539 
4540   /* Make sure that we're looking at a gather load or scatter store.  */
4541   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4542   if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4543     return NULL;
4544 
4545   /* Get the boolean that controls whether the load or store happens.
4546      This is null if the operation is unconditional.  */
4547   tree mask = vect_get_load_store_mask (stmt_info);
4548 
4549   /* Make sure that the target supports an appropriate internal
4550      function for the gather/scatter operation.  */
4551   gather_scatter_info gs_info;
4552   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4553       || gs_info.decl)
4554     return NULL;
4555 
4556   /* Convert the mask to the right form.  */
4557   tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
4558 						 gs_info.element_type);
4559   if (mask)
4560     mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4561 					  loop_vinfo);
4562 
4563   /* Get the invariant base and non-invariant offset, converting the
4564      latter to the same width as the vector elements.  */
4565   tree base = gs_info.base;
4566   tree offset_type = TREE_TYPE (gs_info.offset_vectype);
4567   tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4568 						stmt_info);
4569 
4570   /* Build the new pattern statement.  */
4571   tree scale = size_int (gs_info.scale);
4572   gcall *pattern_stmt;
4573   if (DR_IS_READ (dr))
4574     {
4575       tree zero = build_zero_cst (gs_info.element_type);
4576       if (mask != NULL)
4577 	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
4578 						   offset, scale, zero, mask);
4579       else
4580 	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4581 						   offset, scale, zero);
4582       tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4583       gimple_call_set_lhs (pattern_stmt, load_lhs);
4584     }
4585   else
4586     {
4587       tree rhs = vect_get_store_rhs (stmt_info);
4588       if (mask != NULL)
4589 	pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4590 						   base, offset, scale, rhs,
4591 						   mask);
4592       else
4593 	pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4594 						   base, offset, scale, rhs);
4595     }
4596   gimple_call_set_nothrow (pattern_stmt, true);
4597 
4598   /* Copy across relevant vectorization info and associate DR with the
4599      new pattern statement instead of the original statement.  */
4600   stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4601   loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4602 
4603   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4604   *type_out = vectype;
4605   vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4606 
4607   return pattern_stmt;
4608 }
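
/* As a rough illustration (hypothetical source, illustrative names only),
   a loop such as:

       for (int i = 0; i < n; ++i)
	 if (c[i])
	   dest[i] = src[idx[i]];

   has its load rewritten into a pattern statement along the lines of:

       patt_1 = .MASK_GATHER_LOAD (base, offset, scale, 0, mask);

   matching the five-argument call built above; the unconditional case uses
   the four-argument .GATHER_LOAD form without the mask.  */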
4609 
4610 /* Return true if TYPE is a non-boolean integer type.  These are the types
4611    that we want to consider for narrowing.  */
4612 
4613 static bool
4614 vect_narrowable_type_p (tree type)
4615 {
4616   return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4617 }
4618 
4619 /* Return true if the operation given by CODE can be truncated to N bits
4620    when only N bits of the output are needed.  This is only true if bits of
4621    the inputs above the low N have no effect on the low N bits of the result.  */
4622 
4623 static bool
4624 vect_truncatable_operation_p (tree_code code)
4625 {
4626   switch (code)
4627     {
4628     case PLUS_EXPR:
4629     case MINUS_EXPR:
4630     case MULT_EXPR:
4631     case BIT_AND_EXPR:
4632     case BIT_IOR_EXPR:
4633     case BIT_XOR_EXPR:
4634     case COND_EXPR:
4635       return true;
4636 
4637     default:
4638       return false;
4639     }
4640 }
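
/* For example, the low 8 bits of A + B or A & B depend only on the low
   8 bits of A and B, so those operations can be carried out in 8 bits when
   only 8 bits of the result are needed.  By contrast, the low 8 bits of
   A >> 1 or A / B can depend on higher bits of the inputs, which is why
   shifts and divisions are not listed above.  */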
4641 
4642 /* Record that STMT_INFO could be changed from operating on TYPE to
4643    operating on a type with the precision and sign given by PRECISION
4644    and SIGN respectively.  PRECISION is an arbitrary bit precision;
4645    it might not be a whole number of bytes.  */
4646 
4647 static void
4648 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4649 			 unsigned int precision, signop sign)
4650 {
4651   /* Round the precision up to a whole number of bytes.  */
4652   precision = vect_element_precision (precision);
4653   if (precision < TYPE_PRECISION (type)
4654       && (!stmt_info->operation_precision
4655 	  || stmt_info->operation_precision > precision))
4656     {
4657       stmt_info->operation_precision = precision;
4658       stmt_info->operation_sign = sign;
4659     }
4660 }
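
/* For instance, a requirement of 12 bits is recorded as a 16-bit operation
   and a requirement of 3 bits as an 8-bit one, since vector elements only
   come in whole-byte sizes.  The smallest requirement wins: a later call
   asking for 8 bits overrides an earlier 16-bit request, but never the
   other way around.  */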
4661 
4662 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4663    non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
4664    is an arbitrary bit precision; it might not be a whole number of bytes.  */
4665 
4666 static void
4667 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4668 			      unsigned int min_input_precision)
4669 {
4670   /* This operation in isolation only requires the inputs to have
4671      MIN_INPUT_PRECISION of precision.  However, that doesn't mean
4672      that MIN_INPUT_PRECISION is a natural precision for the chain
4673      as a whole.  E.g. consider something like:
4674 
4675 	 unsigned short *x, *y;
4676 	 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4677 
4678      The right shift can be done on unsigned chars, and only requires the
4679      result of "*x & 0xf0" to be done on unsigned chars.  But taking that
4680      approach would mean turning a natural chain of single-vector unsigned
4681      short operations into one that truncates "*x" and then extends
4682      "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4683      operation and one vector for each unsigned char operation.
4684      This would be a significant pessimization.
4685 
4686      Instead only propagate the maximum of this precision and the precision
4687      required by the users of the result.  This means that we don't pessimize
4688      the case above but continue to optimize things like:
4689 
4690 	 unsigned char *y;
4691 	 unsigned short *x;
4692 	 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4693 
4694      Here we would truncate two vectors of *x to a single vector of
4695      unsigned chars and use single-vector unsigned char operations for
4696      everything else, rather than doing two unsigned short copies of
4697      "(*x & 0xf0) >> 4" and then truncating the result.  */
4698   min_input_precision = MAX (min_input_precision,
4699 			     stmt_info->min_output_precision);
4700 
4701   if (min_input_precision < TYPE_PRECISION (type)
4702       && (!stmt_info->min_input_precision
4703 	  || stmt_info->min_input_precision > min_input_precision))
4704     stmt_info->min_input_precision = min_input_precision;
4705 }
4706 
4707 /* Subroutine of vect_determine_min_output_precision.  Return true if
4708    we can calculate a reduced number of output bits for STMT_INFO,
4709    whose result is LHS.  */
4710 
4711 static bool
4712 vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
4713 {
4714   /* Take the maximum precision required by users of the result.  */
4715   vec_info *vinfo = stmt_info->vinfo;
4716   unsigned int precision = 0;
4717   imm_use_iterator iter;
4718   use_operand_p use;
4719   FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4720     {
4721       gimple *use_stmt = USE_STMT (use);
4722       if (is_gimple_debug (use_stmt))
4723 	continue;
4724       stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4725       if (!use_stmt_info || !use_stmt_info->min_input_precision)
4726 	return false;
4727       /* The input precision recorded for COND_EXPRs applies only to the
4728 	 "then" and "else" values.  */
4729       gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
4730       if (assign
4731 	  && gimple_assign_rhs_code (assign) == COND_EXPR
4732 	  && use->use != gimple_assign_rhs2_ptr (assign)
4733 	  && use->use != gimple_assign_rhs3_ptr (assign))
4734 	return false;
4735       precision = MAX (precision, use_stmt_info->min_input_precision);
4736     }
4737 
4738   if (dump_enabled_p ())
4739     dump_printf_loc (MSG_NOTE, vect_location,
4740 		     "only the low %d bits of %T are significant\n",
4741 		     precision, lhs);
4742   stmt_info->min_output_precision = precision;
4743   return true;
4744 }
4745 
4746 /* Calculate min_output_precision for STMT_INFO.  */
4747 
4748 static void
4749 vect_determine_min_output_precision (stmt_vec_info stmt_info)
4750 {
4751   /* We're only interested in statements with a narrowable result.  */
4752   tree lhs = gimple_get_lhs (stmt_info->stmt);
4753   if (!lhs
4754       || TREE_CODE (lhs) != SSA_NAME
4755       || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4756     return;
4757 
4758   if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
4759     stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4760 }
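
/* As a small worked example (illustrative SSA names): given

       _1 = ...;                         (32-bit int)
       _2 = (unsigned char) _1;

   and assuming the char result is needed in full, the conversion records a
   min_input_precision of 8 for its input, so the definition of _1 ends up
   with min_output_precision 8.  If any user of _1 had no recorded
   requirement, the fallback above keeps the full 32 bits.  */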
4761 
4762 /* Use range information to decide whether STMT (described by STMT_INFO)
4763    could be done in a narrower type.  This is effectively a forward
4764    propagation, since it uses context-independent information that applies
4765    to all users of an SSA name.  */
4766 
4767 static void
4768 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4769 {
4770   tree lhs = gimple_assign_lhs (stmt);
4771   if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4772     return;
4773 
4774   tree type = TREE_TYPE (lhs);
4775   if (!vect_narrowable_type_p (type))
4776     return;
4777 
4778   /* First see whether we have any useful range information for the result.  */
4779   unsigned int precision = TYPE_PRECISION (type);
4780   signop sign = TYPE_SIGN (type);
4781   wide_int min_value, max_value;
4782   if (!vect_get_range_info (lhs, &min_value, &max_value))
4783     return;
4784 
4785   tree_code code = gimple_assign_rhs_code (stmt);
4786   unsigned int nops = gimple_num_ops (stmt);
4787 
4788   if (!vect_truncatable_operation_p (code))
4789     /* Check that all relevant input operands are compatible, and update
4790        [MIN_VALUE, MAX_VALUE] to include their ranges.  */
4791     for (unsigned int i = 1; i < nops; ++i)
4792       {
4793 	tree op = gimple_op (stmt, i);
4794 	if (TREE_CODE (op) == INTEGER_CST)
4795 	  {
4796 	    /* Don't require the integer to have TYPE (which it might
4797 	       not for things like shift amounts, etc.), but do require it
4798 	       to fit the type.  */
4799 	    if (!int_fits_type_p (op, type))
4800 	      return;
4801 
4802 	    min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4803 	    max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4804 	  }
4805 	else if (TREE_CODE (op) == SSA_NAME)
4806 	  {
4807 	    /* Ignore codes that don't take uniform arguments.  */
4808 	    if (!types_compatible_p (TREE_TYPE (op), type))
4809 	      return;
4810 
4811 	    wide_int op_min_value, op_max_value;
4812 	    if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4813 	      return;
4814 
4815 	    min_value = wi::min (min_value, op_min_value, sign);
4816 	    max_value = wi::max (max_value, op_max_value, sign);
4817 	  }
4818 	else
4819 	  return;
4820       }
4821 
4822   /* Try to switch signed types for unsigned types if we can.
4823      This is better for two reasons.  First, unsigned ops tend
4824      to be cheaper than signed ops.  Second, it means that we can
4825      handle things like:
4826 
4827 	signed char c;
4828 	int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4829 
4830      as:
4831 
4832 	signed char c;
4833 	unsigned short res_1 = (unsigned short) c & 0xff00;
4834 	int res = (int) res_1;
4835 
4836      where the intermediate result res_1 has unsigned rather than
4837      signed type.  */
4838   if (sign == SIGNED && !wi::neg_p (min_value))
4839     sign = UNSIGNED;
4840 
4841   /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
4842   unsigned int precision1 = wi::min_precision (min_value, sign);
4843   unsigned int precision2 = wi::min_precision (max_value, sign);
4844   unsigned int value_precision = MAX (precision1, precision2);
4845   if (value_precision >= precision)
4846     return;
4847 
4848   if (dump_enabled_p ())
4849     dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4850 		     " without loss of precision: %G",
4851 		     sign == SIGNED ? "signed" : "unsigned",
4852 		     value_precision, stmt);
4853 
4854   vect_set_operation_type (stmt_info, type, value_precision, sign);
4855   vect_set_min_input_precision (stmt_info, type, value_precision);
4856 }
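
/* For example (hypothetical ranges): if range information proves that _1 in
   "_1 = x_2 + y_3" is always within [0, 200], then only 8 bits are needed to
   represent the result, PLUS_EXPR is truncatable, and the statement is
   recorded as a candidate for an unsigned 8-bit operation even though the
   original type is a 32-bit int.  */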
4857 
4858 /* Use information about the users of STMT's result to decide whether
4859    STMT (described by STMT_INFO) could be done in a narrower type.
4860    This is effectively a backward propagation.  */
4861 
4862 static void
4863 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4864 {
4865   tree_code code = gimple_assign_rhs_code (stmt);
4866   unsigned int opno = (code == COND_EXPR ? 2 : 1);
4867   tree type = TREE_TYPE (gimple_op (stmt, opno));
4868   if (!vect_narrowable_type_p (type))
4869     return;
4870 
4871   unsigned int precision = TYPE_PRECISION (type);
4872   unsigned int operation_precision, min_input_precision;
4873   switch (code)
4874     {
4875     CASE_CONVERT:
4876       /* Only the bits that contribute to the output matter.  Don't change
4877 	 the precision of the operation itself.  */
4878       operation_precision = precision;
4879       min_input_precision = stmt_info->min_output_precision;
4880       break;
4881 
4882     case LSHIFT_EXPR:
4883     case RSHIFT_EXPR:
4884       {
4885 	tree shift = gimple_assign_rhs2 (stmt);
4886 	if (TREE_CODE (shift) != INTEGER_CST
4887 	    || !wi::ltu_p (wi::to_widest (shift), precision))
4888 	  return;
4889 	unsigned int const_shift = TREE_INT_CST_LOW (shift);
4890 	if (code == LSHIFT_EXPR)
4891 	  {
4892 	    /* Avoid creating an undefined shift.
4893 
4894 	       ??? We could instead use min_output_precision as-is and
4895 	       optimize out-of-range shifts to zero.  However, only
4896 	       degenerate testcases shift away all their useful input data,
4897 	       and it isn't natural to drop input operations in the middle
4898 	       of vectorization.  This sort of thing should really be
4899 	       handled before vectorization.  */
4900 	    operation_precision = MAX (stmt_info->min_output_precision,
4901 				       const_shift + 1);
4902 	    /* We need CONST_SHIFT fewer bits of the input.  */
4903 	    min_input_precision = (MAX (operation_precision, const_shift)
4904 				   - const_shift);
4905 	  }
4906 	else
4907 	  {
4908 	    /* We need CONST_SHIFT extra bits to do the operation.  */
4909 	    operation_precision = (stmt_info->min_output_precision
4910 				   + const_shift);
4911 	    min_input_precision = operation_precision;
4912 	  }
4913 	break;
4914       }
4915 
4916     default:
4917       if (vect_truncatable_operation_p (code))
4918 	{
4919 	  /* Input bit N has no effect on output bits N-1 and lower.  */
4920 	  operation_precision = stmt_info->min_output_precision;
4921 	  min_input_precision = operation_precision;
4922 	  break;
4923 	}
4924       return;
4925     }
4926 
4927   if (operation_precision < precision)
4928     {
4929       if (dump_enabled_p ())
4930 	dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4931 			 " without affecting users: %G",
4932 			 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
4933 			 operation_precision, stmt);
4934       vect_set_operation_type (stmt_info, type, operation_precision,
4935 			       TYPE_SIGN (type));
4936     }
4937   vect_set_min_input_precision (stmt_info, type, min_input_precision);
4938 }
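
/* A small worked example for the shift case above (illustrative values):
   if users only need the low 8 bits of _1 in "_1 = x_2 >> 4", then the shift
   itself must be computed on at least 8 + 4 = 12 bits of x_2, so both
   operation_precision and min_input_precision become 12, which
   vect_set_operation_type then rounds up to a 16-bit element.  */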
4939 
4940 /* Return true if the statement described by STMT_INFO sets a boolean
4941    SSA_NAME and if we know how to vectorize this kind of statement using
4942    vector mask types.  */
4943 
4944 static bool
4945 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
4946 {
4947   tree lhs = gimple_get_lhs (stmt_info->stmt);
4948   if (!lhs
4949       || TREE_CODE (lhs) != SSA_NAME
4950       || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4951     return false;
4952 
4953   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
4954     {
4955       tree_code rhs_code = gimple_assign_rhs_code (assign);
4956       switch (rhs_code)
4957 	{
4958 	CASE_CONVERT:
4959 	case SSA_NAME:
4960 	case BIT_NOT_EXPR:
4961 	case BIT_IOR_EXPR:
4962 	case BIT_XOR_EXPR:
4963 	case BIT_AND_EXPR:
4964 	  return true;
4965 
4966 	default:
4967 	  return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
4968 	}
4969     }
4970   return false;
4971 }
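
/* For instance, statements such as

       _5 = a_1 < b_2;
       _6 = _5 & _7;

   both set boolean SSA names via operations we know how to carry out on
   vector masks, so both would return true here; a call whose return type
   happens to be boolean would not.  */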
4972 
4973 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
4974    a vector mask type instead of a normal vector type.  Record the
4975    result in STMT_INFO->mask_precision.  */
4976 
4977 static void
4978 vect_determine_mask_precision (stmt_vec_info stmt_info)
4979 {
4980   vec_info *vinfo = stmt_info->vinfo;
4981 
4982   if (!possible_vector_mask_operation_p (stmt_info)
4983       || stmt_info->mask_precision)
4984     return;
4985 
4986   auto_vec<stmt_vec_info, 32> worklist;
4987   worklist.quick_push (stmt_info);
4988   while (!worklist.is_empty ())
4989     {
4990       stmt_info = worklist.last ();
4991       unsigned int orig_length = worklist.length ();
4992 
4993       /* If at least one boolean input uses a vector mask type,
4994 	 pick the mask type with the narrowest elements.
4995 
4996 	 ??? This is the traditional behavior.  It should always produce
4997 	 the smallest number of operations, but isn't necessarily the
4998 	 optimal choice.  For example, if we have:
4999 
5000 	   a = b & c
5001 
5002 	 where:
5003 
5004 	 - the user of a wants it to have a mask type for 16-bit elements (M16)
5005 	 - b also uses M16
5006 	 - c uses a mask type for 8-bit elements (M8)
5007 
5008 	 then picking M8 gives:
5009 
5010 	 - 1 M16->M8 pack for b
5011 	 - 1 M8 AND for a
5012 	 - 2 M8->M16 unpacks for the user of a
5013 
5014 	 whereas picking M16 would have given:
5015 
5016 	 - 2 M8->M16 unpacks for c
5017 	 - 2 M16 ANDs for a
5018 
5019 	 The number of operations is equal, but M16 would have given
5020 	 a shorter dependency chain and allowed more ILP.  */
5021       unsigned int precision = ~0U;
5022       gassign *assign = as_a <gassign *> (stmt_info->stmt);
5023       unsigned int nops = gimple_num_ops (assign);
5024       for (unsigned int i = 1; i < nops; ++i)
5025 	{
5026 	  tree rhs = gimple_op (assign, i);
5027 	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
5028 	    continue;
5029 
5030 	  stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5031 	  if (!def_stmt_info)
5032 	    /* Don't let external or constant operands influence the choice.
5033 	       We can convert them to whichever vector type we pick.  */
5034 	    continue;
5035 
5036 	  if (def_stmt_info->mask_precision)
5037 	    {
5038 	      if (precision > def_stmt_info->mask_precision)
5039 		precision = def_stmt_info->mask_precision;
5040 	    }
5041 	  else if (possible_vector_mask_operation_p (def_stmt_info))
5042 	    worklist.safe_push (def_stmt_info);
5043 	}
5044 
5045       /* Defer the choice if we need to visit operands first.  */
5046       if (orig_length != worklist.length ())
5047 	continue;
5048 
5049       /* If the statement compares two values that shouldn't use vector masks,
5050 	 try comparing the values as normal scalars instead.  */
5051       tree_code rhs_code = gimple_assign_rhs_code (assign);
5052       if (precision == ~0U
5053 	  && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5054 	{
5055 	  tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
5056 	  scalar_mode mode;
5057 	  tree vectype, mask_type;
5058 	  if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
5059 	      && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
5060 	      && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
5061 	      && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
5062 	    precision = GET_MODE_BITSIZE (mode);
5063 	}
5064 
5065       if (dump_enabled_p ())
5066 	{
5067 	  if (precision == ~0U)
5068 	    dump_printf_loc (MSG_NOTE, vect_location,
5069 			     "using normal nonmask vectors for %G",
5070 			     stmt_info->stmt);
5071 	  else
5072 	    dump_printf_loc (MSG_NOTE, vect_location,
5073 			     "using boolean precision %d for %G",
5074 			     precision, stmt_info->stmt);
5075 	}
5076 
5077       stmt_info->mask_precision = precision;
5078       worklist.pop ();
5079     }
5080 }
5081 
5082 /* Handle vect_determine_precisions for STMT_INFO, given that we
5083    have already done so for the users of its result.  */
5084 
5085 void
5086 vect_determine_stmt_precisions (stmt_vec_info stmt_info)
5087 {
5088   vect_determine_min_output_precision (stmt_info);
5089   if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
5090     {
5091       vect_determine_precisions_from_range (stmt_info, stmt);
5092       vect_determine_precisions_from_users (stmt_info, stmt);
5093     }
5094   vect_determine_mask_precision (stmt_info);
5095 }
5096 
5097 /* Walk backwards through the vectorizable region to determine the
5098    values of these fields:
5099 
5100    - min_output_precision
5101    - min_input_precision
5102    - operation_precision
5103    - operation_sign.  */
5104 
5105 void
5106 vect_determine_precisions (vec_info *vinfo)
5107 {
5108   DUMP_VECT_SCOPE ("vect_determine_precisions");
5109 
5110   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5111     {
5112       class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5113       basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5114       unsigned int nbbs = loop->num_nodes;
5115 
5116       for (unsigned int i = 0; i < nbbs; i++)
5117 	{
5118 	  basic_block bb = bbs[nbbs - i - 1];
5119 	  for (gimple_stmt_iterator si = gsi_last_bb (bb);
5120 	       !gsi_end_p (si); gsi_prev (&si))
5121 	    vect_determine_stmt_precisions
5122 	      (vinfo->lookup_stmt (gsi_stmt (si)));
5123 	}
5124     }
5125   else
5126     {
5127       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5128       gimple_stmt_iterator si = bb_vinfo->region_end;
5129       gimple *stmt;
5130       do
5131 	{
5132 	  if (!gsi_stmt (si))
5133 	    si = gsi_last_bb (bb_vinfo->bb);
5134 	  else
5135 	    gsi_prev (&si);
5136 	  stmt = gsi_stmt (si);
5137 	  stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
5138 	  if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5139 	    vect_determine_stmt_precisions (stmt_info);
5140 	}
5141       while (stmt != gsi_stmt (bb_vinfo->region_begin));
5142     }
5143 }
5144 
5145 typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);
5146 
5147 struct vect_recog_func
5148 {
5149   vect_recog_func_ptr fn;
5150   const char *name;
5151 };
5152 
5153 /* Note that ordering matters - the first pattern matching on a stmt is
5154    taken, which means the more complex pattern usually needs to precede the
5155    less complex ones (widen_sum only after dot_prod or sad, for example).  */
5156 static vect_recog_func vect_vect_recog_func_ptrs[] = {
5157   { vect_recog_over_widening_pattern, "over_widening" },
5158   /* Must come after over_widening, which narrows the shift as much as
5159      possible beforehand.  */
5160   { vect_recog_average_pattern, "average" },
5161   { vect_recog_mulhs_pattern, "mult_high" },
5162   { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
5163   { vect_recog_widen_mult_pattern, "widen_mult" },
5164   { vect_recog_dot_prod_pattern, "dot_prod" },
5165   { vect_recog_sad_pattern, "sad" },
5166   { vect_recog_widen_sum_pattern, "widen_sum" },
5167   { vect_recog_pow_pattern, "pow" },
5168   { vect_recog_widen_shift_pattern, "widen_shift" },
5169   { vect_recog_rotate_pattern, "rotate" },
5170   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
5171   { vect_recog_divmod_pattern, "divmod" },
5172   { vect_recog_mult_pattern, "mult" },
5173   { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
5174   { vect_recog_bool_pattern, "bool" },
5175   /* This must come before mask conversion, and includes the parts
5176      of mask conversion that are needed for gather and scatter
5177      internal functions.  */
5178   { vect_recog_gather_scatter_pattern, "gather_scatter" },
5179   { vect_recog_mask_conversion_pattern, "mask_conversion" }
5180 };
5181 
5182 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
5183 
5184 /* Mark statements that are involved in a pattern.  */
5185 
5186 static inline void
5187 vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
5188                          tree pattern_vectype)
5189 {
5190   stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
5191   gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5192 
5193   gimple *orig_pattern_stmt = NULL;
5194   if (is_pattern_stmt_p (orig_stmt_info))
5195     {
5196       /* We're replacing a statement in an existing pattern definition
5197 	 sequence.  */
5198       orig_pattern_stmt = orig_stmt_info->stmt;
5199       if (dump_enabled_p ())
5200 	dump_printf_loc (MSG_NOTE, vect_location,
5201 			 "replacing earlier pattern %G", orig_pattern_stmt);
5202 
5203       /* To keep the book-keeping simple, just swap the lhs of the
5204 	 old and new statements, so that the old one has a valid but
5205 	 unused lhs.  */
5206       tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
5207       gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
5208       gimple_set_lhs (pattern_stmt, old_lhs);
5209 
5210       if (dump_enabled_p ())
5211 	dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
5212 
5213       /* Switch to the statement that ORIG replaces.  */
5214       orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
5215 
5216       /* We shouldn't be replacing the main pattern statement.  */
5217       gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
5218 		  != orig_pattern_stmt);
5219     }
5220 
5221   if (def_seq)
5222     for (gimple_stmt_iterator si = gsi_start (def_seq);
5223 	 !gsi_end_p (si); gsi_next (&si))
5224       {
5225 	if (dump_enabled_p ())
5226 	  dump_printf_loc (MSG_NOTE, vect_location,
5227 			   "extra pattern stmt: %G", gsi_stmt (si));
5228 	stmt_vec_info pattern_stmt_info
5229 	  = vect_init_pattern_stmt (gsi_stmt (si),
5230 				    orig_stmt_info, pattern_vectype);
5231 	/* Stmts in the def sequence are not vectorizable cycle or
5232 	   induction defs; instead they should all be vect_internal_def
5233 	   feeding the main pattern stmt, which retains this def type.  */
5234 	STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
5235       }
5236 
5237   if (orig_pattern_stmt)
5238     {
5239       vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
5240 
5241       /* Insert all the new pattern statements before the original one.  */
5242       gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5243       gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
5244 					       orig_def_seq);
5245       gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
5246       gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
5247 
5248       /* Remove the pattern statement that this new pattern replaces.  */
5249       gsi_remove (&gsi, false);
5250     }
5251   else
5252     vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
5253 
5254   /* Transfer reduction path info to the pattern.  */
5255   if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
5256     {
5257       vec_info *vinfo = orig_stmt_info_saved->vinfo;
5258       tree lookfor = gimple_op (orig_stmt_info_saved->stmt,
5259 				1 + STMT_VINFO_REDUC_IDX (orig_stmt_info));
5260       /* Search the pattern def sequence and the main pattern stmt.  Note
5261          that we may have inserted everything into a containing pattern def
5262 	 sequence, so the following is a bit awkward.  */
5263       gimple_stmt_iterator si;
5264       gimple *s;
5265       if (def_seq)
5266 	{
5267 	  si = gsi_start (def_seq);
5268 	  s = gsi_stmt (si);
5269 	  gsi_next (&si);
5270 	}
5271       else
5272 	{
5273 	  si = gsi_none ();
5274 	  s = pattern_stmt;
5275 	}
5276       do
5277 	{
5278 	  bool found = false;
5279 	  for (unsigned i = 1; i < gimple_num_ops (s); ++i)
5280 	    if (gimple_op (s, i) == lookfor)
5281 	      {
5282 		STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i - 1;
5283 		lookfor = gimple_get_lhs (s);
5284 		found = true;
5285 		break;
5286 	      }
5287 	  if (s == pattern_stmt)
5288 	    {
5289 	      if (!found && dump_enabled_p ())
5290 		dump_printf_loc (MSG_NOTE, vect_location,
5291 				 "failed to update reduction index.\n");
5292 	      break;
5293 	    }
5294 	  if (gsi_end_p (si))
5295 	    s = pattern_stmt;
5296 	  else
5297 	    {
5298 	      s = gsi_stmt (si);
5299 	      if (s == pattern_stmt)
5300 		/* Found the end inside a bigger pattern def seq.  */
5301 		si = gsi_none ();
5302 	      else
5303 		gsi_next (&si);
5304 	    }
5305 	} while (1);
5306     }
5307 }
5308 
5309 /* Function vect_pattern_recog_1
5310 
5311    Input:
5312    PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
5313         computation pattern.
5314    STMT_INFO: A stmt from which the pattern search should start.
5315 
5316    If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
5317    a sequence of statements that has the same functionality and can be
5318    used to replace STMT_INFO.  It returns the last statement in the sequence
5319    and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
5320    PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
5321    statement, having first checked that the target supports the new operation
5322    in that type.
5323 
5324    This function also does some bookkeeping, as explained in the documentation
5325    for vect_pattern_recog.  */
5326 
5327 static void
5328 vect_pattern_recog_1 (vect_recog_func *recog_func, stmt_vec_info stmt_info)
5329 {
5330   vec_info *vinfo = stmt_info->vinfo;
5331   gimple *pattern_stmt;
5332   loop_vec_info loop_vinfo;
5333   tree pattern_vectype;
5334 
5335   /* If this statement has already been replaced with pattern statements,
5336      leave the original statement alone, since the first match wins.
5337      Instead try to match against the definition statements that feed
5338      the main pattern statement.  */
5339   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5340     {
5341       gimple_stmt_iterator gsi;
5342       for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5343 	   !gsi_end_p (gsi); gsi_next (&gsi))
5344 	vect_pattern_recog_1 (recog_func, vinfo->lookup_stmt (gsi_stmt (gsi)));
5345       return;
5346     }
5347 
5348   gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5349   pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
5350   if (!pattern_stmt)
5351     {
5352       /* Clear any half-formed pattern definition sequence.  */
5353       STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
5354       return;
5355     }
5356 
5357   loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5358   gcc_assert (pattern_vectype);
5359 
5360   /* Found a vectorizable pattern.  */
5361   if (dump_enabled_p ())
5362     dump_printf_loc (MSG_NOTE, vect_location,
5363 		     "%s pattern recognized: %G",
5364 		     recog_func->name, pattern_stmt);
5365 
5366   /* Mark the stmts that are involved in the pattern. */
5367   vect_mark_pattern_stmts (stmt_info, pattern_stmt, pattern_vectype);
5368 
5369   /* Patterns cannot be vectorized using SLP, because they change the order of
5370      computation.  */
5371   if (loop_vinfo)
5372     {
5373       unsigned ix, ix2;
5374       stmt_vec_info *elem_ptr;
5375       VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
5376 			     elem_ptr, *elem_ptr == stmt_info);
5377     }
5378 }
5379 
5380 
5381 /* Function vect_pattern_recog
5382 
5383    Input:
5384    VINFO - vectorization info for a loop or basic-block region in which we
5385         want to look for computation idioms.
5386 
5387    Output - for each computation idiom that is detected we create a new stmt
5388         that provides the same functionality and that can be vectorized.  We
5389         also record some information in the stmt_vec_info of the relevant
5390         stmts, as explained below:
5391 
5392    At the entry to this function we have the following stmts, with the
5393    following initial value in the STMT_VINFO fields:
5394 
5395          stmt                     in_pattern_p  related_stmt    vec_stmt
5396          S1: a_i = ....                 -       -               -
5397          S2: a_2 = ..use(a_i)..         -       -               -
5398          S3: a_1 = ..use(a_2)..         -       -               -
5399          S4: a_0 = ..use(a_1)..         -       -               -
5400          S5: ... = ..use(a_0)..         -       -               -
5401 
5402    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
5403    represented by a single stmt.  We then:
5404    - create a new stmt S6 equivalent to the pattern (the stmt is not
5405      inserted into the code)
5406    - fill in the STMT_VINFO fields as follows:
5407 
5408                                   in_pattern_p  related_stmt    vec_stmt
5409          S1: a_i = ....                 -       -               -
5410          S2: a_2 = ..use(a_i)..         -       -               -
5411          S3: a_1 = ..use(a_2)..         -       -               -
5412          S4: a_0 = ..use(a_1)..         true    S6              -
5413           '---> S6: a_new = ....        -       S4              -
5414          S5: ... = ..use(a_0)..         -       -               -
5415 
5416    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
5417    to each other through the RELATED_STMT field).
5418 
5419    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
5420    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
5421    remain irrelevant unless used by stmts other than S4.
5422 
5423    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
5424    (because they are marked as irrelevant).  It will vectorize S6, and record
5425    a pointer to the new vector stmt VS6 from S6 (as usual).
5426    S4 will be skipped, and S5 will be vectorized as usual:
5427 
5428                                   in_pattern_p  related_stmt    vec_stmt
5429          S1: a_i = ....                 -       -               -
5430          S2: a_2 = ..use(a_i)..         -       -               -
5431          S3: a_1 = ..use(a_2)..         -       -               -
5432        > VS6: va_new = ....             -       -               -
5433          S4: a_0 = ..use(a_1)..         true    S6              VS6
5434           '---> S6: a_new = ....        -       S4              VS6
5435        > VS5: ... = ..vuse(va_new)..    -       -               -
5436          S5: ... = ..use(a_0)..         -       -               -
5437 
5438    DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
5439    elsewhere), and we'll end up with:
5440 
5441         VS6: va_new = ....
5442         VS5: ... = ..vuse(va_new)..
5443 
5444    In case of more than one pattern statements, e.g., widen-mult with
5445    intermediate type:
5446 
5447      S1  a_t = ;
5448      S2  a_T = (TYPE) a_t;
5449            '--> S3: a_it = (interm_type) a_t;
5450      S4  prod_T = a_T * CONST;
5451            '--> S5: prod_T' = a_it w* CONST;
5452 
5453    there may be other users of a_T outside the pattern.  In that case S2 will
5454    be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5455    and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
5456    be recorded in S3.  */
5457 
5458 void
5459 vect_pattern_recog (vec_info *vinfo)
5460 {
5461   class loop *loop;
5462   basic_block *bbs;
5463   unsigned int nbbs;
5464   gimple_stmt_iterator si;
5465   unsigned int i, j;
5466 
5467   vect_determine_precisions (vinfo);
5468 
5469   DUMP_VECT_SCOPE ("vect_pattern_recog");
5470 
5471   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5472     {
5473       loop = LOOP_VINFO_LOOP (loop_vinfo);
5474       bbs = LOOP_VINFO_BBS (loop_vinfo);
5475       nbbs = loop->num_nodes;
5476 
5477       /* Scan through the loop stmts, applying the pattern recognition
5478 	 functions starting at each stmt visited:  */
5479       for (i = 0; i < nbbs; i++)
5480 	{
5481 	  basic_block bb = bbs[i];
5482 	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5483 	    {
5484 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
5485 	      /* Scan over all generic vect_recog_xxx_pattern functions.  */
5486 	      for (j = 0; j < NUM_PATTERNS; j++)
5487 		vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j],
5488 				      stmt_info);
5489 	    }
5490 	}
5491     }
5492   else
5493     {
5494       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5495       for (si = bb_vinfo->region_begin;
5496 	   gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
5497 	{
5498 	  gimple *stmt = gsi_stmt (si);
5499 	  stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
5500 	  if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
5501 	    continue;
5502 
5503 	  /* Scan over all generic vect_recog_xxx_pattern functions.  */
5504 	  for (j = 0; j < NUM_PATTERNS; j++)
5505 	    vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], stmt_info);
5506 	}
5507     }
5508 }
5509