1 /* Analysis Utilities for Loop Vectorization.
2    Copyright (C) 2006-2019 Free Software Foundation, Inc.
3    Contributed by Dorit Nuzman <dorit@il.ibm.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h"		/* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
49 
50 /* Return true if we have a useful VR_RANGE range for VAR, storing it
51    in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */
52 
53 static bool
54 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
55 {
56   value_range_kind vr_type = get_range_info (var, min_value, max_value);
57   wide_int nonzero = get_nonzero_bits (var);
58   signop sgn = TYPE_SIGN (TREE_TYPE (var));
59   if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
60 					 nonzero, sgn) == VR_RANGE)
61     {
62       if (dump_enabled_p ())
63 	{
64 	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
65 	  dump_printf (MSG_NOTE, " has range [");
66 	  dump_hex (MSG_NOTE, *min_value);
67 	  dump_printf (MSG_NOTE, ", ");
68 	  dump_hex (MSG_NOTE, *max_value);
69 	  dump_printf (MSG_NOTE, "]\n");
70 	}
71       return true;
72     }
73   else
74     {
75       if (dump_enabled_p ())
76 	{
77 	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
78 	  dump_printf (MSG_NOTE, " has no range info\n");
79 	}
80       return false;
81     }
82 }
83 
84 /* Report that we've found an instance of the pattern named NAME in
85    statement STMT.  */
86 
87 static void
88 vect_pattern_detected (const char *name, gimple *stmt)
89 {
90   if (dump_enabled_p ())
91     dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
92 }
93 
94 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
95    return the pattern statement's stmt_vec_info.  Set its vector type to
96    VECTYPE if it doesn't have one already.  */
97 
98 static stmt_vec_info
99 vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
100 			tree vectype)
101 {
102   vec_info *vinfo = orig_stmt_info->vinfo;
103   stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
104   if (pattern_stmt_info == NULL)
105     pattern_stmt_info = orig_stmt_info->vinfo->add_stmt (pattern_stmt);
106   gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
107 
108   pattern_stmt_info->pattern_stmt_p = true;
109   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
110   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
111     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
112   if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
113     STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
114   return pattern_stmt_info;
115 }
116 
117 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
118    Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
119    have one already.  */
120 
121 static void
122 vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
123 		       tree vectype)
124 {
125   STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
126   STMT_VINFO_RELATED_STMT (orig_stmt_info)
127     = vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
128 }
129 
130 /* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
131    is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
132    be different from the vector type of the final pattern statement.  */
133 
134 static inline void
135 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
136 			tree vectype = NULL_TREE)
137 {
138   vec_info *vinfo = stmt_info->vinfo;
139   if (vectype)
140     {
141       stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
142       STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
143     }
144   gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
145 				      new_stmt);
146 }
147 
148 /* The caller wants to perform new operations on vect_external variable
149    VAR, so that the result of the operations would also be vect_external.
150    Return the edge on which the operations can be performed, if one exists.
151    Return null if the operations should instead be treated as part of
152    the pattern that needs them.  */
153 
154 static edge
155 vect_get_external_def_edge (vec_info *vinfo, tree var)
156 {
157   edge e = NULL;
158   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
159     {
160       e = loop_preheader_edge (loop_vinfo->loop);
161       if (!SSA_NAME_IS_DEFAULT_DEF (var))
162 	{
163 	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
164 	  if (bb == NULL
165 	      || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
166 	    e = NULL;
167 	}
168     }
169   return e;
170 }
171 
172 /* Return true if the target supports a vector version of CODE,
173    where CODE is known to map to a direct optab.  ITYPE specifies
174    the type of (some of) the scalar inputs and OTYPE specifies the
175    type of the scalar result.
176 
177    If CODE allows the inputs and outputs to have different types
178    (such as for WIDEN_SUM_EXPR), it is the input mode rather
179    than the output mode that determines the appropriate target pattern.
180    Operand 0 of the target pattern then specifies the mode that the output
181    must have.
182 
183    When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
184    Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
185    is nonnull.  */
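/* For example (usage sketch based on the widen-sum pattern later in this
   file): that pattern checks for a target WIDEN_SUM_EXPR with

     vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
                                      type_out);

   where the mode of the input vector type selects the optab entry and
   operand 0 of that entry must match the vector form of TYPE.  */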
186 
187 static bool
188 vect_supportable_direct_optab_p (tree otype, tree_code code,
189 				 tree itype, tree *vecotype_out,
190 				 tree *vecitype_out = NULL)
191 {
192   tree vecitype = get_vectype_for_scalar_type (itype);
193   if (!vecitype)
194     return false;
195 
196   tree vecotype = get_vectype_for_scalar_type (otype);
197   if (!vecotype)
198     return false;
199 
200   optab optab = optab_for_tree_code (code, vecitype, optab_default);
201   if (!optab)
202     return false;
203 
204   insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
205   if (icode == CODE_FOR_nothing
206       || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
207     return false;
208 
209   *vecotype_out = vecotype;
210   if (vecitype_out)
211     *vecitype_out = vecitype;
212   return true;
213 }
214 
215 /* Round bit precision PRECISION up to a full element.  */
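/* For example (illustration only), with BITS_PER_UNIT == 8 a precision of
   12 bits is rounded up to 16, while a precision of 3 bits is rounded up
   to 8.  */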
216 
217 static unsigned int
218 vect_element_precision (unsigned int precision)
219 {
220   precision = 1 << ceil_log2 (precision);
221   return MAX (precision, BITS_PER_UNIT);
222 }
223 
224 /* If OP is defined by a statement that's being considered for vectorization,
225    return information about that statement, otherwise return NULL.  */
226 
227 static stmt_vec_info
228 vect_get_internal_def (vec_info *vinfo, tree op)
229 {
230   stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
231   if (def_stmt_info
232       && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
233     return def_stmt_info;
234   return NULL;
235 }
236 
237 /* Check whether NAME, an ssa-name used in STMT_VINFO,
238    is a result of a type promotion, such that:
239      DEF_STMT: NAME = NOP (name0)
240    If CHECK_SIGN is TRUE, check that either both types are signed or both are
241    unsigned.  */
242 
243 static bool
244 type_conversion_p (tree name, stmt_vec_info stmt_vinfo, bool check_sign,
245 		   tree *orig_type, gimple **def_stmt, bool *promotion)
246 {
247   tree type = TREE_TYPE (name);
248   tree oprnd0;
249   enum vect_def_type dt;
250 
251   stmt_vec_info def_stmt_info;
252   if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, &def_stmt_info,
253 			   def_stmt))
254     return false;
255 
256   if (dt != vect_internal_def
257       && dt != vect_external_def && dt != vect_constant_def)
258     return false;
259 
260   if (!*def_stmt)
261     return false;
262 
263   if (!is_gimple_assign (*def_stmt))
264     return false;
265 
266   if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
267     return false;
268 
269   oprnd0 = gimple_assign_rhs1 (*def_stmt);
270 
271   *orig_type = TREE_TYPE (oprnd0);
272   if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
273       || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
274     return false;
275 
276   if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
277     *promotion = true;
278   else
279     *promotion = false;
280 
281   if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
282     return false;
283 
284   return true;
285 }
286 
287 /* Holds information about an input operand after some sign changes
288    and type promotions have been peeled away.  */
289 struct vect_unpromoted_value {
290   vect_unpromoted_value ();
291 
292   void set_op (tree, vect_def_type, stmt_vec_info = NULL);
293 
294   /* The value obtained after peeling away zero or more casts.  */
295   tree op;
296 
297   /* The type of OP.  */
298   tree type;
299 
300   /* The definition type of OP.  */
301   vect_def_type dt;
302 
303   /* If OP is the result of peeling at least one cast, and if the cast
304      of OP itself is a vectorizable statement, CASTER identifies that
305      statement, otherwise it is null.  */
306   stmt_vec_info caster;
307 };
308 
309 inline vect_unpromoted_value::vect_unpromoted_value ()
310   : op (NULL_TREE),
311     type (NULL_TREE),
312     dt (vect_uninitialized_def),
313     caster (NULL)
314 {
315 }
316 
317 /* Set the operand to OP_IN, its definition type to DT_IN, and the
318    statement that casts it to CASTER_IN.  */
319 
320 inline void
321 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
322 			       stmt_vec_info caster_in)
323 {
324   op = op_in;
325   type = TREE_TYPE (op);
326   dt = dt_in;
327   caster = caster_in;
328 }
329 
330 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
331    to reach some vectorizable inner operand OP', continuing as long as it
332    is possible to convert OP' back to OP using a possible sign change
333    followed by a possible promotion P.  Return this OP', or null if OP is
334    not a vectorizable SSA name.  If there is a promotion P, describe its
335    input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
336    is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
337    have more than one user.
338 
339    A successful return means that it is possible to go from OP' to OP
340    via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
341    whereas the cast from UNPROM to OP might be a promotion, a sign
342    change, or a nop.
343 
344    E.g. say we have:
345 
346        signed short *ptr = ...;
347        signed short C = *ptr;
348        unsigned short B = (unsigned short) C;    // sign change
349        signed int A = (signed int) B;            // unsigned promotion
350        ...possible other uses of A...
351        unsigned int OP = (unsigned int) A;       // sign change
352 
353    In this case it's possible to go directly from C to OP using:
354 
355        OP = (unsigned int) (unsigned short) C;
356 	    +------------+ +--------------+
357 	       promotion      sign change
358 
359    so OP' would be C.  The input to the promotion is B, so UNPROM
360    would describe B.  */
361 
362 static tree
363 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
364 				      vect_unpromoted_value *unprom,
365 				      bool *single_use_p = NULL)
366 {
367   tree res = NULL_TREE;
368   tree op_type = TREE_TYPE (op);
369   unsigned int orig_precision = TYPE_PRECISION (op_type);
370   unsigned int min_precision = orig_precision;
371   stmt_vec_info caster = NULL;
372   while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
373     {
374       /* See whether OP is simple enough to vectorize.  */
375       stmt_vec_info def_stmt_info;
376       gimple *def_stmt;
377       vect_def_type dt;
378       if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
379 	break;
380 
381       /* If OP is the input of a demotion, skip over it to see whether
382 	 OP is itself the result of a promotion.  If so, the combined
383 	 effect of the promotion and the demotion might fit the required
384 	 pattern, otherwise neither operation fits.
385 
386 	 This copes with cases such as the result of an arithmetic
387 	 operation being truncated before being stored, and where that
388 	 arithmetic operation has been recognized as an over-widened one.  */
389       if (TYPE_PRECISION (op_type) <= min_precision)
390 	{
391 	  /* Use OP as the UNPROM described above if we haven't yet
392 	     found a promotion, or if using the new input preserves the
393 	     sign of the previous promotion.  */
394 	  if (!res
395 	      || TYPE_PRECISION (unprom->type) == orig_precision
396 	      || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
397 	    {
398 	      unprom->set_op (op, dt, caster);
399 	      min_precision = TYPE_PRECISION (op_type);
400 	    }
401 	  /* Stop if we've already seen a promotion and if this
402 	     conversion does more than change the sign.  */
403 	  else if (TYPE_PRECISION (op_type)
404 		   != TYPE_PRECISION (unprom->type))
405 	    break;
406 
407 	  /* The sequence now extends to OP.  */
408 	  res = op;
409 	}
410 
411       /* See whether OP is defined by a cast.  Record it as CASTER if
412 	 the cast is potentially vectorizable.  */
413       if (!def_stmt)
414 	break;
415       caster = def_stmt_info;
416 
417       /* Ignore pattern statements, since we don't link uses for them.  */
418       if (caster
419 	  && single_use_p
420 	  && !STMT_VINFO_RELATED_STMT (caster)
421 	  && !has_single_use (res))
422 	*single_use_p = false;
423 
424       gassign *assign = dyn_cast <gassign *> (def_stmt);
425       if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
426 	break;
427 
428       /* Continue with the input to the cast.  */
429       op = gimple_assign_rhs1 (def_stmt);
430       op_type = TREE_TYPE (op);
431     }
432   return res;
433 }
434 
435 /* OP is an integer operand to an operation that returns TYPE, and we
436    want to treat the operation as a widening one.  So far we can treat
437    it as widening from *COMMON_TYPE.
438 
439    Return true if OP is suitable for such a widening operation,
440    either widening from *COMMON_TYPE or from some supertype of it.
441    Update *COMMON_TYPE to the supertype in the latter case.
442 
443    SHIFT_P is true if OP is a shift amount.  */
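/* Worked example (illustration only): for a 32-bit result TYPE and a
   *COMMON_TYPE of unsigned char, a non-shift constant OP of 300 needs
   9 bits, which doubled still fits in 32 bits, so *COMMON_TYPE is widened
   to a 16-bit unsigned type.  A shift amount OP is instead accepted only
   if it is at most TYPE_PRECISION (TYPE) / 2, i.e. 16 here.  */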
444 
445 static bool
446 vect_joust_widened_integer (tree type, bool shift_p, tree op,
447 			    tree *common_type)
448 {
449   /* Calculate the minimum precision required by OP, without changing
450      the sign of either operand.  */
451   unsigned int precision;
452   if (shift_p)
453     {
454       if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
455 	return false;
456       precision = TREE_INT_CST_LOW (op);
457     }
458   else
459     {
460       precision = wi::min_precision (wi::to_widest (op),
461 				     TYPE_SIGN (*common_type));
462       if (precision * 2 > TYPE_PRECISION (type))
463 	return false;
464     }
465 
466   /* If OP requires a wider type, switch to that type.  The checks
467      above ensure that this is still narrower than the result.  */
468   precision = vect_element_precision (precision);
469   if (TYPE_PRECISION (*common_type) < precision)
470     *common_type = build_nonstandard_integer_type
471       (precision, TYPE_UNSIGNED (*common_type));
472   return true;
473 }
474 
475 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
476    is narrower than TYPE, storing the supertype in *COMMON_TYPE if so.  */
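/* Worked example (illustration only): with a 32-bit result TYPE, a
   *COMMON_TYPE of unsigned char and a NEW_TYPE of signed char, the signs
   are mismatched and neither type holds all values of the other, so
   *COMMON_TYPE becomes a 16-bit signed type, which is still narrower
   than TYPE.  */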
477 
478 static bool
479 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
480 {
481   if (types_compatible_p (*common_type, new_type))
482     return true;
483 
484   /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
485   if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
486       && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
487     return true;
488 
489   /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
490   if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
491       && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
492     {
493       *common_type = new_type;
494       return true;
495     }
496 
497   /* We have mismatched signs, with the signed type being
498      no wider than the unsigned type.  In this case we need
499      a wider signed type.  */
500   unsigned int precision = MAX (TYPE_PRECISION (*common_type),
501 				TYPE_PRECISION (new_type));
502   precision *= 2;
503   if (precision * 2 > TYPE_PRECISION (type))
504     return false;
505 
506   *common_type = build_nonstandard_integer_type (precision, false);
507   return true;
508 }
509 
510 /* Check whether STMT_INFO can be viewed as a tree of integer operations
511    in which each node either performs CODE or WIDENED_CODE, and where
512    each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
513    specifies the maximum number of leaf operands.  SHIFT_P says whether
514    CODE and WIDENED_CODE are some sort of shift.
515 
516    If STMT_INFO is such a tree, return the number of leaf operands
517    and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
518    to a type that (a) is narrower than the result of STMT_INFO and
519    (b) can hold all leaf operand values.
520 
521    Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
522    exists.  */
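/* Worked example (illustration only, variable names are hypothetical):
   for the dot-product pattern below, STMT_INFO would be something like

     prod = (int) a * (int) b;   // a and b are unsigned chars

   with CODE == MULT_EXPR and WIDENED_CODE == WIDEN_MULT_EXPR.  The call
   then returns 2, describes the unpromoted A and B in UNPROM[0] and
   UNPROM[1], and sets *COMMON_TYPE to the narrow unsigned char type.  */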
523 
524 static unsigned int
525 vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
526 		      tree_code widened_code, bool shift_p,
527 		      unsigned int max_nops,
528 		      vect_unpromoted_value *unprom, tree *common_type)
529 {
530   /* Check for an integer operation with the right code.  */
531   vec_info *vinfo = stmt_info->vinfo;
532   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
533   if (!assign)
534     return 0;
535 
536   tree_code rhs_code = gimple_assign_rhs_code (assign);
537   if (rhs_code != code && rhs_code != widened_code)
538     return 0;
539 
540   tree type = gimple_expr_type (assign);
541   if (!INTEGRAL_TYPE_P (type))
542     return 0;
543 
544   /* Assume that both operands will be leaf operands.  */
545   max_nops -= 2;
546 
547   /* Check the operands.  */
548   unsigned int next_op = 0;
549   for (unsigned int i = 0; i < 2; ++i)
550     {
551       vect_unpromoted_value *this_unprom = &unprom[next_op];
552       unsigned int nops = 1;
553       tree op = gimple_op (assign, i + 1);
554       if (i == 1 && TREE_CODE (op) == INTEGER_CST)
555 	{
556 	  /* We already have a common type from earlier operands.
557 	     Update it to account for OP.  */
558 	  this_unprom->set_op (op, vect_constant_def);
559 	  if (!vect_joust_widened_integer (type, shift_p, op, common_type))
560 	    return 0;
561 	}
562       else
563 	{
564 	  /* Only allow shifts by constants.  */
565 	  if (shift_p && i == 1)
566 	    return 0;
567 
568 	  if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
569 						     this_unprom))
570 	    return 0;
571 
572 	  if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
573 	    {
574 	      /* The operand isn't widened.  If STMT_INFO has the code
575 		 for an unwidened operation, recursively check whether
576 		 this operand is a node of the tree.  */
577 	      if (rhs_code != code
578 		  || max_nops == 0
579 		  || this_unprom->dt != vect_internal_def)
580 		return 0;
581 
582 	      /* Give back the leaf slot allocated above now that we're
583 		 not treating this as a leaf operand.  */
584 	      max_nops += 1;
585 
586 	      /* Recursively process the definition of the operand.  */
587 	      stmt_vec_info def_stmt_info
588 		= vinfo->lookup_def (this_unprom->op);
589 	      nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
590 					   shift_p, max_nops, this_unprom,
591 					   common_type);
592 	      if (nops == 0)
593 		return 0;
594 
595 	      max_nops -= nops;
596 	    }
597 	  else
598 	    {
599 	      /* Make sure that the operand is narrower than the result.  */
600 	      if (TYPE_PRECISION (this_unprom->type) * 2
601 		  > TYPE_PRECISION (type))
602 		return 0;
603 
604 	      /* Update COMMON_TYPE for the new operand.  */
605 	      if (i == 0)
606 		*common_type = this_unprom->type;
607 	      else if (!vect_joust_widened_type (type, this_unprom->type,
608 						 common_type))
609 		return 0;
610 	    }
611 	}
612       next_op += nops;
613     }
614   return next_op;
615 }
616 
617 /* Helper to return a new temporary of type TYPE for pattern statement STMT.  If
618    STMT is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */
619 
620 static tree
621 vect_recog_temp_ssa_var (tree type, gimple *stmt)
622 {
623   return make_temp_ssa_name (type, stmt, "patt");
624 }
625 
626 /* STMT2_INFO describes a type conversion that could be split into STMT1
627    followed by a version of STMT2_INFO that takes NEW_RHS as its first
628    input.  Try to do this using pattern statements, returning true on
629    success.  */
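/* For instance (illustration only), vect_convert_input below may split an
   existing char-to-int cast into a char-to-short statement STMT1 followed
   by a short-to-int version of STMT2_INFO, so that the pattern can tap
   the intermediate short value.  */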
630 
631 static bool
632 vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
633 		      gimple *stmt1, tree vectype)
634 {
635   if (is_pattern_stmt_p (stmt2_info))
636     {
637       /* STMT2_INFO is part of a pattern.  Get the statement to which
638 	 the pattern is attached.  */
639       stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
640       vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);
641 
642       if (dump_enabled_p ())
643 	dump_printf_loc (MSG_NOTE, vect_location,
644 			 "Splitting pattern statement: %G", stmt2_info->stmt);
645 
646       /* Since STMT2_INFO is a pattern statement, we can change it
647 	 in-situ without worrying about changing the code for the
648 	 containing block.  */
649       gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
650 
651       if (dump_enabled_p ())
652 	{
653 	  dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
654 	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
655 			   stmt2_info->stmt);
656 	}
657 
658       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
659       if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
660 	/* STMT2_INFO is the actual pattern statement.  Add STMT1
661 	   to the end of the definition sequence.  */
662 	gimple_seq_add_stmt_without_update (def_seq, stmt1);
663       else
664 	{
665 	  /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
666 	     before it.  */
667 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
668 	  gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
669 	}
670       return true;
671     }
672   else
673     {
674       /* STMT2_INFO doesn't yet have a pattern.  Try to create a
675 	 two-statement pattern now.  */
676       gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
677       tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
678       tree lhs_vectype = get_vectype_for_scalar_type (lhs_type);
679       if (!lhs_vectype)
680 	return false;
681 
682       if (dump_enabled_p ())
683 	dump_printf_loc (MSG_NOTE, vect_location,
684 			 "Splitting statement: %G", stmt2_info->stmt);
685 
686       /* Add STMT1 as a singleton pattern definition sequence.  */
687       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
688       vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
689       gimple_seq_add_stmt_without_update (def_seq, stmt1);
690 
691       /* Build the second of the two pattern statements.  */
692       tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
693       gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
694       vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);
695 
696       if (dump_enabled_p ())
697 	{
698 	  dump_printf_loc (MSG_NOTE, vect_location,
699 			   "into pattern statements: %G", stmt1);
700 	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
701 	}
702 
703       return true;
704     }
705 }
706 
707 /* Convert UNPROM to TYPE and return the result, adding new statements
708    to STMT_INFO's pattern definition statements if no better way is
709    available.  VECTYPE is the vector form of TYPE.  */
710 
711 static tree
712 vect_convert_input (stmt_vec_info stmt_info, tree type,
713 		    vect_unpromoted_value *unprom, tree vectype)
714 {
715   /* Check for a no-op conversion.  */
716   if (types_compatible_p (type, TREE_TYPE (unprom->op)))
717     return unprom->op;
718 
719   /* Allow the caller to create constant vect_unpromoted_values.  */
720   if (TREE_CODE (unprom->op) == INTEGER_CST)
721     return wide_int_to_tree (type, wi::to_widest (unprom->op));
722 
723   tree input = unprom->op;
724   if (unprom->caster)
725     {
726       tree lhs = gimple_get_lhs (unprom->caster->stmt);
727       tree lhs_type = TREE_TYPE (lhs);
728 
729       /* If the result of the existing cast is the right width, use it
730 	 instead of the source of the cast.  */
731       if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
732 	input = lhs;
733       /* If the precision we want is between the source and result
734 	 precisions of the existing cast, try splitting the cast into
735 	 two and tapping into a mid-way point.  */
736       else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
737 	       && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
738 	{
739 	  /* In order to preserve the semantics of the original cast,
740 	     give the mid-way point the same signedness as the input value.
741 
742 	     It would be possible to use a signed type here instead if
743 	     TYPE is signed and UNPROM->TYPE is unsigned, but that would
744 	     make the sign of the midtype sensitive to the order in
745 	     which we process the statements, since the signedness of
746 	     TYPE is the signedness required by just one of possibly
747 	     many users.  Also, unsigned promotions are usually as cheap
748 	     as or cheaper than signed ones, so it's better to keep an
749 	     unsigned promotion.  */
750 	  tree midtype = build_nonstandard_integer_type
751 	    (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
752 	  tree vec_midtype = get_vectype_for_scalar_type (midtype);
753 	  if (vec_midtype)
754 	    {
755 	      input = vect_recog_temp_ssa_var (midtype, NULL);
756 	      gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
757 						       unprom->op);
758 	      if (!vect_split_statement (unprom->caster, input, new_stmt,
759 					 vec_midtype))
760 		append_pattern_def_seq (stmt_info, new_stmt, vec_midtype);
761 	    }
762 	}
763 
764       /* See if we can reuse an existing result.  */
765       if (types_compatible_p (type, TREE_TYPE (input)))
766 	return input;
767     }
768 
769   /* We need a new conversion statement.  */
770   tree new_op = vect_recog_temp_ssa_var (type, NULL);
771   gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
772 
773   /* If OP is an external value, see if we can insert the new statement
774      on an incoming edge.  */
775   if (input == unprom->op && unprom->dt == vect_external_def)
776     if (edge e = vect_get_external_def_edge (stmt_info->vinfo, input))
777       {
778 	basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
779 	gcc_assert (!new_bb);
780 	return new_op;
781       }
782 
783   /* As a (common) last resort, add the statement to the pattern itself.  */
784   append_pattern_def_seq (stmt_info, new_stmt, vectype);
785   return new_op;
786 }
787 
788 /* Invoke vect_convert_input for N elements of UNPROM and store the
789    result in the corresponding elements of RESULT.  */
790 
791 static void
792 vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
793 		     tree *result, tree type, vect_unpromoted_value *unprom,
794 		     tree vectype)
795 {
796   for (unsigned int i = 0; i < n; ++i)
797     {
798       unsigned int j;
799       for (j = 0; j < i; ++j)
800 	if (unprom[j].op == unprom[i].op)
801 	  break;
802       if (j < i)
803 	result[i] = result[j];
804       else
805 	result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
806     }
807 }
808 
809 /* The caller has created a (possibly empty) sequence of pattern definition
810    statements followed by a single statement PATTERN_STMT.  Cast the result
811    of this final statement to TYPE.  If a new statement is needed, add
812    PATTERN_STMT to the end of STMT_INFO's pattern definition statements
813    and return the new statement, otherwise return PATTERN_STMT as-is.
814    VECITYPE is the vector form of PATTERN_STMT's result type.  */
815 
816 static gimple *
817 vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
818 		     tree vecitype)
819 {
820   tree lhs = gimple_get_lhs (pattern_stmt);
821   if (!types_compatible_p (type, TREE_TYPE (lhs)))
822     {
823       append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
824       tree cast_var = vect_recog_temp_ssa_var (type, NULL);
825       pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
826     }
827   return pattern_stmt;
828 }
829 
830 /* Return true if STMT_VINFO describes a reduction for which reassociation
831    is allowed.  If STMT_VINFO is part of a group, assume that it's part of
832    a reduction chain and optimistically assume that all statements
833    except the last allow reassociation.  */
834 
835 static bool
836 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
837 {
838   return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
839 	  ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
840 	  : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
841 }
842 
843 /* As above, but also require it to have code CODE and to be a reduction
844    in the outermost loop.  When returning true, store the operands in
845    *OP0_OUT and *OP1_OUT.  */
846 
847 static bool
848 vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
849 				tree *op0_out, tree *op1_out)
850 {
851   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
852   if (!loop_info)
853     return false;
854 
855   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
856   if (!assign || gimple_assign_rhs_code (assign) != code)
857     return false;
858 
859   /* We don't allow changing the order of the computation in the inner-loop
860      when doing outer-loop vectorization.  */
861   struct loop *loop = LOOP_VINFO_LOOP (loop_info);
862   if (loop && nested_in_vect_loop_p (loop, stmt_info))
863     return false;
864 
865   if (!vect_reassociating_reduction_p (stmt_info))
866     return false;
867 
868   *op0_out = gimple_assign_rhs1 (assign);
869   *op1_out = gimple_assign_rhs2 (assign);
870   return true;
871 }
872 
873 /* Function vect_recog_dot_prod_pattern
874 
875    Try to find the following pattern:
876 
877      type x_t, y_t;
878      TYPE1 prod;
879      TYPE2 sum = init;
880    loop:
881      sum_0 = phi <init, sum_1>
882      S1  x_t = ...
883      S2  y_t = ...
884      S3  x_T = (TYPE1) x_t;
885      S4  y_T = (TYPE1) y_t;
886      S5  prod = x_T * y_T;
887      [S6  prod = (TYPE2) prod;  #optional]
888      S7  sum_1 = prod + sum_0;
889 
890    where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
891    same size as 'TYPE1' or bigger.  This is a special case of a reduction
892    computation.
893 
894    Input:
895 
896    * STMT_VINFO: The stmt from which the pattern search begins.  In the
897    example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
898    will be detected.
899 
900    Output:
901 
902    * TYPE_OUT: The type of the output of this pattern.
903 
904    * Return value: A new stmt that will be used to replace the sequence of
905    stmts that constitute the pattern. In this case it will be:
906         WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
907 
908    Note: The dot-prod idiom is a widening reduction pattern that is
909          vectorized without preserving all the intermediate results. It
910          produces only N/2 (widened) results (by summing up pairs of
911          intermediate results) rather than all N results.  Therefore, we
912          cannot allow this pattern when we want to get all the results and in
913          the correct order (as is the case when this computation is in an
914          inner-loop nested in an outer-loop that is being vectorized).  */
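/* A typical source loop for this pattern (illustration only, variable
   names are hypothetical) is:

     short *x, *y;
     int sum = 0;
     for (int i = 0; i < n; i++)
       sum += (int) x[i] * (int) y[i];

   which the pattern replaces with a DOT_PROD_EXPR on the narrow inputs.  */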
915 
916 static gimple *
917 vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
918 {
919   tree oprnd0, oprnd1;
920   gimple *last_stmt = stmt_vinfo->stmt;
921   vec_info *vinfo = stmt_vinfo->vinfo;
922   tree type, half_type;
923   gimple *pattern_stmt;
924   tree var;
925 
926   /* Look for the following pattern
927           DX = (TYPE1) X;
928           DY = (TYPE1) Y;
929           DPROD = DX * DY;
930           DDPROD = (TYPE2) DPROD;
931           sum_1 = DDPROD + sum_0;
932      In which
933      - DX is double the size of X
934      - DY is double the size of Y
935      - DX, DY, DPROD all have the same type
936      - sum is the same size as DPROD or bigger
937      - sum has been recognized as a reduction variable.
938 
939      This is equivalent to:
940        DPROD = X w* Y;          #widen mult
941        sum_1 = DPROD w+ sum_0;  #widen summation
942      or
943        DPROD = X w* Y;          #widen mult
944        sum_1 = DPROD + sum_0;   #summation
945    */
946 
947   /* Starting from LAST_STMT, follow the defs of its uses in search
948      of the above pattern.  */
949 
950   if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
951 				       &oprnd0, &oprnd1))
952     return NULL;
953 
954   type = gimple_expr_type (last_stmt);
955 
956   vect_unpromoted_value unprom_mult;
957   oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
958 
959   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
960      we know that oprnd1 is the reduction variable (defined by a loop-header
961      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
962      It remains to check that oprnd0 is defined by a (widen_)mult_expr.  */
963   if (!oprnd0)
964     return NULL;
965 
966   stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
967   if (!mult_vinfo)
968     return NULL;
969 
970   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
971      a phi inside the loop (in case we are analyzing an outer-loop).  */
972   vect_unpromoted_value unprom0[2];
973   if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
974 			     false, 2, unprom0, &half_type))
975     return NULL;
976 
977   /* If there are two widening operations, make sure they agree on
978      the sign of the extension.  */
979   if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
980       && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
981     return NULL;
982 
983   vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
984 
985   tree half_vectype;
986   if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
987 					type_out, &half_vectype))
988     return NULL;
989 
990   /* Get the inputs in the appropriate types.  */
991   tree mult_oprnd[2];
992   vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
993 		       unprom0, half_vectype);
994 
995   var = vect_recog_temp_ssa_var (type, NULL);
996   pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
997 				      mult_oprnd[0], mult_oprnd[1], oprnd1);
998 
999   return pattern_stmt;
1000 }
1001 
1002 
1003 /* Function vect_recog_sad_pattern
1004 
1005    Try to find the following Sum of Absolute Difference (SAD) pattern:
1006 
1007      type x_t, y_t;
1008      signed TYPE1 diff, abs_diff;
1009      TYPE2 sum = init;
1010    loop:
1011      sum_0 = phi <init, sum_1>
1012      S1  x_t = ...
1013      S2  y_t = ...
1014      S3  x_T = (TYPE1) x_t;
1015      S4  y_T = (TYPE1) y_t;
1016      S5  diff = x_T - y_T;
1017      S6  abs_diff = ABS_EXPR <diff>;
1018      [S7  abs_diff = (TYPE2) abs_diff;  #optional]
1019      S8  sum_1 = abs_diff + sum_0;
1020 
1021    where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1022    same size as 'TYPE1' or bigger.  This is a special case of a reduction
1023    computation.
1024 
1025    Input:
1026 
1027    * STMT_VINFO: The stmt from which the pattern search begins.  In the
1028    example, when this function is called with S8, the pattern
1029    {S3,S4,S5,S6,S7,S8} will be detected.
1030 
1031    Output:
1032 
1033    * TYPE_OUT: The type of the output of this pattern.
1034 
1035    * Return value: A new stmt that will be used to replace the sequence of
1036    stmts that constitute the pattern. In this case it will be:
1037         SAD_EXPR <x_t, y_t, sum_0>
1038   */
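/* A typical source loop for this pattern (illustration only, variable
   names are hypothetical) is:

     unsigned char *x, *y;
     int sum = 0;
     for (int i = 0; i < n; i++)
       sum += abs ((int) x[i] - (int) y[i]);

   which the pattern replaces with a SAD_EXPR on the narrow inputs.  */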
1039 
1040 static gimple *
1041 vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1042 {
1043   gimple *last_stmt = stmt_vinfo->stmt;
1044   vec_info *vinfo = stmt_vinfo->vinfo;
1045   tree half_type;
1046 
1047   /* Look for the following pattern
1048           DX = (TYPE1) X;
1049           DY = (TYPE1) Y;
1050           DDIFF = DX - DY;
1051           DAD = ABS_EXPR <DDIFF>;
1052           DAD = (TYPE2) DAD;
1053           sum_1 = DAD + sum_0;
1054      In which
1055      - DX is at least double the size of X
1056      - DY is at least double the size of Y
1057      - DX, DY, DDIFF, DAD all have the same type
1058      - sum is the same size as DAD or bigger
1059      - sum has been recognized as a reduction variable.
1060 
1061      This is equivalent to:
1062        DDIFF = X w- Y;          #widen sub
1063        DAD = ABS_EXPR <DDIFF>;
1064        sum_1 = DAD w+ sum_0;    #widen summation
1065      or
1066        DDIFF = X w- Y;          #widen sub
1067        DAD = ABS_EXPR <DDIFF>;
1068        sum_1 = DAD + sum_0;     #summation
1069    */
1070 
1071   /* Starting from LAST_STMT, follow the defs of its uses in search
1072      of the above pattern.  */
1073 
1074   tree plus_oprnd0, plus_oprnd1;
1075   if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1076 				       &plus_oprnd0, &plus_oprnd1))
1077     return NULL;
1078 
1079   tree sum_type = gimple_expr_type (last_stmt);
1080 
1081   /* Any non-truncating sequence of conversions is OK here, since
1082      with a successful match, the result of the ABS(U) is known to fit
1083      within the nonnegative range of the result type.  (It cannot be the
1084      negative of the minimum signed value due to the range of the widening
1085      MINUS_EXPR.)  */
1086   vect_unpromoted_value unprom_abs;
1087   plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1088 						      &unprom_abs);
1089 
1090   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
1091      we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1092      phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1093      Then check that plus_oprnd0 is defined by an abs_expr.  */
1094 
1095   if (!plus_oprnd0)
1096     return NULL;
1097 
1098   stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1099   if (!abs_stmt_vinfo)
1100     return NULL;
1101 
1102   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
1103      a phi inside the loop (in case we are analyzing an outer-loop).  */
1104   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1105   if (!abs_stmt
1106       || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1107 	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1108     return NULL;
1109 
1110   tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1111   tree abs_type = TREE_TYPE (abs_oprnd);
1112   if (TYPE_UNSIGNED (abs_type))
1113     return NULL;
1114 
1115   /* Peel off conversions from the ABS input.  This can involve sign
1116      changes (e.g. from an unsigned subtraction to a signed ABS input)
1117      or signed promotion, but it can't include unsigned promotion.
1118      (Note that ABS of an unsigned promotion should have been folded
1119      away before now anyway.)  */
1120   vect_unpromoted_value unprom_diff;
1121   abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1122 						    &unprom_diff);
1123   if (!abs_oprnd)
1124     return NULL;
1125   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1126       && TYPE_UNSIGNED (unprom_diff.type))
1127     return NULL;
1128 
1129   /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
1130   stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1131   if (!diff_stmt_vinfo)
1132     return NULL;
1133 
1134   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
1135      a phi inside the loop (in case we are analyzing an outer-loop).  */
1136   vect_unpromoted_value unprom[2];
1137   if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
1138 			     false, 2, unprom, &half_type))
1139     return NULL;
1140 
1141   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1142 
1143   tree half_vectype;
1144   if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type,
1145 					type_out, &half_vectype))
1146     return NULL;
1147 
1148   /* Get the inputs to the SAD_EXPR in the appropriate types.  */
1149   tree sad_oprnd[2];
1150   vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
1151 		       unprom, half_vectype);
1152 
1153   tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1154   gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1155 					      sad_oprnd[1], plus_oprnd1);
1156 
1157   return pattern_stmt;
1158 }
1159 
1160 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1161    so that it can be treated as though it had the form:
1162 
1163       A_TYPE a;
1164       B_TYPE b;
1165       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
1166       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
1167     | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
1168     | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
1169     | RES_TYPE res = a_extend ORIG_CODE b_extend;
1170 
1171    Try to replace the pattern with:
1172 
1173       A_TYPE a;
1174       B_TYPE b;
1175       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
1176       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
1177     | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1178     | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op
1179 
1180    where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1181 
1182    SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
1183    name of the pattern being matched, for dump purposes.  */
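/* For example (illustration only), with ORIG_CODE == MULT_EXPR and
   WIDE_CODE == WIDEN_MULT_EXPR, a statement such as

     int res = (int) a * (int) b;   // a and b are shorts

   becomes a WIDEN_MULT_EXPR on the short inputs, followed by a possible
   conversion of the result back to the original type.  */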
1184 
1185 static gimple *
1186 vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
1187 			     tree_code orig_code, tree_code wide_code,
1188 			     bool shift_p, const char *name)
1189 {
1190   gimple *last_stmt = last_stmt_info->stmt;
1191 
1192   vect_unpromoted_value unprom[2];
1193   tree half_type;
1194   if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
1195 			     shift_p, 2, unprom, &half_type))
1196     return NULL;
1197 
1198   /* Pattern detected.  */
1199   vect_pattern_detected (name, last_stmt);
1200 
1201   tree type = gimple_expr_type (last_stmt);
1202   tree itype = type;
1203   if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1204       || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1205     itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1206 					    TYPE_UNSIGNED (half_type));
1207 
1208   /* Check target support  */
1209   tree vectype = get_vectype_for_scalar_type (half_type);
1210   tree vecitype = get_vectype_for_scalar_type (itype);
1211   enum tree_code dummy_code;
1212   int dummy_int;
1213   auto_vec<tree> dummy_vec;
1214   if (!vectype
1215       || !vecitype
1216       || !supportable_widening_operation (wide_code, last_stmt_info,
1217 					  vecitype, vectype,
1218 					  &dummy_code, &dummy_code,
1219 					  &dummy_int, &dummy_vec))
1220     return NULL;
1221 
1222   *type_out = get_vectype_for_scalar_type (type);
1223   if (!*type_out)
1224     return NULL;
1225 
1226   tree oprnd[2];
1227   vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);
1228 
1229   tree var = vect_recog_temp_ssa_var (itype, NULL);
1230   gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1231 					      oprnd[0], oprnd[1]);
1232 
1233   return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
1234 }
1235 
1236 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1237    to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */
1238 
1239 static gimple *
1240 vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1241 {
1242   return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
1243 				      WIDEN_MULT_EXPR, false,
1244 				      "vect_recog_widen_mult_pattern");
1245 }
1246 
1247 /* Function vect_recog_pow_pattern
1248 
1249    Try to find the following pattern:
1250 
1251      x = POW (y, N);
1252 
1253    with POW being one of pow, powf, powi, powif and N being
1254    either 2 or 0.5.
1255 
1256    Input:
1257 
1258    * STMT_VINFO: The stmt from which the pattern search begins.
1259 
1260    Output:
1261 
1262    * TYPE_OUT: The type of the output of this pattern.
1263 
1264    * Return value: A new stmt that will be used to replace the sequence of
1265    stmts that constitute the pattern. In this case it will be:
1266         x = x * x
1267    or
1268 	x = sqrt (x)
1269 */
1270 
1271 static gimple *
1272 vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1273 {
1274   gimple *last_stmt = stmt_vinfo->stmt;
1275   tree base, exp;
1276   gimple *stmt;
1277   tree var;
1278 
1279   if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1280     return NULL;
1281 
1282   switch (gimple_call_combined_fn (last_stmt))
1283     {
1284     CASE_CFN_POW:
1285     CASE_CFN_POWI:
1286       break;
1287 
1288     default:
1289       return NULL;
1290     }
1291 
1292   base = gimple_call_arg (last_stmt, 0);
1293   exp = gimple_call_arg (last_stmt, 1);
1294   if (TREE_CODE (exp) != REAL_CST
1295       && TREE_CODE (exp) != INTEGER_CST)
1296     {
1297       if (flag_unsafe_math_optimizations
1298 	  && TREE_CODE (base) == REAL_CST
1299 	  && !gimple_call_internal_p (last_stmt))
1300 	{
1301 	  combined_fn log_cfn;
1302 	  built_in_function exp_bfn;
1303 	  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1304 	    {
1305 	    case BUILT_IN_POW:
1306 	      log_cfn = CFN_BUILT_IN_LOG;
1307 	      exp_bfn = BUILT_IN_EXP;
1308 	      break;
1309 	    case BUILT_IN_POWF:
1310 	      log_cfn = CFN_BUILT_IN_LOGF;
1311 	      exp_bfn = BUILT_IN_EXPF;
1312 	      break;
1313 	    case BUILT_IN_POWL:
1314 	      log_cfn = CFN_BUILT_IN_LOGL;
1315 	      exp_bfn = BUILT_IN_EXPL;
1316 	      break;
1317 	    default:
1318 	      return NULL;
1319 	    }
1320 	  tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1321 	  tree exp_decl = builtin_decl_implicit (exp_bfn);
1322 	  /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
1323 	     does that, but if C is a power of 2, we want to use
1324 	     exp2 (log2 (C) * x) in the non-vectorized version, but for
1325 	     vectorization we don't have vectorized exp2.  */
1326 	  if (logc
1327 	      && TREE_CODE (logc) == REAL_CST
1328 	      && exp_decl
1329 	      && lookup_attribute ("omp declare simd",
1330 				   DECL_ATTRIBUTES (exp_decl)))
1331 	    {
1332 	      cgraph_node *node = cgraph_node::get_create (exp_decl);
1333 	      if (node->simd_clones == NULL)
1334 		{
1335 		  if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1336 		      || node->definition)
1337 		    return NULL;
1338 		  expand_simd_clones (node);
1339 		  if (node->simd_clones == NULL)
1340 		    return NULL;
1341 		}
1342 	      *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1343 	      if (!*type_out)
1344 		return NULL;
1345 	      tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1346 	      gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1347 	      append_pattern_def_seq (stmt_vinfo, g);
1348 	      tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1349 	      g = gimple_build_call (exp_decl, 1, def);
1350 	      gimple_call_set_lhs (g, res);
1351 	      return g;
1352 	    }
1353 	}
1354 
1355       return NULL;
1356     }
1357 
1358   /* We now have a pow or powi builtin function call with a constant
1359      exponent.  */
1360 
1361   /* Catch squaring.  */
1362   if ((tree_fits_shwi_p (exp)
1363        && tree_to_shwi (exp) == 2)
1364       || (TREE_CODE (exp) == REAL_CST
1365           && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1366     {
1367       if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
1368 					    TREE_TYPE (base), type_out))
1369 	return NULL;
1370 
1371       var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1372       stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1373       return stmt;
1374     }
1375 
1376   /* Catch square root.  */
1377   if (TREE_CODE (exp) == REAL_CST
1378       && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1379     {
1380       *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1381       if (*type_out
1382 	  && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1383 					     OPTIMIZE_FOR_SPEED))
1384 	{
1385 	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1386 	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1387 	  gimple_call_set_lhs (stmt, var);
1388 	  gimple_call_set_nothrow (stmt, true);
1389 	  return stmt;
1390 	}
1391     }
1392 
1393   return NULL;
1394 }
1395 
1396 
1397 /* Function vect_recog_widen_sum_pattern
1398 
1399    Try to find the following pattern:
1400 
1401      type x_t;
1402      TYPE x_T, sum = init;
1403    loop:
1404      sum_0 = phi <init, sum_1>
1405      S1  x_t = *p;
1406      S2  x_T = (TYPE) x_t;
1407      S3  sum_1 = x_T + sum_0;
1408 
1409    where type 'TYPE' is at least double the size of type 'type', i.e. we're
1410    summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1411    a special case of a reduction computation.
1412 
1413    Input:
1414 
1415    * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1416    when this function is called with S3, the pattern {S2,S3} will be detected.
1417 
1418    Output:
1419 
1420    * TYPE_OUT: The type of the output of this pattern.
1421 
1422    * Return value: A new stmt that will be used to replace the sequence of
1423    stmts that constitute the pattern. In this case it will be:
1424         WIDEN_SUM <x_t, sum_0>
1425 
1426    Note: The widening-sum idiom is a widening reduction pattern that is
1427 	 vectorized without preserving all the intermediate results. It
1428          produces only N/2 (widened) results (by summing up pairs of
1429 	 intermediate results) rather than all N results.  Therefore, we
1430 	 cannot allow this pattern when we want to get all the results and in
1431 	 the correct order (as is the case when this computation is in an
1432 	 inner-loop nested in an outer-loop that is being vectorized).  */
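/* A typical source loop for this pattern (illustration only, variable
   names are hypothetical) is:

     short *x;
     int sum = 0;
     for (int i = 0; i < n; i++)
       sum += (int) x[i];

   which the pattern replaces with a WIDEN_SUM_EXPR on the narrow input.  */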
1433 
1434 static gimple *
1435 vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1436 {
1437   gimple *last_stmt = stmt_vinfo->stmt;
1438   tree oprnd0, oprnd1;
1439   vec_info *vinfo = stmt_vinfo->vinfo;
1440   tree type;
1441   gimple *pattern_stmt;
1442   tree var;
1443 
1444   /* Look for the following pattern
1445           DX = (TYPE) X;
1446           sum_1 = DX + sum_0;
1447      In which DX is at least double the size of X, and sum_1 has been
1448      recognized as a reduction variable.
1449    */
1450 
1451   /* Starting from LAST_STMT, follow the defs of its uses in search
1452      of the above pattern.  */
1453 
1454   if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1455 				       &oprnd0, &oprnd1))
1456     return NULL;
1457 
1458   type = gimple_expr_type (last_stmt);
1459 
1460   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
1461      we know that oprnd1 is the reduction variable (defined by a loop-header
1462      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1463      Left to check that oprnd0 is defined by a cast from type 'type' to type
1464      'TYPE'.  */
1465 
1466   vect_unpromoted_value unprom0;
1467   if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1468       || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1469     return NULL;
1470 
1471   vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1472 
1473   if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
1474 					type_out))
1475     return NULL;
1476 
1477   var = vect_recog_temp_ssa_var (type, NULL);
1478   pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1479 
1480   return pattern_stmt;
1481 }
1482 
1483 /* Recognize cases in which an operation is performed in one type WTYPE
1484    but could be done more efficiently in a narrower type NTYPE.  For example,
1485    if we have:
1486 
1487      ATYPE a;  // narrower than NTYPE
1488      BTYPE b;  // narrower than NTYPE
1489      WTYPE aw = (WTYPE) a;
1490      WTYPE bw = (WTYPE) b;
1491      WTYPE res = aw + bw;  // only uses of aw and bw
1492 
1493    then it would be more efficient to do:
1494 
1495      NTYPE an = (NTYPE) a;
1496      NTYPE bn = (NTYPE) b;
1497      NTYPE resn = an + bn;
1498      WTYPE res = (WTYPE) resn;
1499 
1500    Other situations include things like:
1501 
1502      ATYPE a;  // NTYPE or narrower
1503      WTYPE aw = (WTYPE) a;
1504      WTYPE res = aw + b;
1505 
1506    when only "(NTYPE) res" is significant.  In that case it's more efficient
1507    to truncate "b" and do the operation on NTYPE instead:
1508 
1509      NTYPE an = (NTYPE) a;
1510      NTYPE bn = (NTYPE) b;  // truncation
1511      NTYPE resn = an + bn;
1512      WTYPE res = (WTYPE) resn;
1513 
1514    All users of "res" should then use "resn" instead, making the final
1515    statement dead (not marked as relevant).  The final statement is still
1516    needed to maintain the type correctness of the IR.
1517 
1518    vect_determine_precisions has already determined the minimum
1519    precision of the operation and the minimum precision required
1520    by users of the result.  */
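
/* Illustrative sketch only (not part of the vectorizer): with 8-bit
   inputs whose users only need the low 8 bits of the result, the
   transformation above corresponds to rewriting

     unsigned char a, b, c;
     int res = (int) a + (int) b;		// WTYPE = int
     c = (unsigned char) res;

   as

     unsigned char resn = a + b;		// NTYPE operation (mod 256)
     int res = (int) resn;			// kept for IR type correctness
     c = resn;

   so the addition can be done on a full vector of narrow elements.  */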
1521 
1522 static gimple *
1523 vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1524 {
1525   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1526   if (!last_stmt)
1527     return NULL;
1528 
1529   /* See whether we have found that this operation can be done on a
1530      narrower type without changing its semantics.  */
1531   unsigned int new_precision = last_stmt_info->operation_precision;
1532   if (!new_precision)
1533     return NULL;
1534 
1535   vec_info *vinfo = last_stmt_info->vinfo;
1536   tree lhs = gimple_assign_lhs (last_stmt);
1537   tree type = TREE_TYPE (lhs);
1538   tree_code code = gimple_assign_rhs_code (last_stmt);
1539 
1540   /* Keep the first operand of a COND_EXPR as-is: only the other two
1541      operands are interesting.  */
1542   unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1543 
1544   /* Check the operands.  */
1545   unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1546   auto_vec <vect_unpromoted_value, 3> unprom (nops);
1547   unprom.quick_grow (nops);
1548   unsigned int min_precision = 0;
1549   bool single_use_p = false;
1550   for (unsigned int i = 0; i < nops; ++i)
1551     {
1552       tree op = gimple_op (last_stmt, first_op + i);
1553       if (TREE_CODE (op) == INTEGER_CST)
1554 	unprom[i].set_op (op, vect_constant_def);
1555       else if (TREE_CODE (op) == SSA_NAME)
1556 	{
1557 	  bool op_single_use_p = true;
1558 	  if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1559 						     &op_single_use_p))
1560 	    return NULL;
1561 	  /* If:
1562 
1563 	     (1) N bits of the result are needed;
1564 	     (2) all inputs are widened from M<N bits; and
1565 	     (3) one operand OP is a single-use SSA name
1566 
1567 	     we can shift the M->N widening from OP to the output
1568 	     without changing the number or type of extensions involved.
1569 	     This then reduces the number of copies of STMT_INFO.
1570 
1571 	     If instead of (3) more than one operand is a single-use SSA name,
1572 	     shifting the extension to the output is even more of a win.
1573 
1574 	     If instead:
1575 
1576 	     (1) N bits of the result are needed;
1577 	     (2) one operand OP2 is widened from M2<N bits;
1578 	     (3) another operand OP1 is widened from M1<M2 bits; and
1579 	     (4) both OP1 and OP2 are single-use
1580 
1581 	     the choice is between:
1582 
1583 	     (a) truncating OP2 to M1, doing the operation on M1,
1584 		 and then widening the result to N
1585 
1586 	     (b) widening OP1 to M2, doing the operation on M2, and then
1587 		 widening the result to N
1588 
1589 	     Both shift the M2->N widening of the inputs to the output.
1590 	     (a) additionally shifts the M1->M2 widening to the output;
1591 	     it requires fewer copies of STMT_INFO but requires an extra
1592 	     M2->M1 truncation.
1593 
1594 	     Which is better will depend on the complexity and cost of
1595 	     STMT_INFO, which is hard to predict at this stage.  However,
1596 	     a clear tie-breaker in favor of (b) is the fact that the
1597 	     truncation in (a) increases the length of the operation chain.
1598 
1599 	     If instead of (4) only one of OP1 or OP2 is single-use,
1600 	     (b) is still a win over doing the operation in N bits:
1601 	     it still shifts the M2->N widening on the single-use operand
1602 	     to the output and reduces the number of STMT_INFO copies.
1603 
1604 	     If neither operand is single-use then operating on fewer than
1605 	     N bits might lead to more extensions overall.  Whether it does
1606 	     or not depends on global information about the vectorization
1607 	     region, and whether that's a good trade-off would again
1608 	     depend on the complexity and cost of the statements involved,
1609 	     as well as things like register pressure that are not normally
1610 	     modelled at this stage.  We therefore ignore these cases
1611 	     and just optimize the clear single-use wins above.
1612 
1613 	     Thus we take the maximum precision of the unpromoted operands
1614 	     and record whether any operand is single-use.  */
1615 	  if (unprom[i].dt == vect_internal_def)
1616 	    {
1617 	      min_precision = MAX (min_precision,
1618 				   TYPE_PRECISION (unprom[i].type));
1619 	      single_use_p |= op_single_use_p;
1620 	    }
1621 	}
1622     }
1623 
1624   /* Although the operation could be done in operation_precision, we have
1625      to balance that against introducing extra truncations or extensions.
1626      Calculate the minimum precision that can be handled efficiently.
1627 
1628      The loop above determined that the operation could be handled
1629      efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1630      extension from the inputs to the output without introducing more
1631      instructions, and would reduce the number of instructions required
1632      for STMT_INFO itself.
1633 
1634      vect_determine_precisions has also determined that the result only
1635      needs min_output_precision bits.  Truncating by a factor of N times
1636      requires a tree of N - 1 instructions, so if TYPE is N times wider
1637      than min_output_precision, doing the operation in TYPE and truncating
1638      the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1639      In contrast:
1640 
1641      - truncating the input to a unary operation and doing the operation
1642        in the new type requires at most N - 1 + 1 = N instructions per
1643        output vector
1644 
1645      - doing the same for a binary operation requires at most
1646        (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1647 
1648      Both unary and binary operations require fewer instructions than
1649      this if the operands were extended from a suitable truncated form.
1650      Thus there is usually nothing to lose by doing operations in
1651      min_output_precision bits, but there can be something to gain.  */
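  /* Worked example (illustration only): if TYPE has 32 bits and
     min_output_precision is 8, then N = 4.  Doing the operation in TYPE
     and truncating afterwards costs 4 + 3 = 7 vector instructions per
     output vector; truncating both inputs of a binary operation first
     costs at most 2 * 3 + 1 = 7, and less whenever the inputs were
     themselves extended from narrower values.  */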
1652   if (!single_use_p)
1653     min_precision = last_stmt_info->min_output_precision;
1654   else
1655     min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1656 
1657   /* Apply the minimum efficient precision we just calculated.  */
1658   if (new_precision < min_precision)
1659     new_precision = min_precision;
1660   new_precision = vect_element_precision (new_precision);
1661   if (new_precision >= TYPE_PRECISION (type))
1662     return NULL;
1663 
1664   vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1665 
1666   *type_out = get_vectype_for_scalar_type (type);
1667   if (!*type_out)
1668     return NULL;
1669 
1670   /* We've found a viable pattern.  Get the new type of the operation.  */
1671   bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1672   tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1673 
1674   /* If we're truncating an operation, we need to make sure that we
1675      don't introduce new undefined overflow.  The codes tested here are
1676      a subset of those accepted by vect_truncatable_operation_p.  */
1677   tree op_type = new_type;
1678   if (TYPE_OVERFLOW_UNDEFINED (new_type)
1679       && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
1680     op_type = build_nonstandard_integer_type (new_precision, true);
1681 
1682   /* We specifically don't check here whether the target supports the
1683      new operation, since it might be something that a later pattern
1684      wants to rewrite anyway.  If targets have a minimum element size
1685      for some optabs, we should pattern-match smaller ops to larger ops
1686      where beneficial.  */
1687   tree new_vectype = get_vectype_for_scalar_type (new_type);
1688   tree op_vectype = get_vectype_for_scalar_type (op_type);
1689   if (!new_vectype || !op_vectype)
1690     return NULL;
1691 
1692   if (dump_enabled_p ())
1693     dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1694 		     type, new_type);
1695 
1696   /* Calculate the rhs operands for an operation on OP_TYPE.  */
1697   tree ops[3] = {};
1698   for (unsigned int i = 1; i < first_op; ++i)
1699     ops[i - 1] = gimple_op (last_stmt, i);
1700   vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
1701 		       op_type, &unprom[0], op_vectype);
1702 
1703   /* Use the operation to produce a result of type OP_TYPE.  */
1704   tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
1705   gimple *pattern_stmt = gimple_build_assign (new_var, code,
1706 					      ops[0], ops[1], ops[2]);
1707   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1708 
1709   if (dump_enabled_p ())
1710     dump_printf_loc (MSG_NOTE, vect_location,
1711 		     "created pattern stmt: %G", pattern_stmt);
1712 
1713   /* Convert back to the original signedness, if OP_TYPE is different
1714      from NEW_TYPE.  */
1715   if (op_type != new_type)
1716     pattern_stmt = vect_convert_output (last_stmt_info, new_type,
1717 					pattern_stmt, op_vectype);
1718 
1719   /* Promote the result to the original type.  */
1720   pattern_stmt = vect_convert_output (last_stmt_info, type,
1721 				      pattern_stmt, new_vectype);
1722 
1723   return pattern_stmt;
1724 }
1725 
1726 /* Recognize the patterns:
1727 
1728 	    ATYPE a;  // narrower than TYPE
1729 	    BTYPE b;  // narrower than TYPE
1730 	(1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1731      or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1732 
1733    where only the bottom half of avg is used.  Try to transform them into:
1734 
1735 	(1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1736      or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1737 
1738   followed by:
1739 
1740 	    TYPE avg = (TYPE) avg';
1741 
1742   where NTYPE is no wider than half of TYPE.  Since only the bottom half
1743   of avg is used, all or part of the cast of avg' should become redundant.  */
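
/* Illustrative sketch only (not part of the vectorizer): a typical source
   form for variant (2), assuming the addition is done in 'int' and only
   the low 8 bits of the shift are stored:

     unsigned char a[256], b[256], c[256];
     for (int i = 0; i < 256; i++)
       c[i] = (a[i] + b[i] + 1) >> 1;	// rounding average

   Only the bottom byte of the shift result survives, so the whole
   computation can become .AVG_CEIL on vectors of unsigned char.  */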
1744 
1745 static gimple *
1746 vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1747 {
1748   /* Check for a shift right by one bit.  */
1749   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1750   vec_info *vinfo = last_stmt_info->vinfo;
1751   if (!last_stmt
1752       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1753       || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1754     return NULL;
1755 
1756   /* Check that the shift result is wider than the users of the
1757      result need (i.e. that narrowing would be a natural choice).  */
1758   tree lhs = gimple_assign_lhs (last_stmt);
1759   tree type = TREE_TYPE (lhs);
1760   unsigned int target_precision
1761     = vect_element_precision (last_stmt_info->min_output_precision);
1762   if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1763     return NULL;
1764 
1765   /* Look through any change in sign on the shift input.  */
1766   tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1767   vect_unpromoted_value unprom_plus;
1768   rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
1769 						     &unprom_plus);
1770   if (!rshift_rhs
1771       || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
1772     return NULL;
1773 
1774   /* Get the definition of the shift input.  */
1775   stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1776   if (!plus_stmt_info)
1777     return NULL;
1778 
1779   /* Check whether the shift input can be seen as a tree of additions on
1780      2 or 3 widened inputs.
1781 
1782      Note that the pattern should be a win even if the result of one or
1783      more additions is reused elsewhere: if the pattern matches, we'd be
1784      replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s.  */
1785   internal_fn ifn = IFN_AVG_FLOOR;
1786   vect_unpromoted_value unprom[3];
1787   tree new_type;
1788   unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
1789 					    PLUS_EXPR, false, 3,
1790 					    unprom, &new_type);
1791   if (nops == 0)
1792     return NULL;
1793   if (nops == 3)
1794     {
1795       /* Check that one operand is 1.  */
1796       unsigned int i;
1797       for (i = 0; i < 3; ++i)
1798 	if (integer_onep (unprom[i].op))
1799 	  break;
1800       if (i == 3)
1801 	return NULL;
1802       /* Throw away the 1 operand and keep the other two.  */
1803       if (i < 2)
1804 	unprom[i] = unprom[2];
1805       ifn = IFN_AVG_CEIL;
1806     }
1807 
1808   vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
1809 
1810   /* We know that:
1811 
1812      (a) the operation can be viewed as:
1813 
1814 	   TYPE widened0 = (TYPE) UNPROM[0];
1815 	   TYPE widened1 = (TYPE) UNPROM[1];
1816 	   TYPE tmp1 = widened0 + widened1 {+ 1};
1817 	   TYPE tmp2 = tmp1 >> 1;   // LAST_STMT_INFO
1818 
1819      (b) the first two statements are equivalent to:
1820 
1821 	   TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
1822 	   TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
1823 
1824      (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
1825 	 where sensible;
1826 
1827      (d) all the operations can be performed correctly at twice the width of
1828 	 NEW_TYPE, due to the nature of the average operation; and
1829 
1830      (e) users of the result of the right shift need only TARGET_PRECISION
1831 	 bits, where TARGET_PRECISION is no more than half of TYPE's
1832 	 precision.
1833 
1834      Under these circumstances, the only situation in which NEW_TYPE
1835      could be narrower than TARGET_PRECISION is if widened0, widened1
1836      and an addition result are all used more than once.  Thus we can
1837      treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
1838      as "free", whereas widening the result of the average instruction
1839      from NEW_TYPE to TARGET_PRECISION would be a new operation.  It's
1840      therefore better not to go narrower than TARGET_PRECISION.  */
1841   if (TYPE_PRECISION (new_type) < target_precision)
1842     new_type = build_nonstandard_integer_type (target_precision,
1843 					       TYPE_UNSIGNED (new_type));
1844 
1845   /* Check for target support.  */
1846   tree new_vectype = get_vectype_for_scalar_type (new_type);
1847   if (!new_vectype
1848       || !direct_internal_fn_supported_p (ifn, new_vectype,
1849 					  OPTIMIZE_FOR_SPEED))
1850     return NULL;
1851 
1852   /* The IR requires a valid vector type for the cast result, even though
1853      it's likely to be discarded.  */
1854   *type_out = get_vectype_for_scalar_type (type);
1855   if (!*type_out)
1856     return NULL;
1857 
1858   /* Generate the IFN_AVG* call.  */
1859   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1860   tree new_ops[2];
1861   vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
1862 		       unprom, new_vectype);
1863   gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
1864 						    new_ops[1]);
1865   gimple_call_set_lhs (average_stmt, new_var);
1866   gimple_set_location (average_stmt, gimple_location (last_stmt));
1867 
1868   if (dump_enabled_p ())
1869     dump_printf_loc (MSG_NOTE, vect_location,
1870 		     "created pattern stmt: %G", average_stmt);
1871 
1872   return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
1873 }
1874 
1875 /* Recognize cases in which the input to a cast is wider than its
1876    output, and the input is fed by a widening operation.  Fold this
1877    by removing the unnecessary intermediate widening.  E.g.:
1878 
1879      unsigned char a;
1880      unsigned int b = (unsigned int) a;
1881      unsigned short c = (unsigned short) b;
1882 
1883    -->
1884 
1885      unsigned short c = (unsigned short) a;
1886 
1887    Although this is rare in input IR, it is an expected side-effect
1888    of the over-widening pattern above.
1889 
1890    This is beneficial also for integer-to-float conversions, if the
1891    widened integer has more bits than the float, and if the unwidened
1892    input doesn't.  */
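
/* Illustrative sketch only (not part of the vectorizer): the
   integer-to-float case mentioned above, assuming 'float' can represent
   every 'unsigned char' exactly but not every 'unsigned long long':

     unsigned char a;
     unsigned long long b = (unsigned long long) a;
     float c = (float) b;

   -->

     float c = (float) a;

   The intermediate widening to 64 bits adds nothing that the float
   conversion could use, so it can be skipped.  */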
1893 
1894 static gimple *
1895 vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1896 {
1897   /* Check for a cast, including an integer-to-float conversion.  */
1898   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1899   if (!last_stmt)
1900     return NULL;
1901   tree_code code = gimple_assign_rhs_code (last_stmt);
1902   if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
1903     return NULL;
1904 
1905   /* Make sure that the rhs is a scalar with a natural bitsize.  */
1906   tree lhs = gimple_assign_lhs (last_stmt);
1907   if (!lhs)
1908     return NULL;
1909   tree lhs_type = TREE_TYPE (lhs);
1910   scalar_mode lhs_mode;
1911   if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
1912       || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
1913     return NULL;
1914 
1915   /* Check for a narrowing operation (from a vector point of view).  */
1916   tree rhs = gimple_assign_rhs1 (last_stmt);
1917   tree rhs_type = TREE_TYPE (rhs);
1918   if (!INTEGRAL_TYPE_P (rhs_type)
1919       || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
1920       || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
1921     return NULL;
1922 
1923   /* Try to find an unpromoted input.  */
1924   vec_info *vinfo = last_stmt_info->vinfo;
1925   vect_unpromoted_value unprom;
1926   if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
1927       || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
1928     return NULL;
1929 
1930   /* If the bits above RHS_TYPE matter, make sure that they're the
1931      same when extending from UNPROM as they are when extending from RHS.  */
1932   if (!INTEGRAL_TYPE_P (lhs_type)
1933       && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
1934     return NULL;
1935 
1936   /* We can get the same result by casting UNPROM directly, to avoid
1937      the unnecessary widening and narrowing.  */
1938   vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
1939 
1940   *type_out = get_vectype_for_scalar_type (lhs_type);
1941   if (!*type_out)
1942     return NULL;
1943 
1944   tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1945   gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
1946   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1947 
1948   return pattern_stmt;
1949 }
1950 
1951 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
1952    to WIDEN_LSHIFT_EXPR.  See vect_recog_widen_op_pattern for details.  */
1953 
1954 static gimple *
1955 vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1956 {
1957   return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
1958 				      WIDEN_LSHIFT_EXPR, true,
1959 				      "vect_recog_widen_shift_pattern");
1960 }
1961 
1962 /* Detect a rotate pattern that wouldn't be otherwise vectorized:
1963 
1964    type a_t, b_t, c_t;
1965 
1966    S0 a_t = b_t r<< c_t;
1967 
1968   Input/Output:
1969 
1970   * STMT_VINFO: The stmt from which the pattern search begins,
1971     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
1972     with a sequence:
1973 
1974    S1 d_t = -c_t;
1975    S2 e_t = d_t & (B - 1);
1976    S3 f_t = b_t << c_t;
1977    S4 g_t = b_t >> e_t;
1978    S0 a_t = f_t | g_t;
1979 
1980     where B is element bitsize of type.
1981 
1982   Output:
1983 
1984   * TYPE_OUT: The type of the output of this pattern.
1985 
1986   * Return value: A new stmt that will be used to replace the rotate
1987     S0 stmt.  */
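
/* Illustrative sketch only (not part of the vectorizer): for 8-bit
   elements (B = 8) the expansion above corresponds to the scalar code

     unsigned char e = (unsigned char) (-c & 7);			// S1, S2
     unsigned char a = (unsigned char) ((b << c) | (b >> e));	// S3, S4, S0

   for a left rotate of b by c, which needs only vector shifts and a
   bitwise OR.  */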
1988 
1989 static gimple *
1990 vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1991 {
1992   gimple *last_stmt = stmt_vinfo->stmt;
1993   tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1994   gimple *pattern_stmt, *def_stmt;
1995   enum tree_code rhs_code;
1996   vec_info *vinfo = stmt_vinfo->vinfo;
1997   enum vect_def_type dt;
1998   optab optab1, optab2;
1999   edge ext_def = NULL;
2000 
2001   if (!is_gimple_assign (last_stmt))
2002     return NULL;
2003 
2004   rhs_code = gimple_assign_rhs_code (last_stmt);
2005   switch (rhs_code)
2006     {
2007     case LROTATE_EXPR:
2008     case RROTATE_EXPR:
2009       break;
2010     default:
2011       return NULL;
2012     }
2013 
2014   lhs = gimple_assign_lhs (last_stmt);
2015   oprnd0 = gimple_assign_rhs1 (last_stmt);
2016   type = TREE_TYPE (oprnd0);
2017   oprnd1 = gimple_assign_rhs2 (last_stmt);
2018   if (TREE_CODE (oprnd0) != SSA_NAME
2019       || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2020       || !INTEGRAL_TYPE_P (type)
2021       || !TYPE_UNSIGNED (type))
2022     return NULL;
2023 
2024   stmt_vec_info def_stmt_info;
2025   if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2026     return NULL;
2027 
2028   if (dt != vect_internal_def
2029       && dt != vect_constant_def
2030       && dt != vect_external_def)
2031     return NULL;
2032 
2033   vectype = get_vectype_for_scalar_type (type);
2034   if (vectype == NULL_TREE)
2035     return NULL;
2036 
2037   /* If vector/vector or vector/scalar rotate is supported by the target,
2038      don't do anything here.  */
2039   optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2040   if (optab1
2041       && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2042     return NULL;
2043 
2044   if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2045     {
2046       optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2047       if (optab2
2048 	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2049 	return NULL;
2050     }
2051 
2052   /* If vector/vector or vector/scalar shifts aren't supported by the target,
2053      don't do anything here either.  */
2054   optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2055   optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2056   if (!optab1
2057       || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2058       || !optab2
2059       || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2060     {
2061       if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2062 	return NULL;
2063       optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2064       optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2065       if (!optab1
2066 	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2067 	  || !optab2
2068 	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2069 	return NULL;
2070     }
2071 
2072   *type_out = vectype;
2073 
2074   if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
2075     ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2076 
2077   def = NULL_TREE;
2078   scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2079   if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2080     def = oprnd1;
2081   else if (def_stmt && gimple_assign_cast_p (def_stmt))
2082     {
2083       tree rhs1 = gimple_assign_rhs1 (def_stmt);
2084       if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2085 	  && TYPE_PRECISION (TREE_TYPE (rhs1))
2086 	     == TYPE_PRECISION (type))
2087 	def = rhs1;
2088     }
2089 
2090   if (def == NULL_TREE)
2091     {
2092       def = vect_recog_temp_ssa_var (type, NULL);
2093       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2094       append_pattern_def_seq (stmt_vinfo, def_stmt);
2095     }
2096   stype = TREE_TYPE (def);
2097 
2098   if (TREE_CODE (def) == INTEGER_CST)
2099     {
2100       if (!tree_fits_uhwi_p (def)
2101 	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2102 	  || integer_zerop (def))
2103 	return NULL;
2104       def2 = build_int_cst (stype,
2105 			    GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2106     }
2107   else
2108     {
2109       tree vecstype = get_vectype_for_scalar_type (stype);
2110 
2111       if (vecstype == NULL_TREE)
2112 	return NULL;
2113       def2 = vect_recog_temp_ssa_var (stype, NULL);
2114       def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2115       if (ext_def)
2116 	{
2117 	  basic_block new_bb
2118 	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2119 	  gcc_assert (!new_bb);
2120 	}
2121       else
2122 	append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2123 
2124       def2 = vect_recog_temp_ssa_var (stype, NULL);
2125       tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
2126       def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2127 				      gimple_assign_lhs (def_stmt), mask);
2128       if (ext_def)
2129 	{
2130 	  basic_block new_bb
2131 	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2132 	  gcc_assert (!new_bb);
2133 	}
2134       else
2135 	append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2136     }
2137 
2138   var1 = vect_recog_temp_ssa_var (type, NULL);
2139   def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2140 					? LSHIFT_EXPR : RSHIFT_EXPR,
2141 				  oprnd0, def);
2142   append_pattern_def_seq (stmt_vinfo, def_stmt);
2143 
2144   var2 = vect_recog_temp_ssa_var (type, NULL);
2145   def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2146 					? RSHIFT_EXPR : LSHIFT_EXPR,
2147 				  oprnd0, def2);
2148   append_pattern_def_seq (stmt_vinfo, def_stmt);
2149 
2150   /* Pattern detected.  */
2151   vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2152 
2153   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
2154   var = vect_recog_temp_ssa_var (type, NULL);
2155   pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2156 
2157   return pattern_stmt;
2158 }
2159 
2160 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2161    vectorized:
2162 
2163    type a_t;
2164    TYPE b_T, res_T;
2165 
2166    S1 a_t = ;
2167    S2 b_T = ;
2168    S3 res_T = b_T op a_t;
2169 
2170   where type 'TYPE' is a type with a different size than 'type',
2171   and op is <<, >> or rotate.
2172 
2173   Also detect cases:
2174 
2175    type a_t;
2176    TYPE b_T, c_T, res_T;
2177 
2178    S0 c_T = ;
2179    S1 a_t = (type) c_T;
2180    S2 b_T = ;
2181    S3 res_T = b_T op a_t;
2182 
2183   Input/Output:
2184 
2185   * STMT_VINFO: The stmt from which the pattern search begins,
2186     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
2187     with a shift/rotate which has the same type on both operands, in the
2188     second case just b_T op c_T, in the first case with added cast
2189     from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2190 
2191   Output:
2192 
2193   * TYPE_OUT: The type of the output of this pattern.
2194 
2195   * Return value: A new stmt that will be used to replace the shift/rotate
2196     S3 stmt.  */
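
/* Illustrative sketch only (not part of the vectorizer): a source form
   matching the first variant above, assuming 'long long' and 'int' have
   different modes:

     long long b[256];
     int a[256];
     for (int i = 0; i < 256; i++)
       b[i] = b[i] << a[i];	// shift amount narrower than shifted value

   The pattern adds a cast of a[i] so that both shift operands get the
   same vector type.  */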
2197 
2198 static gimple *
2199 vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
2200 					tree *type_out)
2201 {
2202   gimple *last_stmt = stmt_vinfo->stmt;
2203   tree oprnd0, oprnd1, lhs, var;
2204   gimple *pattern_stmt;
2205   enum tree_code rhs_code;
2206   vec_info *vinfo = stmt_vinfo->vinfo;
2207 
2208   if (!is_gimple_assign (last_stmt))
2209     return NULL;
2210 
2211   rhs_code = gimple_assign_rhs_code (last_stmt);
2212   switch (rhs_code)
2213     {
2214     case LSHIFT_EXPR:
2215     case RSHIFT_EXPR:
2216     case LROTATE_EXPR:
2217     case RROTATE_EXPR:
2218       break;
2219     default:
2220       return NULL;
2221     }
2222 
2223   lhs = gimple_assign_lhs (last_stmt);
2224   oprnd0 = gimple_assign_rhs1 (last_stmt);
2225   oprnd1 = gimple_assign_rhs2 (last_stmt);
2226   if (TREE_CODE (oprnd0) != SSA_NAME
2227       || TREE_CODE (oprnd1) != SSA_NAME
2228       || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2229       || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2230       || TYPE_PRECISION (TREE_TYPE (lhs))
2231 	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2232     return NULL;
2233 
2234   stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2235   if (!def_vinfo)
2236     return NULL;
2237 
2238   *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2239   if (*type_out == NULL_TREE)
2240     return NULL;
2241 
2242   tree def = NULL_TREE;
2243   gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2244   if (def_stmt && gimple_assign_cast_p (def_stmt))
2245     {
2246       tree rhs1 = gimple_assign_rhs1 (def_stmt);
2247       if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2248 	  && TYPE_PRECISION (TREE_TYPE (rhs1))
2249 	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2250 	{
2251 	  if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2252 	      >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2253 	    def = rhs1;
2254 	  else
2255 	    {
2256 	      tree mask
2257 		= build_low_bits_mask (TREE_TYPE (rhs1),
2258 				       TYPE_PRECISION (TREE_TYPE (oprnd1)));
2259 	      def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2260 	      def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2261 	      tree vecstype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2262 	      append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2263 	    }
2264 	}
2265     }
2266 
2267   if (def == NULL_TREE)
2268     {
2269       def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2270       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2271       append_pattern_def_seq (stmt_vinfo, def_stmt);
2272     }
2273 
2274   /* Pattern detected.  */
2275   vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2276 
2277   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
2278   var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2279   pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2280 
2281   return pattern_stmt;
2282 }
2283 
2284 /* Return true iff the target has a vector optab implementing the operation
2285    CODE on type VECTYPE.  */
2286 
2287 static bool
2288 target_has_vecop_for_code (tree_code code, tree vectype)
2289 {
2290   optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2291   return voptab
2292 	 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2293 }
2294 
2295 /* Verify that the target has optabs of VECTYPE to perform all the steps
2296    needed by the multiplication-by-immediate synthesis algorithm described by
2297    ALG and VAR.  If SYNTH_SHIFT_P is true ensure that vector addition is
2298    present.  Return true iff the target supports all the steps.  */
2299 
2300 static bool
2301 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2302 				 tree vectype, bool synth_shift_p)
2303 {
2304   if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2305     return false;
2306 
2307   bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2308   bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2309 
2310   if (var == negate_variant
2311       && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2312     return false;
2313 
2314   /* If we must synthesize shifts with additions make sure that vector
2315      addition is available.  */
2316   if ((var == add_variant || synth_shift_p) && !supports_vplus)
2317     return false;
2318 
2319   for (int i = 1; i < alg->ops; i++)
2320     {
2321       switch (alg->op[i])
2322 	{
2323 	case alg_shift:
2324 	  break;
2325 	case alg_add_t_m2:
2326 	case alg_add_t2_m:
2327 	case alg_add_factor:
2328 	  if (!supports_vplus)
2329 	    return false;
2330 	  break;
2331 	case alg_sub_t_m2:
2332 	case alg_sub_t2_m:
2333 	case alg_sub_factor:
2334 	  if (!supports_vminus)
2335 	    return false;
2336 	  break;
2337 	case alg_unknown:
2338 	case alg_m:
2339 	case alg_zero:
2340 	case alg_impossible:
2341 	  return false;
2342 	default:
2343 	  gcc_unreachable ();
2344 	}
2345     }
2346 
2347   return true;
2348 }
2349 
2350 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2351    putting the final result in DEST.  Append all statements but the last into
2352    VINFO.  Return the last statement.  */
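
/* Illustrative sketch only: a shift by 3 synthesized this way is just
   three doublings,

     t1 = op + op;	// op << 1
     t2 = t1 + t1;	// op << 2
     dest = t2 + t2;	// op << 3

   so it needs nothing beyond vector addition.  */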
2353 
2354 static gimple *
2355 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2356 			   stmt_vec_info vinfo)
2357 {
2358   HOST_WIDE_INT i;
2359   tree itype = TREE_TYPE (op);
2360   tree prev_res = op;
2361   gcc_assert (amnt >= 0);
2362   for (i = 0; i < amnt; i++)
2363     {
2364       tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2365 		      : dest;
2366       gimple *stmt
2367         = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2368       prev_res = tmp_var;
2369       if (i < amnt - 1)
2370 	append_pattern_def_seq (vinfo, stmt);
2371       else
2372 	return stmt;
2373     }
2374   gcc_unreachable ();
2375   return NULL;
2376 }
2377 
2378 /* Helper for vect_synth_mult_by_constant.  Apply a binary operation
2379    CODE to operands OP1 and OP2, creating a new temporary SSA var in
2380    the process if necessary.  Append the resulting assignment statements
2381    to the sequence in STMT_VINFO.  Return the SSA variable that holds the
2382    result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
2383    left shifts using additions.  */
2384 
2385 static tree
2386 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2387 			     stmt_vec_info stmt_vinfo, bool synth_shift_p)
2388 {
2389   if (integer_zerop (op2)
2390       && (code == LSHIFT_EXPR
2391 	  || code == PLUS_EXPR))
2392     {
2393       gcc_assert (TREE_CODE (op1) == SSA_NAME);
2394       return op1;
2395     }
2396 
2397   gimple *stmt;
2398   tree itype = TREE_TYPE (op1);
2399   tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2400 
2401   if (code == LSHIFT_EXPR
2402       && synth_shift_p)
2403     {
2404       stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2405 					 stmt_vinfo);
2406       append_pattern_def_seq (stmt_vinfo, stmt);
2407       return tmp_var;
2408     }
2409 
2410   stmt = gimple_build_assign (tmp_var, code, op1, op2);
2411   append_pattern_def_seq (stmt_vinfo, stmt);
2412   return tmp_var;
2413 }
2414 
2415 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2416    and simple arithmetic operations to be vectorized.  Record the statements
2417    produced in STMT_VINFO and return the last statement in the sequence or
2418    NULL if it's not possible to synthesize such a multiplication.
2419    This function mirrors the behavior of expand_mult_const in expmed.c but
2420    works on tree-ssa form.  */
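
/* Illustrative sketch only: one decomposition choose_mult_variant can
   pick for multiplying by 10 is

     t1 = op << 2;	// op * 4
     t2 = t1 + op;	// op * 5	(alg_add_t2_m)
     res = t2 << 1;	// op * 10	(alg_shift)

   i.e. two shifts and one addition instead of a vector multiplication.  */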
2421 
2422 static gimple *
2423 vect_synth_mult_by_constant (tree op, tree val,
2424 			     stmt_vec_info stmt_vinfo)
2425 {
2426   tree itype = TREE_TYPE (op);
2427   machine_mode mode = TYPE_MODE (itype);
2428   struct algorithm alg;
2429   mult_variant variant;
2430   if (!tree_fits_shwi_p (val))
2431     return NULL;
2432 
2433   /* Multiplication synthesis by shifts, adds and subs can introduce
2434      signed overflow where the original operation didn't.  Perform the
2435      operations on an unsigned type and cast back to avoid this.
2436      In the future we may want to relax this for synthesis algorithms
2437      that we can prove do not cause unexpected overflow.  */
2438   bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2439 
2440   tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2441 
2442   /* Targets that don't support vector shifts but support vector additions
2443      can synthesize shifts that way.  */
2444   bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2445 
2446   HOST_WIDE_INT hwval = tree_to_shwi (val);
2447   /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2448      The vectorizer's benefit analysis will decide whether it's beneficial
2449      to do this.  */
2450   bool possible = choose_mult_variant (mode, hwval, &alg,
2451 					&variant, MAX_COST);
2452   if (!possible)
2453     return NULL;
2454 
2455   tree vectype = get_vectype_for_scalar_type (multtype);
2456 
2457   if (!vectype
2458       || !target_supports_mult_synth_alg (&alg, variant,
2459 					   vectype, synth_shift_p))
2460     return NULL;
2461 
2462   tree accumulator;
2463 
2464   /* Clear out the sequence of statements so we can populate it below.  */
2465   gimple *stmt = NULL;
2466 
2467   if (cast_to_unsigned_p)
2468     {
2469       tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2470       stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2471       append_pattern_def_seq (stmt_vinfo, stmt);
2472       op = tmp_op;
2473     }
2474 
2475   if (alg.op[0] == alg_zero)
2476     accumulator = build_int_cst (multtype, 0);
2477   else
2478     accumulator = op;
2479 
2480   bool needs_fixup = (variant == negate_variant)
2481 		      || (variant == add_variant);
2482 
2483   for (int i = 1; i < alg.ops; i++)
2484     {
2485       tree shft_log = build_int_cst (multtype, alg.log[i]);
2486       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2487       tree tmp_var = NULL_TREE;
2488 
2489       switch (alg.op[i])
2490 	{
2491 	case alg_shift:
2492 	  if (synth_shift_p)
2493 	    stmt
2494 	      = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2495 					    stmt_vinfo);
2496 	  else
2497 	    stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2498 					 shft_log);
2499 	  break;
2500 	case alg_add_t_m2:
2501 	  tmp_var
2502 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2503 					    stmt_vinfo, synth_shift_p);
2504 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2505 				       tmp_var);
2506 	  break;
2507 	case alg_sub_t_m2:
2508 	  tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2509 						  shft_log, stmt_vinfo,
2510 						  synth_shift_p);
2511 	  /* In some algorithms the first step involves zeroing the
2512 	     accumulator.  If subtracting from such an accumulator
2513 	     just emit the negation directly.  */
2514 	  if (integer_zerop (accumulator))
2515 	    stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2516 	  else
2517 	    stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2518 					tmp_var);
2519 	  break;
2520 	case alg_add_t2_m:
2521 	  tmp_var
2522 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2523 					   stmt_vinfo, synth_shift_p);
2524 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2525 	  break;
2526 	case alg_sub_t2_m:
2527 	  tmp_var
2528 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2529 					   stmt_vinfo, synth_shift_p);
2530 	  stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2531 	  break;
2532 	case alg_add_factor:
2533 	  tmp_var
2534 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2535 					    stmt_vinfo, synth_shift_p);
2536 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2537 				       tmp_var);
2538 	  break;
2539 	case alg_sub_factor:
2540 	  tmp_var
2541 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2542 					   stmt_vinfo, synth_shift_p);
2543 	  stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2544 				      accumulator);
2545 	  break;
2546 	default:
2547 	  gcc_unreachable ();
2548 	}
2549       /* We don't want to append the last stmt in the sequence to stmt_vinfo
2550 	 but rather return it directly.  */
2551 
2552       if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2553 	append_pattern_def_seq (stmt_vinfo, stmt);
2554       accumulator = accum_tmp;
2555     }
2556   if (variant == negate_variant)
2557     {
2558       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2559       stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2560       accumulator = accum_tmp;
2561       if (cast_to_unsigned_p)
2562 	append_pattern_def_seq (stmt_vinfo, stmt);
2563     }
2564   else if (variant == add_variant)
2565     {
2566       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2567       stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2568       accumulator = accum_tmp;
2569       if (cast_to_unsigned_p)
2570 	append_pattern_def_seq (stmt_vinfo, stmt);
2571     }
2572   /* Move back to a signed type if needed.  */
2573   if (cast_to_unsigned_p)
2574     {
2575       tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2576       stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2577     }
2578 
2579   return stmt;
2580 }
2581 
2582 /* Detect multiplication by constant and convert it into a sequence of
2583    shifts and additions, subtractions, negations.  We reuse the
2584    choose_mult_variant algorithms from expmed.c
2585 
2586    Input/Output:
2587 
2588    STMT_VINFO: The stmt from which the pattern search begins,
2589    i.e. the mult stmt.
2590 
2591  Output:
2592 
2593   * TYPE_OUT: The type of the output of this pattern.
2594 
2595   * Return value: A new stmt that will be used to replace
2596     the multiplication.  */
2597 
2598 static gimple *
2599 vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2600 {
2601   gimple *last_stmt = stmt_vinfo->stmt;
2602   tree oprnd0, oprnd1, vectype, itype;
2603   gimple *pattern_stmt;
2604 
2605   if (!is_gimple_assign (last_stmt))
2606     return NULL;
2607 
2608   if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2609     return NULL;
2610 
2611   oprnd0 = gimple_assign_rhs1 (last_stmt);
2612   oprnd1 = gimple_assign_rhs2 (last_stmt);
2613   itype = TREE_TYPE (oprnd0);
2614 
2615   if (TREE_CODE (oprnd0) != SSA_NAME
2616       || TREE_CODE (oprnd1) != INTEGER_CST
2617       || !INTEGRAL_TYPE_P (itype)
2618       || !type_has_mode_precision_p (itype))
2619     return NULL;
2620 
2621   vectype = get_vectype_for_scalar_type (itype);
2622   if (vectype == NULL_TREE)
2623     return NULL;
2624 
2625   /* If the target can handle vectorized multiplication natively,
2626      don't attempt to optimize this.  */
2627   optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2628   if (mul_optab != unknown_optab)
2629     {
2630       machine_mode vec_mode = TYPE_MODE (vectype);
2631       int icode = (int) optab_handler (mul_optab, vec_mode);
2632       if (icode != CODE_FOR_nothing)
2633        return NULL;
2634     }
2635 
2636   pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2637   if (!pattern_stmt)
2638     return NULL;
2639 
2640   /* Pattern detected.  */
2641   vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
2642 
2643   *type_out = vectype;
2644 
2645   return pattern_stmt;
2646 }
2647 
2648 /* Detect a signed division by a constant that wouldn't be
2649    otherwise vectorized:
2650 
2651    type a_t, b_t;
2652 
2653    S1 a_t = b_t / N;
2654 
2655   where type 'type' is an integral type and N is a constant.
2656 
2657   Similarly handle modulo by a constant:
2658 
2659    S4 a_t = b_t % N;
2660 
2661   Input/Output:
2662 
2663   * STMT_VINFO: The stmt from which the pattern search begins,
2664     i.e. the division stmt.  S1 is replaced, if N is a power
2665     of two constant and type is signed, by:
2666   S3  y_t = b_t < 0 ? N - 1 : 0;
2667   S2  x_t = b_t + y_t;
2668   S1' a_t = x_t >> log2 (N);
2669 
2670     S4 is replaced if N is a power of two constant and
2671     type is signed by (where *_T temporaries have unsigned type):
2672   S9  y_T = b_t < 0 ? -1U : 0U;
2673   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2674   S7  z_t = (type) z_T;
2675   S6  w_t = b_t + z_t;
2676   S5  x_t = w_t & (N - 1);
2677   S4' a_t = x_t - z_t;
2678 
2679   Output:
2680 
2681   * TYPE_OUT: The type of the output of this pattern.
2682 
2683   * Return value: A new stmt that will be used to replace the division
2684     S1 or modulo S4 stmt.  */
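
/* Illustrative sketch only (not part of the vectorizer): for the
   power-of-two case, signed division by 4 becomes

     y_t = b_t < 0 ? 3 : 0;		// S3
     x_t = b_t + y_t;			// S2
     a_t = x_t >> 2;			// S1', arithmetic shift

   which rounds towards zero as TRUNC_DIV_EXPR requires, whereas a bare
   arithmetic shift of a negative value would round towards -infinity.  */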
2685 
2686 static gimple *
2687 vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2688 {
2689   gimple *last_stmt = stmt_vinfo->stmt;
2690   tree oprnd0, oprnd1, vectype, itype, cond;
2691   gimple *pattern_stmt, *def_stmt;
2692   enum tree_code rhs_code;
2693   optab optab;
2694   tree q;
2695   int dummy_int, prec;
2696 
2697   if (!is_gimple_assign (last_stmt))
2698     return NULL;
2699 
2700   rhs_code = gimple_assign_rhs_code (last_stmt);
2701   switch (rhs_code)
2702     {
2703     case TRUNC_DIV_EXPR:
2704     case EXACT_DIV_EXPR:
2705     case TRUNC_MOD_EXPR:
2706       break;
2707     default:
2708       return NULL;
2709     }
2710 
2711   oprnd0 = gimple_assign_rhs1 (last_stmt);
2712   oprnd1 = gimple_assign_rhs2 (last_stmt);
2713   itype = TREE_TYPE (oprnd0);
2714   if (TREE_CODE (oprnd0) != SSA_NAME
2715       || TREE_CODE (oprnd1) != INTEGER_CST
2716       || TREE_CODE (itype) != INTEGER_TYPE
2717       || !type_has_mode_precision_p (itype))
2718     return NULL;
2719 
2720   scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2721   vectype = get_vectype_for_scalar_type (itype);
2722   if (vectype == NULL_TREE)
2723     return NULL;
2724 
2725   if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
2726     {
2727       /* If the target can handle vectorized division or modulo natively,
2728 	 don't attempt to optimize this, since native division is likely
2729 	 to give smaller code.  */
2730       optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2731       if (optab != unknown_optab)
2732 	{
2733 	  machine_mode vec_mode = TYPE_MODE (vectype);
2734 	  int icode = (int) optab_handler (optab, vec_mode);
2735 	  if (icode != CODE_FOR_nothing)
2736 	    return NULL;
2737 	}
2738     }
2739 
2740   prec = TYPE_PRECISION (itype);
2741   if (integer_pow2p (oprnd1))
2742     {
2743       if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2744 	return NULL;
2745 
2746       /* Pattern detected.  */
2747       vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
2748 
2749       cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2750 		     build_int_cst (itype, 0));
2751       if (rhs_code == TRUNC_DIV_EXPR
2752 	  || rhs_code == EXACT_DIV_EXPR)
2753 	{
2754 	  tree var = vect_recog_temp_ssa_var (itype, NULL);
2755 	  tree shift;
2756 	  def_stmt
2757 	    = gimple_build_assign (var, COND_EXPR, cond,
2758 				   fold_build2 (MINUS_EXPR, itype, oprnd1,
2759 						build_int_cst (itype, 1)),
2760 				   build_int_cst (itype, 0));
2761 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2762 	  var = vect_recog_temp_ssa_var (itype, NULL);
2763 	  def_stmt
2764 	    = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2765 				   gimple_assign_lhs (def_stmt));
2766 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2767 
2768 	  shift = build_int_cst (itype, tree_log2 (oprnd1));
2769 	  pattern_stmt
2770 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2771 				   RSHIFT_EXPR, var, shift);
2772 	}
2773       else
2774 	{
2775 	  tree signmask;
2776 	  if (compare_tree_int (oprnd1, 2) == 0)
2777 	    {
2778 	      signmask = vect_recog_temp_ssa_var (itype, NULL);
2779 	      def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2780 					      build_int_cst (itype, 1),
2781 					      build_int_cst (itype, 0));
2782 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2783 	    }
2784 	  else
2785 	    {
2786 	      tree utype
2787 		= build_nonstandard_integer_type (prec, 1);
2788 	      tree vecutype = get_vectype_for_scalar_type (utype);
2789 	      tree shift
2790 		= build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2791 					- tree_log2 (oprnd1));
2792 	      tree var = vect_recog_temp_ssa_var (utype, NULL);
2793 
2794 	      def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2795 					      build_int_cst (utype, -1),
2796 					      build_int_cst (utype, 0));
2797 	      append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
2798 	      var = vect_recog_temp_ssa_var (utype, NULL);
2799 	      def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2800 					      gimple_assign_lhs (def_stmt),
2801 					      shift);
2802 	      append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
2803 	      signmask = vect_recog_temp_ssa_var (itype, NULL);
2804 	      def_stmt
2805 		= gimple_build_assign (signmask, NOP_EXPR, var);
2806 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2807 	    }
2808 	  def_stmt
2809 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2810 				   PLUS_EXPR, oprnd0, signmask);
2811 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2812 	  def_stmt
2813 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2814 				   BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2815 				   fold_build2 (MINUS_EXPR, itype, oprnd1,
2816 						build_int_cst (itype, 1)));
2817 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2818 
2819 	  pattern_stmt
2820 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2821 				   MINUS_EXPR, gimple_assign_lhs (def_stmt),
2822 				   signmask);
2823 	}
2824 
2825       *type_out = vectype;
2826       return pattern_stmt;
2827     }
2828 
2829   if (prec > HOST_BITS_PER_WIDE_INT
2830       || integer_zerop (oprnd1))
2831     return NULL;
2832 
2833   if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2834     return NULL;
2835 
2836   if (TYPE_UNSIGNED (itype))
2837     {
2838       unsigned HOST_WIDE_INT mh, ml;
2839       int pre_shift, post_shift;
2840       unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2841 				  & GET_MODE_MASK (itype_mode));
2842       tree t1, t2, t3, t4;
2843 
2844       if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2845 	/* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
2846 	return NULL;
2847 
2848       /* Find a suitable multiplier and right shift count
2849 	 instead of multiplying with D.  */
2850       mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2851 
2852       /* If the suggested multiplier is more than SIZE bits, we can do better
2853 	 for even divisors, using an initial right shift.  */
2854       if (mh != 0 && (d & 1) == 0)
2855 	{
2856 	  pre_shift = ctz_or_zero (d);
2857 	  mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2858 				  &ml, &post_shift, &dummy_int);
2859 	  gcc_assert (!mh);
2860 	}
2861       else
2862 	pre_shift = 0;
2863 
2864       if (mh != 0)
2865 	{
2866 	  if (post_shift - 1 >= prec)
2867 	    return NULL;
2868 
2869 	  /* t1 = oprnd0 h* ml;
2870 	     t2 = oprnd0 - t1;
2871 	     t3 = t2 >> 1;
2872 	     t4 = t1 + t3;
2873 	     q = t4 >> (post_shift - 1);  */
2874 	  t1 = vect_recog_temp_ssa_var (itype, NULL);
2875 	  def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2876 					  build_int_cst (itype, ml));
2877 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2878 
2879 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
2880 	  def_stmt
2881 	    = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2882 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2883 
2884 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
2885 	  def_stmt
2886 	    = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2887 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2888 
2889 	  t4 = vect_recog_temp_ssa_var (itype, NULL);
2890 	  def_stmt
2891 	    = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2892 
2893 	  if (post_shift != 1)
2894 	    {
2895 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2896 
2897 	      q = vect_recog_temp_ssa_var (itype, NULL);
2898 	      pattern_stmt
2899 		= gimple_build_assign (q, RSHIFT_EXPR, t4,
2900 				       build_int_cst (itype, post_shift - 1));
2901 	    }
2902 	  else
2903 	    {
2904 	      q = t4;
2905 	      pattern_stmt = def_stmt;
2906 	    }
2907 	}
2908       else
2909 	{
2910 	  if (pre_shift >= prec || post_shift >= prec)
2911 	    return NULL;
2912 
2913 	  /* t1 = oprnd0 >> pre_shift;
2914 	     t2 = t1 h* ml;
2915 	     q = t2 >> post_shift;  */
2916 	  if (pre_shift)
2917 	    {
2918 	      t1 = vect_recog_temp_ssa_var (itype, NULL);
2919 	      def_stmt
2920 		= gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2921 				       build_int_cst (NULL, pre_shift));
2922 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2923 	    }
2924 	  else
2925 	    t1 = oprnd0;
2926 
2927 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
2928 	  def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2929 					  build_int_cst (itype, ml));
2930 
2931 	  if (post_shift)
2932 	    {
2933 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2934 
2935 	      q = vect_recog_temp_ssa_var (itype, NULL);
2936 	      def_stmt
2937 		= gimple_build_assign (q, RSHIFT_EXPR, t2,
2938 				       build_int_cst (itype, post_shift));
2939 	    }
2940 	  else
2941 	    q = t2;
2942 
2943 	  pattern_stmt = def_stmt;
2944 	}
2945     }
2946   else
2947     {
2948       unsigned HOST_WIDE_INT ml;
2949       int post_shift;
2950       HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2951       unsigned HOST_WIDE_INT abs_d;
2952       bool add = false;
2953       tree t1, t2, t3, t4;
2954 
2955       /* Give up for -1.  */
2956       if (d == -1)
2957 	return NULL;
2958 
2959       /* Since d might be INT_MIN, we have to cast to
2960 	 unsigned HOST_WIDE_INT before negating to avoid
2961 	 undefined signed overflow.  */
2962       abs_d = (d >= 0
2963 	       ? (unsigned HOST_WIDE_INT) d
2964 	       : - (unsigned HOST_WIDE_INT) d);
2965 
2966       /* n rem d = n rem -d */
2967       if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2968 	{
2969 	  d = abs_d;
2970 	  oprnd1 = build_int_cst (itype, abs_d);
2971 	}
2972       if (HOST_BITS_PER_WIDE_INT >= prec
2973 	  && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2974 	/* This case is not handled correctly below.  */
2975 	return NULL;
2976 
2977       choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2978       if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2979 	{
2980 	  add = true;
2981 	  ml |= HOST_WIDE_INT_M1U << (prec - 1);
2982 	}
2983       if (post_shift >= prec)
2984 	return NULL;
2985 
2986       /* t1 = oprnd0 h* ml;  */
2987       t1 = vect_recog_temp_ssa_var (itype, NULL);
2988       def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2989 				      build_int_cst (itype, ml));
2990 
2991       if (add)
2992 	{
2993 	  /* t2 = t1 + oprnd0;  */
2994 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2995 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
2996 	  def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2997 	}
2998       else
2999 	t2 = t1;
3000 
3001       if (post_shift)
3002 	{
3003 	  /* t3 = t2 >> post_shift;  */
3004 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
3005 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
3006 	  def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3007 					  build_int_cst (itype, post_shift));
3008 	}
3009       else
3010 	t3 = t2;
3011 
3012       wide_int oprnd0_min, oprnd0_max;
3013       int msb = 1;
3014       if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
3015 	{
3016 	  if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
3017 	    msb = 0;
3018 	  else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
3019 	    msb = -1;
3020 	}
3021 
3022       if (msb == 0 && d >= 0)
3023 	{
3024 	  /* q = t3;  */
3025 	  q = t3;
3026 	  pattern_stmt = def_stmt;
3027 	}
3028       else
3029 	{
3030 	  /* t4 = oprnd0 >> (prec - 1);
3031 	     or if we know from VRP that oprnd0 >= 0
3032 	     t4 = 0;
3033 	     or if we know from VRP that oprnd0 < 0
3034 	     t4 = -1;  */
3035 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
3036 	  t4 = vect_recog_temp_ssa_var (itype, NULL);
3037 	  if (msb != 1)
3038 	    def_stmt = gimple_build_assign (t4, INTEGER_CST,
3039 					    build_int_cst (itype, msb));
3040 	  else
3041 	    def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3042 					    build_int_cst (itype, prec - 1));
3043 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
3044 
3045 	  /* q = t3 - t4;  or q = t4 - t3;  */
3046 	  q = vect_recog_temp_ssa_var (itype, NULL);
3047 	  pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3048 					      d < 0 ? t3 : t4);
3049 	}
3050     }
3051 
3052   if (rhs_code == TRUNC_MOD_EXPR)
3053     {
3054       tree r, t1;
3055 
3056       /* We divided.  Now finish by:
3057 	 t1 = q * oprnd1;
3058 	 r = oprnd0 - t1;  */
3059       append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3060 
3061       t1 = vect_recog_temp_ssa_var (itype, NULL);
3062       def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3063       append_pattern_def_seq (stmt_vinfo, def_stmt);
3064 
3065       r = vect_recog_temp_ssa_var (itype, NULL);
3066       pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3067     }
3068 
3069   /* Pattern detected.  */
3070   vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3071 
3072   *type_out = vectype;
3073   return pattern_stmt;
3074 }
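
/* For illustration only: a hypothetical scalar loop of the shape the
   division/modulo-by-constant pattern above targets, together with a rough
   scalar sketch of the signed expansion (the real pattern stmts are GIMPLE,
   and ML and POST_SHIFT come from choose_multiplier):

     void
     div_by_7 (int *restrict a, const int *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
	 a[i] = b[i] / 7;
     }

   conceptually becomes, per element,

     t1 = b[i] h* ML;
     t3 = t1 >> POST_SHIFT;
     t4 = b[i] >> (prec - 1);
     a[i] = t3 - t4;

   matching the MULT_HIGHPART_EXPR / RSHIFT_EXPR / MINUS_EXPR sequence
   created above for a positive divisor.  */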
3075 
3076 /* Function vect_recog_mixed_size_cond_pattern
3077 
3078    Try to find the following pattern:
3079 
3080      type x_t, y_t;
3081      TYPE a_T, b_T, c_T;
3082    loop:
3083      S1  a_T = x_t CMP y_t ? b_T : c_T;
3084 
3085    where type 'TYPE' is an integral type which has different size
3086    from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
3087    than 'type', the constants need to fit into an integer type
3088    with the same width as 'type') or results of conversion from 'type'.
3089 
3090    Input:
3091 
3092    * STMT_VINFO: The stmt from which the pattern search begins.
3093 
3094    Output:
3095 
3096    * TYPE_OUT: The type of the output of this pattern.
3097 
3098    * Return value: A new stmt that will be used to replace the pattern.
3099 	Additionally a def_stmt is added.
3100 
3101 	a_it = x_t CMP y_t ? b_it : c_it;
3102 	a_T = (TYPE) a_it;  */
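
/* For illustration only, a hypothetical source loop of the kind this
   pattern is meant to catch (the comparison type is narrower than the
   COND_EXPR result type):

     void
     f (int *restrict a, const short *restrict x, const short *restrict y,
	int n)
     {
       for (int i = 0; i < n; i++)
	 a[i] = x[i] < y[i] ? 1024 : -1;
     }

   Here the comparison is done on 'short' while the selected values are
   'int', so the COND_EXPR is first created in a short-sized integer type
   and its result is then widened to 'int', mirroring the a_it / a_T split
   described above.  */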
3103 
3104 static gimple *
3105 vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3106 {
3107   gimple *last_stmt = stmt_vinfo->stmt;
3108   tree cond_expr, then_clause, else_clause;
3109   tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3110   gimple *pattern_stmt, *def_stmt;
3111   tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3112   gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3113   bool promotion;
3114   tree comp_scalar_type;
3115 
3116   if (!is_gimple_assign (last_stmt)
3117       || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3118       || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3119     return NULL;
3120 
3121   cond_expr = gimple_assign_rhs1 (last_stmt);
3122   then_clause = gimple_assign_rhs2 (last_stmt);
3123   else_clause = gimple_assign_rhs3 (last_stmt);
3124 
3125   if (!COMPARISON_CLASS_P (cond_expr))
3126     return NULL;
3127 
3128   comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3129   comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3130   if (comp_vectype == NULL_TREE)
3131     return NULL;
3132 
3133   type = gimple_expr_type (last_stmt);
3134   if (types_compatible_p (type, comp_scalar_type)
3135       || ((TREE_CODE (then_clause) != INTEGER_CST
3136 	   || TREE_CODE (else_clause) != INTEGER_CST)
3137 	  && !INTEGRAL_TYPE_P (comp_scalar_type))
3138       || !INTEGRAL_TYPE_P (type))
3139     return NULL;
3140 
3141   if ((TREE_CODE (then_clause) != INTEGER_CST
3142        && !type_conversion_p (then_clause, stmt_vinfo, false, &orig_type0,
3143 			      &def_stmt0, &promotion))
3144       || (TREE_CODE (else_clause) != INTEGER_CST
3145 	  && !type_conversion_p (else_clause, stmt_vinfo, false, &orig_type1,
3146 				 &def_stmt1, &promotion)))
3147     return NULL;
3148 
3149   if (orig_type0 && orig_type1
3150       && !types_compatible_p (orig_type0, orig_type1))
3151     return NULL;
3152 
3153   if (orig_type0)
3154     {
3155       if (!types_compatible_p (orig_type0, comp_scalar_type))
3156 	return NULL;
3157       then_clause = gimple_assign_rhs1 (def_stmt0);
3158       itype = orig_type0;
3159     }
3160 
3161   if (orig_type1)
3162     {
3163       if (!types_compatible_p (orig_type1, comp_scalar_type))
3164 	return NULL;
3165       else_clause = gimple_assign_rhs1 (def_stmt1);
3166       itype = orig_type1;
3167     }
3168 
3169 
3170   HOST_WIDE_INT cmp_mode_size
3171     = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3172 
3173   scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3174   if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3175     return NULL;
3176 
3177   vectype = get_vectype_for_scalar_type (type);
3178   if (vectype == NULL_TREE)
3179     return NULL;
3180 
3181   if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3182     return NULL;
3183 
3184   if (itype == NULL_TREE)
3185     itype = build_nonstandard_integer_type (cmp_mode_size,
3186 					    TYPE_UNSIGNED (type));
3187 
3188   if (itype == NULL_TREE
3189       || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3190     return NULL;
3191 
3192   vecitype = get_vectype_for_scalar_type (itype);
3193   if (vecitype == NULL_TREE)
3194     return NULL;
3195 
3196   if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3197     return NULL;
3198 
3199   if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3200     {
3201       if ((TREE_CODE (then_clause) == INTEGER_CST
3202 	   && !int_fits_type_p (then_clause, itype))
3203 	  || (TREE_CODE (else_clause) == INTEGER_CST
3204 	      && !int_fits_type_p (else_clause, itype)))
3205 	return NULL;
3206     }
3207 
3208   def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3209 				  COND_EXPR, unshare_expr (cond_expr),
3210 				  fold_convert (itype, then_clause),
3211 				  fold_convert (itype, else_clause));
3212   pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3213 				      NOP_EXPR, gimple_assign_lhs (def_stmt));
3214 
3215   append_pattern_def_seq (stmt_vinfo, def_stmt, vecitype);
3216   *type_out = vectype;
3217 
3218   vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3219 
3220   return pattern_stmt;
3221 }
3222 
3223 
3224 /* Helper function of vect_recog_bool_pattern.  Called recursively, return
3225    true if bool VAR can and should be optimized that way.  Assume it shouldn't
3226    in case it's a result of a comparison which can be directly vectorized into
3227    a vector comparison.  Fills in STMTS with all stmts visited during the
3228    walk.  */
3229 
3230 static bool
3231 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3232 {
3233   tree rhs1;
3234   enum tree_code rhs_code;
3235 
3236   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3237   if (!def_stmt_info)
3238     return false;
3239 
3240   gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3241   if (!def_stmt)
3242     return false;
3243 
3244   if (stmts.contains (def_stmt))
3245     return true;
3246 
3247   rhs1 = gimple_assign_rhs1 (def_stmt);
3248   rhs_code = gimple_assign_rhs_code (def_stmt);
3249   switch (rhs_code)
3250     {
3251     case SSA_NAME:
3252       if (! check_bool_pattern (rhs1, vinfo, stmts))
3253 	return false;
3254       break;
3255 
3256     CASE_CONVERT:
3257       if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3258 	return false;
3259       if (! check_bool_pattern (rhs1, vinfo, stmts))
3260 	return false;
3261       break;
3262 
3263     case BIT_NOT_EXPR:
3264       if (! check_bool_pattern (rhs1, vinfo, stmts))
3265 	return false;
3266       break;
3267 
3268     case BIT_AND_EXPR:
3269     case BIT_IOR_EXPR:
3270     case BIT_XOR_EXPR:
3271       if (! check_bool_pattern (rhs1, vinfo, stmts)
3272 	  || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3273 	return false;
3274       break;
3275 
3276     default:
3277       if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3278 	{
3279 	  tree vecitype, comp_vectype;
3280 
3281 	  /* If the comparison can throw, then is_gimple_condexpr will be
3282 	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
3283 	  if (stmt_could_throw_p (cfun, def_stmt))
3284 	    return false;
3285 
3286 	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3287 	  if (comp_vectype == NULL_TREE)
3288 	    return false;
3289 
3290 	  tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3291 	  if (mask_type
3292 	      && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3293 	    return false;
3294 
3295 	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3296 	    {
3297 	      scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3298 	      tree itype
3299 		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3300 	      vecitype = get_vectype_for_scalar_type (itype);
3301 	      if (vecitype == NULL_TREE)
3302 		return false;
3303 	    }
3304 	  else
3305 	    vecitype = comp_vectype;
3306 	  if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3307 	    return false;
3308 	}
3309       else
3310 	return false;
3311       break;
3312     }
3313 
3314   bool res = stmts.add (def_stmt);
3315   /* We can't end up recursing when just visiting SSA defs but not PHIs.  */
3316   gcc_assert (!res);
3317 
3318   return true;
3319 }
3320 
3321 
3322 /* Helper function of adjust_bool_pattern.  Cast VAR (the result of a
3323    previously visited stmt, SSA_NAME_DEF_STMT of VAR) to TYPE, append the
3324    cast stmt to STMT_INFO's pattern sequence, and return the cast result.  */
3325 
3326 static tree
3327 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3328 {
3329   gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3330 					   NOP_EXPR, var);
3331   append_pattern_def_seq (stmt_info, cast_stmt,
3332 			  get_vectype_for_scalar_type (type));
3333   return gimple_assign_lhs (cast_stmt);
3334 }
3335 
3336 /* Helper function of vect_recog_bool_pattern.  Do the actual transformations.
3337    VAR is an SSA_NAME that should be transformed from bool to a wider integer
3338    type, OUT_TYPE is the desired final integer type of the whole pattern.
3339    STMT_INFO is the info of the pattern root and is where pattern stmts should
3340    be associated with.  DEFS is a map of pattern defs.  */
3341 
3342 static void
3343 adjust_bool_pattern (tree var, tree out_type,
3344 		     stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3345 {
3346   gimple *stmt = SSA_NAME_DEF_STMT (var);
3347   enum tree_code rhs_code, def_rhs_code;
3348   tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3349   location_t loc;
3350   gimple *pattern_stmt, *def_stmt;
3351   tree trueval = NULL_TREE;
3352 
3353   rhs1 = gimple_assign_rhs1 (stmt);
3354   rhs2 = gimple_assign_rhs2 (stmt);
3355   rhs_code = gimple_assign_rhs_code (stmt);
3356   loc = gimple_location (stmt);
3357   switch (rhs_code)
3358     {
3359     case SSA_NAME:
3360     CASE_CONVERT:
3361       irhs1 = *defs.get (rhs1);
3362       itype = TREE_TYPE (irhs1);
3363       pattern_stmt
3364 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3365 			       SSA_NAME, irhs1);
3366       break;
3367 
3368     case BIT_NOT_EXPR:
3369       irhs1 = *defs.get (rhs1);
3370       itype = TREE_TYPE (irhs1);
3371       pattern_stmt
3372 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3373 			       BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3374       break;
3375 
3376     case BIT_AND_EXPR:
3377       /* Try to optimize x = y & (a < b ? 1 : 0); into
3378 	 x = (a < b ? y : 0);
3379 
3380 	 E.g. for:
3381 	   bool a_b, b_b, c_b;
3382 	   TYPE d_T;
3383 
3384 	   S1  a_b = x1 CMP1 y1;
3385 	   S2  b_b = x2 CMP2 y2;
3386 	   S3  c_b = a_b & b_b;
3387 	   S4  d_T = (TYPE) c_b;
3388 
3389 	 we would normally emit:
3390 
3391 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
3392 	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
3393 	   S3'  c_T = a_T & b_T;
3394 	   S4'  d_T = c_T;
3395 
3396 	 but we can save one stmt by using the
3397 	 result of one of the COND_EXPRs in the other COND_EXPR and leave
3398 	 BIT_AND_EXPR stmt out:
3399 
3400 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
3401 	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
3402 	   S4'  d_T = c_T;
3403 
3404 	 At least when VEC_COND_EXPR is implemented using masks,
3405 	 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3406 	 computes the comparison mask and ANDs it, in one case with an
3407 	 all-ones vector, in the other case with a vector register.
3408 	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
3409 	 often more expensive.  */
3410       def_stmt = SSA_NAME_DEF_STMT (rhs2);
3411       def_rhs_code = gimple_assign_rhs_code (def_stmt);
3412       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3413 	{
3414 	  irhs1 = *defs.get (rhs1);
3415 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3416 	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
3417 	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3418 	    {
3419 	      rhs_code = def_rhs_code;
3420 	      rhs1 = def_rhs1;
3421 	      rhs2 = gimple_assign_rhs2 (def_stmt);
3422 	      trueval = irhs1;
3423 	      goto do_compare;
3424 	    }
3425 	  else
3426 	    irhs2 = *defs.get (rhs2);
3427 	  goto and_ior_xor;
3428 	}
3429       def_stmt = SSA_NAME_DEF_STMT (rhs1);
3430       def_rhs_code = gimple_assign_rhs_code (def_stmt);
3431       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3432 	{
3433 	  irhs2 = *defs.get (rhs2);
3434 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3435 	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
3436 	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3437 	    {
3438 	      rhs_code = def_rhs_code;
3439 	      rhs1 = def_rhs1;
3440 	      rhs2 = gimple_assign_rhs2 (def_stmt);
3441 	      trueval = irhs2;
3442 	      goto do_compare;
3443 	    }
3444 	  else
3445 	    irhs1 = *defs.get (rhs1);
3446 	  goto and_ior_xor;
3447 	}
3448       /* FALLTHRU */
3449     case BIT_IOR_EXPR:
3450     case BIT_XOR_EXPR:
3451       irhs1 = *defs.get (rhs1);
3452       irhs2 = *defs.get (rhs2);
3453     and_ior_xor:
3454       if (TYPE_PRECISION (TREE_TYPE (irhs1))
3455 	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
3456 	{
3457 	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3458 	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3459 	  int out_prec = TYPE_PRECISION (out_type);
3460 	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3461 	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3462 					      stmt_info);
3463 	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3464 	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3465 					      stmt_info);
3466 	  else
3467 	    {
3468 	      irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3469 	      irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3470 	    }
3471 	}
3472       itype = TREE_TYPE (irhs1);
3473       pattern_stmt
3474 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3475 			       rhs_code, irhs1, irhs2);
3476       break;
3477 
3478     default:
3479     do_compare:
3480       gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3481       if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3482 	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3483 	  || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3484 		       GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3485 	{
3486 	  scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3487 	  itype
3488 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3489 	}
3490       else
3491 	itype = TREE_TYPE (rhs1);
3492       cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3493       if (trueval == NULL_TREE)
3494 	trueval = build_int_cst (itype, 1);
3495       else
3496 	gcc_checking_assert (useless_type_conversion_p (itype,
3497 							TREE_TYPE (trueval)));
3498       pattern_stmt
3499 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3500 			       COND_EXPR, cond_expr, trueval,
3501 			       build_int_cst (itype, 0));
3502       break;
3503     }
3504 
3505   gimple_set_location (pattern_stmt, loc);
3506   append_pattern_def_seq (stmt_info, pattern_stmt,
3507 			  get_vectype_for_scalar_type (itype));
3508   defs.put (var, gimple_assign_lhs (pattern_stmt));
3509 }
3510 
3511 /* Comparison function to qsort a vector of gimple stmts after UID.  */
3512 
3513 static int
3514 sort_after_uid (const void *p1, const void *p2)
3515 {
3516   const gimple *stmt1 = *(const gimple * const *)p1;
3517   const gimple *stmt2 = *(const gimple * const *)p2;
3518   return gimple_uid (stmt1) - gimple_uid (stmt2);
3519 }
3520 
3521 /* Create pattern stmts for all stmts participating in the bool pattern
3522    specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3523    OUT_TYPE.  Return the def of the pattern root.  */
3524 
3525 static tree
3526 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3527 		   tree out_type, stmt_vec_info stmt_info)
3528 {
3529   /* Gather original stmts in the bool pattern in their order of appearance
3530      in the IL.  */
3531   auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3532   for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3533        i != bool_stmt_set.end (); ++i)
3534     bool_stmts.quick_push (*i);
3535   bool_stmts.qsort (sort_after_uid);
3536 
3537   /* Now process them in that order, producing pattern stmts.  */
3538   hash_map <tree, tree> defs;
3539   for (unsigned i = 0; i < bool_stmts.length (); ++i)
3540     adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3541 			 out_type, stmt_info, defs);
3542 
3543   /* Pop the last pattern seq stmt and install it as pattern root for STMT.  */
3544   gimple *pattern_stmt
3545     = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
3546   return gimple_assign_lhs (pattern_stmt);
3547 }
3548 
3549 /* Helper for search_type_for_mask.  */
3550 
3551 static tree
3552 search_type_for_mask_1 (tree var, vec_info *vinfo,
3553 			hash_map<gimple *, tree> &cache)
3554 {
3555   tree rhs1;
3556   enum tree_code rhs_code;
3557   tree res = NULL_TREE, res2;
3558 
3559   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3560     return NULL_TREE;
3561 
3562   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3563   if (!def_stmt_info)
3564     return NULL_TREE;
3565 
3566   gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3567   if (!def_stmt)
3568     return NULL_TREE;
3569 
3570   tree *c = cache.get (def_stmt);
3571   if (c)
3572     return *c;
3573 
3574   rhs_code = gimple_assign_rhs_code (def_stmt);
3575   rhs1 = gimple_assign_rhs1 (def_stmt);
3576 
3577   switch (rhs_code)
3578     {
3579     case SSA_NAME:
3580     case BIT_NOT_EXPR:
3581     CASE_CONVERT:
3582       res = search_type_for_mask_1 (rhs1, vinfo, cache);
3583       break;
3584 
3585     case BIT_AND_EXPR:
3586     case BIT_IOR_EXPR:
3587     case BIT_XOR_EXPR:
3588       res = search_type_for_mask_1 (rhs1, vinfo, cache);
3589       res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3590 				     cache);
3591       if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3592 	res = res2;
3593       break;
3594 
3595     default:
3596       if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3597 	{
3598 	  tree comp_vectype, mask_type;
3599 
3600 	  if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3601 	    {
3602 	      res = search_type_for_mask_1 (rhs1, vinfo, cache);
3603 	      res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3604 					     vinfo, cache);
3605 	      if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3606 		res = res2;
3607 	      break;
3608 	    }
3609 
3610 	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3611 	  if (comp_vectype == NULL_TREE)
3612 	    {
3613 	      res = NULL_TREE;
3614 	      break;
3615 	    }
3616 
3617 	  mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3618 	  if (!mask_type
3619 	      || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3620 	    {
3621 	      res = NULL_TREE;
3622 	      break;
3623 	    }
3624 
3625 	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3626 	      || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3627 	    {
3628 	      scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3629 	      res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3630 	    }
3631 	  else
3632 	    res = TREE_TYPE (rhs1);
3633 	}
3634     }
3635 
3636   cache.put (def_stmt, res);
3637   return res;
3638 }
3639 
3640 /* Return the proper type for converting bool VAR into
3641    an integer value or NULL_TREE if no such type exists.
3642    The type is chosen so that converted value has the
3643    same number of elements as VAR's vector type.  */
3644 
3645 static tree
3646 search_type_for_mask (tree var, vec_info *vinfo)
3647 {
3648   hash_map<gimple *, tree> cache;
3649   return search_type_for_mask_1 (var, vinfo, cache);
3650 }
3651 
3652 /* Function vect_recog_bool_pattern
3653 
3654    Try to find pattern like following:
3655 
3656      bool a_b, b_b, c_b, d_b, e_b;
3657      TYPE f_T;
3658    loop:
3659      S1  a_b = x1 CMP1 y1;
3660      S2  b_b = x2 CMP2 y2;
3661      S3  c_b = a_b & b_b;
3662      S4  d_b = x3 CMP3 y3;
3663      S5  e_b = c_b | d_b;
3664      S6  f_T = (TYPE) e_b;
3665 
3666    where type 'TYPE' is an integral type.  Or a similar pattern
3667    ending in
3668 
3669      S6  f_Y = e_b ? r_Y : s_Y;
3670 
3671    as results from if-conversion of a complex condition.
3672 
3673    Input:
3674 
3675    * STMT_VINFO: The stmt at the end from which the pattern
3676 		 search begins, i.e. cast of a bool to
3677 		 an integer type.
3678 
3679    Output:
3680 
3681    * TYPE_OUT: The type of the output of this pattern.
3682 
3683    * Return value: A new stmt that will be used to replace the pattern.
3684 
3685 	Assuming size of TYPE is the same as size of all comparisons
3686 	(otherwise some casts would be added where needed), for the above
3687 	sequence we create related pattern stmts:
3688 	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
3689 	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
3690 	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
3691 	S5'  e_T = c_T | d_T;
3692 	S6'  f_T = e_T;
3693 
3694 	Instead of the above S3' we could emit:
3695 	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
3696 	S3'  c_T = a_T | b_T;
3697 	but the above is more efficient.  */
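
/* For illustration only, hypothetical source code whose if-converted form
   produces a chain like S1-S6 above:

     void
     f (int *restrict out, const int *restrict x1, const int *restrict y1,
	const int *restrict x2, const int *restrict y2,
	const int *restrict x3, const int *restrict y3, int n)
     {
       for (int i = 0; i < n; i++)
	 out[i] = (x1[i] < y1[i] && x2[i] < y2[i]) || x3[i] < y3[i];
     }

   The short-circuit operators are typically turned into BIT_AND_EXPR and
   BIT_IOR_EXPR on bools in vectorizable loops, and the final store to
   'int' supplies the cast S6 that this pattern starts from.  */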
3698 
3699 static gimple *
3700 vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3701 {
3702   gimple *last_stmt = stmt_vinfo->stmt;
3703   enum tree_code rhs_code;
3704   tree var, lhs, rhs, vectype;
3705   vec_info *vinfo = stmt_vinfo->vinfo;
3706   gimple *pattern_stmt;
3707 
3708   if (!is_gimple_assign (last_stmt))
3709     return NULL;
3710 
3711   var = gimple_assign_rhs1 (last_stmt);
3712   lhs = gimple_assign_lhs (last_stmt);
3713   rhs_code = gimple_assign_rhs_code (last_stmt);
3714 
3715   if (rhs_code == VIEW_CONVERT_EXPR)
3716     var = TREE_OPERAND (var, 0);
3717 
3718   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3719     return NULL;
3720 
3721   hash_set<gimple *> bool_stmts;
3722 
3723   if (CONVERT_EXPR_CODE_P (rhs_code)
3724       || rhs_code == VIEW_CONVERT_EXPR)
3725     {
3726       if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3727 	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3728 	return NULL;
3729       vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3730       if (vectype == NULL_TREE)
3731 	return NULL;
3732 
3733       if (check_bool_pattern (var, vinfo, bool_stmts))
3734 	{
3735 	  rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), stmt_vinfo);
3736 	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3737 	  if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3738 	    pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3739 	  else
3740 	    pattern_stmt
3741 	      = gimple_build_assign (lhs, NOP_EXPR, rhs);
3742 	}
3743       else
3744 	{
3745 	  tree type = search_type_for_mask (var, vinfo);
3746 	  tree cst0, cst1, tmp;
3747 
3748 	  if (!type)
3749 	    return NULL;
3750 
3751 	  /* We may use the cond directly with the narrowed type, avoiding
3752 	     multiple cond exprs followed by result packing, and perform a
3753 	     single cond with a packed mask instead.  In the widening case
3754 	     it is better to do the cond first and then extract the
3755 	     results.  */
3756 	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3757 	    type = TREE_TYPE (lhs);
3758 
3759 	  cst0 = build_int_cst (type, 0);
3760 	  cst1 = build_int_cst (type, 1);
3761 	  tmp = vect_recog_temp_ssa_var (type, NULL);
3762 	  pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3763 
3764 	  if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3765 	    {
3766 	      tree new_vectype = get_vectype_for_scalar_type (type);
3767 	      append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
3768 
3769 	      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3770 	      pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3771 	    }
3772 	}
3773 
3774       *type_out = vectype;
3775       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3776 
3777       return pattern_stmt;
3778     }
3779   else if (rhs_code == COND_EXPR
3780 	   && TREE_CODE (var) == SSA_NAME)
3781     {
3782       vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3783       if (vectype == NULL_TREE)
3784 	return NULL;
3785 
3786       /* Build a scalar type for the boolean result that when
3787          vectorized matches the vector type of the result in
3788 	 size and number of elements.  */
3789       unsigned prec
3790 	= vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3791 			       TYPE_VECTOR_SUBPARTS (vectype));
3792 
3793       tree type
3794 	= build_nonstandard_integer_type (prec,
3795 					  TYPE_UNSIGNED (TREE_TYPE (var)));
3796       if (get_vectype_for_scalar_type (type) == NULL_TREE)
3797 	return NULL;
3798 
3799       if (!check_bool_pattern (var, vinfo, bool_stmts))
3800 	return NULL;
3801 
3802       rhs = adjust_bool_stmts (bool_stmts, type, stmt_vinfo);
3803 
3804       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3805       pattern_stmt
3806 	  = gimple_build_assign (lhs, COND_EXPR,
3807 				 build2 (NE_EXPR, boolean_type_node,
3808 					 rhs, build_int_cst (type, 0)),
3809 				 gimple_assign_rhs2 (last_stmt),
3810 				 gimple_assign_rhs3 (last_stmt));
3811       *type_out = vectype;
3812       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3813 
3814       return pattern_stmt;
3815     }
3816   else if (rhs_code == SSA_NAME
3817 	   && STMT_VINFO_DATA_REF (stmt_vinfo))
3818     {
3819       stmt_vec_info pattern_stmt_info;
3820       vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3821       gcc_assert (vectype != NULL_TREE);
3822       if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3823 	return NULL;
3824 
3825       if (check_bool_pattern (var, vinfo, bool_stmts))
3826 	rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), stmt_vinfo);
3827       else
3828 	{
3829 	  tree type = search_type_for_mask (var, vinfo);
3830 	  tree cst0, cst1, new_vectype;
3831 
3832 	  if (!type)
3833 	    return NULL;
3834 
3835 	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3836 	    type = TREE_TYPE (vectype);
3837 
3838 	  cst0 = build_int_cst (type, 0);
3839 	  cst1 = build_int_cst (type, 1);
3840 	  new_vectype = get_vectype_for_scalar_type (type);
3841 
3842 	  rhs = vect_recog_temp_ssa_var (type, NULL);
3843 	  pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3844 	  append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
3845 	}
3846 
3847       lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3848       if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3849 	{
3850 	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3851 	  gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3852 	  append_pattern_def_seq (stmt_vinfo, cast_stmt);
3853 	  rhs = rhs2;
3854 	}
3855       pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3856       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
3857       vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
3858       *type_out = vectype;
3859       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3860 
3861       return pattern_stmt;
3862     }
3863   else
3864     return NULL;
3865 }
3866 
3867 
3868 /* A helper for vect_recog_mask_conversion_pattern.  Build
3869    conversion of MASK to a type suitable for masking VECTYPE.
3870    Built statement gets required vectype and is appended to
3871    The built statement gets the required vectype and is appended to
3872    the pattern sequence of STMT_VINFO.
3873    Return converted mask.  */
3874 
3875 static tree
3876 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
3877 {
3878   gimple *stmt;
3879   tree masktype, tmp;
3880 
3881   masktype = build_same_sized_truth_vector_type (vectype);
3882   tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3883   stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3884   append_pattern_def_seq (stmt_vinfo, stmt, masktype);
3885 
3886   return tmp;
3887 }
3888 
3889 
3890 /* Function vect_recog_mask_conversion_pattern
3891 
3892    Try to find statements which require boolean type
3893    conversion.  Additional conversion statements are
3894    added to handle such cases.  For example:
3895 
3896    bool m_1, m_2, m_3;
3897    int i_4, i_5;
3898    double d_6, d_7;
3899    char c_1, c_2, c_3;
3900 
3901    S1   m_1 = i_4 > i_5;
3902    S2   m_2 = d_6 < d_7;
3903    S3   m_3 = m_1 & m_2;
3904    S4   c_1 = m_3 ? c_2 : c_3;
3905 
3906    Will be transformed into:
3907 
3908    S1   m_1 = i_4 > i_5;
3909    S2   m_2 = d_6 < d_7;
3910    S3'' m_2' = (_Bool[bitsize=32])m_2
3911    S3'  m_3' = m_1 & m_2';
3912    S4'' m_3'' = (_Bool[bitsize=8])m_3'
3913    S4'  c_1' = m_3'' ? c_2 : c_3;  */
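
/* For illustration only, hypothetical source corresponding to S1-S4 above;
   on typical targets the masks produced by the 'int' and 'double'
   comparisons have different numbers of elements per vector, which is what
   forces the inserted conversions:

     void
     f (char *restrict c1, const int *restrict i4, const int *restrict i5,
	const double *restrict d6, const double *restrict d7,
	const char *restrict c2, const char *restrict c3, int n)
     {
       for (int x = 0; x < n; x++)
	 c1[x] = (i4[x] > i5[x] && d6[x] < d7[x]) ? c2[x] : c3[x];
     }  */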
3914 
3915 static gimple *
3916 vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3917 {
3918   gimple *last_stmt = stmt_vinfo->stmt;
3919   enum tree_code rhs_code;
3920   tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3921   tree vectype1, vectype2;
3922   stmt_vec_info pattern_stmt_info;
3923   vec_info *vinfo = stmt_vinfo->vinfo;
3924 
3925   /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion.  */
3926   if (is_gimple_call (last_stmt)
3927       && gimple_call_internal_p (last_stmt))
3928     {
3929       gcall *pattern_stmt;
3930 
3931       internal_fn ifn = gimple_call_internal_fn (last_stmt);
3932       int mask_argno = internal_fn_mask_index (ifn);
3933       if (mask_argno < 0)
3934 	return NULL;
3935 
3936       bool store_p = internal_store_fn_p (ifn);
3937       if (store_p)
3938 	{
3939 	  int rhs_index = internal_fn_stored_value_index (ifn);
3940 	  tree rhs = gimple_call_arg (last_stmt, rhs_index);
3941 	  vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs));
3942 	}
3943       else
3944 	{
3945 	  lhs = gimple_call_lhs (last_stmt);
3946 	  vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3947 	}
3948 
3949       tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
3950       tree mask_arg_type = search_type_for_mask (mask_arg, vinfo);
3951       if (!mask_arg_type)
3952 	return NULL;
3953       vectype2 = get_mask_type_for_scalar_type (mask_arg_type);
3954 
3955       if (!vectype1 || !vectype2
3956 	  || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3957 		       TYPE_VECTOR_SUBPARTS (vectype2)))
3958 	return NULL;
3959 
3960       tmp = build_mask_conversion (mask_arg, vectype1, stmt_vinfo);
3961 
3962       auto_vec<tree, 8> args;
3963       unsigned int nargs = gimple_call_num_args (last_stmt);
3964       args.safe_grow (nargs);
3965       for (unsigned int i = 0; i < nargs; ++i)
3966 	args[i] = ((int) i == mask_argno
3967 		   ? tmp
3968 		   : gimple_call_arg (last_stmt, i));
3969       pattern_stmt = gimple_build_call_internal_vec (ifn, args);
3970 
3971       if (!store_p)
3972 	{
3973 	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3974 	  gimple_call_set_lhs (pattern_stmt, lhs);
3975 	}
3976       gimple_call_set_nothrow (pattern_stmt, true);
3977 
3978       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
3979       if (STMT_VINFO_DATA_REF (stmt_vinfo))
3980 	vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
3981 
3982       *type_out = vectype1;
3983       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
3984 
3985       return pattern_stmt;
3986     }
3987 
3988   if (!is_gimple_assign (last_stmt))
3989     return NULL;
3990 
3991   gimple *pattern_stmt;
3992   lhs = gimple_assign_lhs (last_stmt);
3993   rhs1 = gimple_assign_rhs1 (last_stmt);
3994   rhs_code = gimple_assign_rhs_code (last_stmt);
3995 
3996   /* Check for cond expression requiring mask conversion.  */
3997   if (rhs_code == COND_EXPR)
3998     {
3999       vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
4000 
4001       if (TREE_CODE (rhs1) == SSA_NAME)
4002 	{
4003 	  rhs1_type = search_type_for_mask (rhs1, vinfo);
4004 	  if (!rhs1_type)
4005 	    return NULL;
4006 	}
4007       else if (COMPARISON_CLASS_P (rhs1))
4008 	{
4009 	  /* Check whether we're comparing scalar booleans and (if so)
4010 	     whether a better mask type exists than the mask associated
4011 	     with boolean-sized elements.  This avoids unnecessary packs
4012 	     and unpacks if the booleans are set from comparisons of
4013 	     wider types.  E.g. in:
4014 
4015 	       int x1, x2, x3, x4, y1, y2;
4016 	       ...
4017 	       bool b1 = (x1 == x2);
4018 	       bool b2 = (x3 == x4);
4019 	       ... = b1 == b2 ? y1 : y2;
4020 
4021 	     it is better for b1 and b2 to use the mask type associated
4022 	     with int elements rather than bool (byte) elements.  */
4023 	  rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4024 	  if (!rhs1_type)
4025 	    rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4026 	}
4027       else
4028 	return NULL;
4029 
4030       vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4031 
4032       if (!vectype1 || !vectype2)
4033 	return NULL;
4034 
4035       /* Continue if a conversion is needed.  Also continue if we have
4036 	 a comparison whose vector type would normally be different from
4037 	 VECTYPE2 when considered in isolation.  In that case we'll
4038 	 replace the comparison with an SSA name (so that we can record
4039 	 its vector type) and behave as though the comparison was an SSA
4040 	 name from the outset.  */
4041       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4042 		    TYPE_VECTOR_SUBPARTS (vectype2))
4043 	  && (TREE_CODE (rhs1) == SSA_NAME
4044 	      || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4045 	return NULL;
4046 
4047       /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4048 	 in place; we can handle it in vectorizable_condition.  This avoids
4049 	 unnecessary promotion stmts and an increased vectorization factor.  */
4050       if (COMPARISON_CLASS_P (rhs1)
4051 	  && INTEGRAL_TYPE_P (rhs1_type)
4052 	  && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4053 		       TYPE_VECTOR_SUBPARTS (vectype2)))
4054 	{
4055 	  enum vect_def_type dt;
4056 	  if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4057 	      && dt == vect_external_def
4058 	      && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4059 	      && (dt == vect_external_def
4060 		  || dt == vect_constant_def))
4061 	    {
4062 	      tree wide_scalar_type = build_nonstandard_integer_type
4063 		(tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4064 		 TYPE_UNSIGNED (rhs1_type));
4065 	      tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4066 	      if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4067 		return NULL;
4068 	    }
4069 	}
4070 
4071       /* If rhs1 is a comparison we need to move it into a
4072 	 separate statement.  */
4073       if (TREE_CODE (rhs1) != SSA_NAME)
4074 	{
4075 	  tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4076 	  pattern_stmt = gimple_build_assign (tmp, rhs1);
4077 	  rhs1 = tmp;
4078 	  append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2);
4079 	}
4080 
4081       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4082 		    TYPE_VECTOR_SUBPARTS (vectype2)))
4083 	tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4084       else
4085 	tmp = rhs1;
4086 
4087       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4088       pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4089 					  gimple_assign_rhs2 (last_stmt),
4090 					  gimple_assign_rhs3 (last_stmt));
4091 
4092       *type_out = vectype1;
4093       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4094 
4095       return pattern_stmt;
4096     }
4097 
4098   /* Now check for binary boolean operations requiring conversion for
4099      one of operands.  */
4100   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4101     return NULL;
4102 
4103   if (rhs_code != BIT_IOR_EXPR
4104       && rhs_code != BIT_XOR_EXPR
4105       && rhs_code != BIT_AND_EXPR
4106       && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4107     return NULL;
4108 
4109   rhs2 = gimple_assign_rhs2 (last_stmt);
4110 
4111   rhs1_type = search_type_for_mask (rhs1, vinfo);
4112   rhs2_type = search_type_for_mask (rhs2, vinfo);
4113 
4114   if (!rhs1_type || !rhs2_type
4115       || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4116     return NULL;
4117 
4118   if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4119     {
4120       vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4121       if (!vectype1)
4122 	return NULL;
4123       rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
4124     }
4125   else
4126     {
4127       vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4128       if (!vectype1)
4129 	return NULL;
4130       rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4131     }
4132 
4133   lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4134   pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4135 
4136   *type_out = vectype1;
4137   vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4138 
4139   return pattern_stmt;
4140 }
4141 
4142 /* STMT_INFO is a load or store.  If the load or store is conditional, return
4143    the boolean condition under which it occurs, otherwise return null.  */
4144 
4145 static tree
4146 vect_get_load_store_mask (stmt_vec_info stmt_info)
4147 {
4148   if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4149     {
4150       gcc_assert (gimple_assign_single_p (def_assign));
4151       return NULL_TREE;
4152     }
4153 
4154   if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4155     {
4156       internal_fn ifn = gimple_call_internal_fn (def_call);
4157       int mask_index = internal_fn_mask_index (ifn);
4158       return gimple_call_arg (def_call, mask_index);
4159     }
4160 
4161   gcc_unreachable ();
4162 }
4163 
4164 /* Return the scalar offset type that an internal gather/scatter function
4165    should use.  GS_INFO describes the gather/scatter operation.  */
4166 
4167 static tree
4168 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4169 {
4170   tree offset_type = TREE_TYPE (gs_info->offset);
4171   unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4172 
4173   /* Enforced by vect_check_gather_scatter.  */
4174   unsigned int offset_bits = TYPE_PRECISION (offset_type);
4175   gcc_assert (element_bits >= offset_bits);
4176 
4177   /* If the offset is narrower than the elements, extend it according
4178      to its sign.  */
4179   if (element_bits > offset_bits)
4180     return build_nonstandard_integer_type (element_bits,
4181 					   TYPE_UNSIGNED (offset_type));
4182 
4183   return offset_type;
4184 }
4185 
4186 /* Return MASK if MASK is suitable for masking an operation on vectors
4187    of type VECTYPE, otherwise convert it into such a form and return
4188    the result.  Associate any conversion statements with STMT_INFO's
4189    pattern.  */
4190 
4191 static tree
4192 vect_convert_mask_for_vectype (tree mask, tree vectype,
4193 			       stmt_vec_info stmt_info, vec_info *vinfo)
4194 {
4195   tree mask_type = search_type_for_mask (mask, vinfo);
4196   if (mask_type)
4197     {
4198       tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4199       if (mask_vectype
4200 	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4201 		       TYPE_VECTOR_SUBPARTS (mask_vectype)))
4202 	mask = build_mask_conversion (mask, vectype, stmt_info);
4203     }
4204   return mask;
4205 }
4206 
4207 /* Return the equivalent of:
4208 
4209      fold_convert (TYPE, VALUE)
4210 
4211    with the expectation that the operation will be vectorized.
4212    If new statements are needed, add them as pattern statements
4213    to STMT_INFO.  */
4214 
4215 static tree
4216 vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
4217 {
4218   if (useless_type_conversion_p (type, TREE_TYPE (value)))
4219     return value;
4220 
4221   tree new_value = vect_recog_temp_ssa_var (type, NULL);
4222   gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4223   append_pattern_def_seq (stmt_info, conversion,
4224 			  get_vectype_for_scalar_type (type));
4225   return new_value;
4226 }
4227 
4228 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4229    internal function.  Return the final statement on success and set
4230    *TYPE_OUT to the vector type being loaded or stored.
4231 
4232    This function only handles gathers and scatters that were recognized
4233    as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */
4234 
4235 static gimple *
4236 vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
4237 {
4238   /* Currently we only support this for loop vectorization.  */
4239   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4240   if (!loop_vinfo)
4241     return NULL;
4242 
4243   /* Make sure that we're looking at a gather load or scatter store.  */
4244   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4245   if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4246     return NULL;
4247 
4248   /* Get the boolean that controls whether the load or store happens.
4249      This is null if the operation is unconditional.  */
4250   tree mask = vect_get_load_store_mask (stmt_info);
4251 
4252   /* Make sure that the target supports an appropriate internal
4253      function for the gather/scatter operation.  */
4254   gather_scatter_info gs_info;
4255   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4256       || gs_info.decl)
4257     return NULL;
4258 
4259   /* Convert the mask to the right form.  */
4260   tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4261   if (mask)
4262     mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4263 					  loop_vinfo);
4264 
4265   /* Get the invariant base and non-invariant offset, converting the
4266      latter to the same width as the vector elements.  */
4267   tree base = gs_info.base;
4268   tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4269   tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4270 						stmt_info);
4271 
4272   /* Build the new pattern statement.  */
4273   tree scale = size_int (gs_info.scale);
4274   gcall *pattern_stmt;
4275   if (DR_IS_READ (dr))
4276     {
4277       if (mask != NULL)
4278 	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4279 						   offset, scale, mask);
4280       else
4281 	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4282 						   offset, scale);
4283       tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4284       gimple_call_set_lhs (pattern_stmt, load_lhs);
4285     }
4286   else
4287     {
4288       tree rhs = vect_get_store_rhs (stmt_info);
4289       if (mask != NULL)
4290 	pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4291 						   base, offset, scale, rhs,
4292 						   mask);
4293       else
4294 	pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4295 						   base, offset, scale, rhs);
4296     }
4297   gimple_call_set_nothrow (pattern_stmt, true);
4298 
4299   /* Copy across relevant vectorization info and associate DR with the
4300      new pattern statement instead of the original statement.  */
4301   stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4302   loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4303 
4304   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4305   *type_out = vectype;
4306   vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4307 
4308   return pattern_stmt;
4309 }
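
/* For illustration only, a hypothetical indexed-load loop that is
   recognized as a gather when the target provides the corresponding
   internal function:

     void
     f (double *restrict out, const double *restrict base,
	const int *restrict idx, int n)
     {
       for (int i = 0; i < n; i++)
	 out[i] = base[idx[i]];
     }

   The 32-bit offsets idx[i] are first widened to the 64-bit element width
   by vect_add_conversion_to_pattern before the internal call is built.  */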
4310 
4311 /* Return true if TYPE is a non-boolean integer type.  These are the types
4312    that we want to consider for narrowing.  */
4313 
4314 static bool
4315 vect_narrowable_type_p (tree type)
4316 {
4317   return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4318 }
4319 
4320 /* Return true if the operation given by CODE can be truncated to N bits
4321    when only N bits of the output are needed.  This is only true if bit N+1
4322    of the inputs has no effect on the low N bits of the result.  */
4323 
4324 static bool
4325 vect_truncatable_operation_p (tree_code code)
4326 {
4327   switch (code)
4328     {
4329     case PLUS_EXPR:
4330     case MINUS_EXPR:
4331     case MULT_EXPR:
4332     case BIT_AND_EXPR:
4333     case BIT_IOR_EXPR:
4334     case BIT_XOR_EXPR:
4335     case COND_EXPR:
4336       return true;
4337 
4338     default:
4339       return false;
4340     }
4341 }
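
/* For illustration only: for the codes listed above, bit N+1 of the inputs
   cannot affect the low N bits of the result.  E.g. with 8-bit truncation

     (unsigned char) (x + y)
       == (unsigned char) ((unsigned char) x + (unsigned char) y)

   holds for any unsigned short X and Y, whereas (unsigned char) (x >> 1)
   depends on bit 8 of X, which is why right shifts are not listed.  */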
4342 
4343 /* Record that STMT_INFO could be changed from operating on TYPE to
4344    operating on a type with the precision and sign given by PRECISION
4345    and SIGN respectively.  PRECISION is an arbitrary bit precision;
4346    it might not be a whole number of bytes.  */
4347 
4348 static void
4349 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4350 			 unsigned int precision, signop sign)
4351 {
4352   /* Round the precision up to a whole number of bytes.  */
4353   precision = vect_element_precision (precision);
4354   if (precision < TYPE_PRECISION (type)
4355       && (!stmt_info->operation_precision
4356 	  || stmt_info->operation_precision > precision))
4357     {
4358       stmt_info->operation_precision = precision;
4359       stmt_info->operation_sign = sign;
4360     }
4361 }
4362 
4363 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4364    non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
4365    is an arbitrary bit precision; it might not be a whole number of bytes.  */
4366 
4367 static void
4368 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4369 			      unsigned int min_input_precision)
4370 {
4371   /* This operation in isolation only requires the inputs to have
4372      MIN_INPUT_PRECISION of precision.  However, that doesn't mean
4373      that MIN_INPUT_PRECISION is a natural precision for the chain
4374      as a whole.  E.g. consider something like:
4375 
4376 	 unsigned short *x, *y;
4377 	 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4378 
4379      The right shift can be done on unsigned chars, and only requires the
4380      result of "*x & 0xf0" to be done on unsigned chars.  But taking that
4381      approach would mean turning a natural chain of single-vector unsigned
4382      short operations into one that truncates "*x" and then extends
4383      "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4384      operation and one vector for each unsigned char operation.
4385      This would be a significant pessimization.
4386 
4387      Instead only propagate the maximum of this precision and the precision
4388      required by the users of the result.  This means that we don't pessimize
4389      the case above but continue to optimize things like:
4390 
4391 	 unsigned char *y;
4392 	 unsigned short *x;
4393 	 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4394 
4395      Here we would truncate two vectors of *x to a single vector of
4396      unsigned chars and use single-vector unsigned char operations for
4397      everything else, rather than doing two unsigned short copies of
4398      "(*x & 0xf0) >> 4" and then truncating the result.  */
4399   min_input_precision = MAX (min_input_precision,
4400 			     stmt_info->min_output_precision);
4401 
4402   if (min_input_precision < TYPE_PRECISION (type)
4403       && (!stmt_info->min_input_precision
4404 	  || stmt_info->min_input_precision > min_input_precision))
4405     stmt_info->min_input_precision = min_input_precision;
4406 }
4407 
4408 /* Subroutine of vect_determine_min_output_precision.  Return true if
4409    we can calculate a reduced number of output bits for STMT_INFO,
4410    whose result is LHS.  */
4411 
4412 static bool
4413 vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
4414 {
4415   /* Take the maximum precision required by users of the result.  */
4416   vec_info *vinfo = stmt_info->vinfo;
4417   unsigned int precision = 0;
4418   imm_use_iterator iter;
4419   use_operand_p use;
4420   FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4421     {
4422       gimple *use_stmt = USE_STMT (use);
4423       if (is_gimple_debug (use_stmt))
4424 	continue;
4425       stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4426       if (!use_stmt_info || !use_stmt_info->min_input_precision)
4427 	return false;
4428       /* The input precision recorded for COND_EXPRs applies only to the
4429 	 "then" and "else" values.  */
4430       gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
4431       if (assign
4432 	  && gimple_assign_rhs_code (assign) == COND_EXPR
4433 	  && use->use != gimple_assign_rhs2_ptr (assign)
4434 	  && use->use != gimple_assign_rhs3_ptr (assign))
4435 	return false;
4436       precision = MAX (precision, use_stmt_info->min_input_precision);
4437     }
4438 
4439   if (dump_enabled_p ())
4440     dump_printf_loc (MSG_NOTE, vect_location,
4441 		     "only the low %d bits of %T are significant\n",
4442 		     precision, lhs);
4443   stmt_info->min_output_precision = precision;
4444   return true;
4445 }
4446 
4447 /* Calculate min_output_precision for STMT_INFO.  */
4448 
4449 static void
4450 vect_determine_min_output_precision (stmt_vec_info stmt_info)
4451 {
4452   /* We're only interested in statements with a narrowable result.  */
4453   tree lhs = gimple_get_lhs (stmt_info->stmt);
4454   if (!lhs
4455       || TREE_CODE (lhs) != SSA_NAME
4456       || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4457     return;
4458 
4459   if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
4460     stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4461 }
4462 
4463 /* Use range information to decide whether STMT (described by STMT_INFO)
4464    could be done in a narrower type.  This is effectively a forward
4465    propagation, since it uses context-independent information that applies
4466    to all users of an SSA name.  */
4467 
4468 static void
4469 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4470 {
4471   tree lhs = gimple_assign_lhs (stmt);
4472   if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4473     return;
4474 
4475   tree type = TREE_TYPE (lhs);
4476   if (!vect_narrowable_type_p (type))
4477     return;
4478 
4479   /* First see whether we have any useful range information for the result.  */
4480   unsigned int precision = TYPE_PRECISION (type);
4481   signop sign = TYPE_SIGN (type);
4482   wide_int min_value, max_value;
4483   if (!vect_get_range_info (lhs, &min_value, &max_value))
4484     return;
4485 
4486   tree_code code = gimple_assign_rhs_code (stmt);
4487   unsigned int nops = gimple_num_ops (stmt);
4488 
4489   if (!vect_truncatable_operation_p (code))
4490     /* Check that all relevant input operands are compatible, and update
4491        [MIN_VALUE, MAX_VALUE] to include their ranges.  */
4492     for (unsigned int i = 1; i < nops; ++i)
4493       {
4494 	tree op = gimple_op (stmt, i);
4495 	if (TREE_CODE (op) == INTEGER_CST)
4496 	  {
4497 	    /* Don't require the integer to have TYPE (which it might
4498 	       not for things like shift amounts, etc.), but do require
4499 	       it to fit TYPE.  */
4500 	    if (!int_fits_type_p (op, type))
4501 	      return;
4502 
4503 	    min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4504 	    max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4505 	  }
4506 	else if (TREE_CODE (op) == SSA_NAME)
4507 	  {
4508 	    /* Ignore codes that don't take uniform arguments.  */
4509 	    if (!types_compatible_p (TREE_TYPE (op), type))
4510 	      return;
4511 
4512 	    wide_int op_min_value, op_max_value;
4513 	    if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4514 	      return;
4515 
4516 	    min_value = wi::min (min_value, op_min_value, sign);
4517 	    max_value = wi::max (max_value, op_max_value, sign);
4518 	  }
4519 	else
4520 	  return;
4521       }
4522 
4523   /* Try to switch signed types for unsigned types if we can.
4524      This is better for two reasons.  First, unsigned ops tend
4525      to be cheaper than signed ops.  Second, it means that we can
4526      handle things like:
4527 
4528 	signed char c;
4529 	int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4530 
4531      as:
4532 
4533 	signed char c;
4534 	unsigned short res_1 = (unsigned short) c & 0xff00;
4535 	int res = (int) res_1;
4536 
4537      where the intermediate result res_1 has unsigned rather than
4538      signed type.  */
4539   if (sign == SIGNED && !wi::neg_p (min_value))
4540     sign = UNSIGNED;
4541 
4542   /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
4543   unsigned int precision1 = wi::min_precision (min_value, sign);
4544   unsigned int precision2 = wi::min_precision (max_value, sign);
4545   unsigned int value_precision = MAX (precision1, precision2);
4546   if (value_precision >= precision)
4547     return;
4548 
4549   if (dump_enabled_p ())
4550     dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4551 		     " without loss of precision: %G",
4552 		     sign == SIGNED ? "signed" : "unsigned",
4553 		     value_precision, stmt);
4554 
4555   vect_set_operation_type (stmt_info, type, value_precision, sign);
4556   vect_set_min_input_precision (stmt_info, type, value_precision);
4557 }
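
/* As a hypothetical illustration of the range-based narrowing above
   (not part of the pass itself): given source code such as

     unsigned char a, b;
     int sum = (int) a + (int) b;

   the result SUM has the range [0, 510], so MIN_VALUE is non-negative,
   SIGN switches to UNSIGNED, and VALUE_PRECISION becomes 9 - well below
   the 32 bits of the result type.  Since the addition is a truncatable
   operation, the operand-compatibility loop is skipped and the statement
   can be recorded as being performable in a narrower unsigned type.  */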
4558 
4559 /* Use information about the users of STMT's result to decide whether
4560    STMT (described by STMT_INFO) could be done in a narrower type.
4561    This is effectively a backward propagation.  */
4562 
4563 static void
4564 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4565 {
4566   tree_code code = gimple_assign_rhs_code (stmt);
4567   unsigned int opno = (code == COND_EXPR ? 2 : 1);
4568   tree type = TREE_TYPE (gimple_op (stmt, opno));
4569   if (!vect_narrowable_type_p (type))
4570     return;
4571 
4572   unsigned int precision = TYPE_PRECISION (type);
4573   unsigned int operation_precision, min_input_precision;
4574   switch (code)
4575     {
4576     CASE_CONVERT:
4577       /* Only the bits that contribute to the output matter.  Don't change
4578 	 the precision of the operation itself.  */
4579       operation_precision = precision;
4580       min_input_precision = stmt_info->min_output_precision;
4581       break;
4582 
4583     case LSHIFT_EXPR:
4584     case RSHIFT_EXPR:
4585       {
4586 	tree shift = gimple_assign_rhs2 (stmt);
4587 	if (TREE_CODE (shift) != INTEGER_CST
4588 	    || !wi::ltu_p (wi::to_widest (shift), precision))
4589 	  return;
4590 	unsigned int const_shift = TREE_INT_CST_LOW (shift);
4591 	if (code == LSHIFT_EXPR)
4592 	  {
4593 	    /* Avoid creating an undefined shift.
4594 
4595 	       ??? We could instead use min_output_precision as-is and
4596 	       optimize out-of-range shifts to zero.  However, only
4597 	       degenerate testcases shift away all their useful input data,
4598 	       and it isn't natural to drop input operations in the middle
4599 	       of vectorization.  This sort of thing should really be
4600 	       handled before vectorization.  */
4601 	    operation_precision = MAX (stmt_info->min_output_precision,
4602 				       const_shift + 1);
4603 	    /* We need CONST_SHIFT fewer bits of the input.  */
4604 	    min_input_precision = (MAX (operation_precision, const_shift)
4605 				   - const_shift);
4606 	  }
4607 	else
4608 	  {
4609 	    /* We need CONST_SHIFT extra bits to do the operation.  */
4610 	    operation_precision = (stmt_info->min_output_precision
4611 				   + const_shift);
4612 	    min_input_precision = operation_precision;
4613 	  }
4614 	break;
4615       }
4616 
4617     default:
4618       if (vect_truncatable_operation_p (code))
4619 	{
4620 	  /* Input bit N has no effect on output bits N-1 and lower.  */
4621 	  operation_precision = stmt_info->min_output_precision;
4622 	  min_input_precision = operation_precision;
4623 	  break;
4624 	}
4625       return;
4626     }
4627 
4628   if (operation_precision < precision)
4629     {
4630       if (dump_enabled_p ())
4631 	dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4632 			 " without affecting users: %G",
4633 			 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
4634 			 operation_precision, stmt);
4635       vect_set_operation_type (stmt_info, type, operation_precision,
4636 			       TYPE_SIGN (type));
4637     }
4638   vect_set_min_input_precision (stmt_info, type, min_input_precision);
4639 }
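
/* A hypothetical worked example for the shift handling above (purely
   illustrative): suppose only the low 8 bits of the result are needed,
   i.e. min_output_precision is 8.

   - For X >> 8, the low 8 output bits come from input bits 8..15, so
     OPERATION_PRECISION = 8 + 8 = 16 and MIN_INPUT_PRECISION = 16.

   - For X << 3, OPERATION_PRECISION = MAX (8, 3 + 1) = 8 (never
     shifting by the full operation width) and MIN_INPUT_PRECISION
     = MAX (8, 3) - 3 = 5, since only the low 5 input bits can reach
     the low 8 output bits.

   - For a truncatable operation such as X + Y, both values are simply
     8, because input bit N cannot affect output bits below N.  */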
4640 
4641 /* Handle vect_determine_precisions for STMT_INFO, given that we
4642    have already done so for the users of its result.  */
4643 
4644 void
4645 vect_determine_stmt_precisions (stmt_vec_info stmt_info)
4646 {
4647   vect_determine_min_output_precision (stmt_info);
4648   if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
4649     {
4650       vect_determine_precisions_from_range (stmt_info, stmt);
4651       vect_determine_precisions_from_users (stmt_info, stmt);
4652     }
4653 }
4654 
4655 /* Walk backwards through the vectorizable region to determine the
4656    values of these fields:
4657 
4658    - min_output_precision
4659    - min_input_precision
4660    - operation_precision
4661    - operation_sign.  */
4662 
4663 void
4664 vect_determine_precisions (vec_info *vinfo)
4665 {
4666   DUMP_VECT_SCOPE ("vect_determine_precisions");
4667 
4668   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4669     {
4670       struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4671       basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4672       unsigned int nbbs = loop->num_nodes;
4673 
4674       for (unsigned int i = 0; i < nbbs; i++)
4675 	{
4676 	  basic_block bb = bbs[nbbs - i - 1];
4677 	  for (gimple_stmt_iterator si = gsi_last_bb (bb);
4678 	       !gsi_end_p (si); gsi_prev (&si))
4679 	    vect_determine_stmt_precisions
4680 	      (vinfo->lookup_stmt (gsi_stmt (si)));
4681 	}
4682     }
4683   else
4684     {
4685       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4686       gimple_stmt_iterator si = bb_vinfo->region_end;
4687       gimple *stmt;
4688       do
4689 	{
4690 	  if (!gsi_stmt (si))
4691 	    si = gsi_last_bb (bb_vinfo->bb);
4692 	  else
4693 	    gsi_prev (&si);
4694 	  stmt = gsi_stmt (si);
4695 	  stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
4696 	  if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
4697 	    vect_determine_stmt_precisions (stmt_info);
4698 	}
4699       while (stmt != gsi_stmt (bb_vinfo->region_begin));
4700     }
4701 }
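
/* To illustrate the backward walk (a hypothetical example, not actual
   pass output): for a region containing

     int t = x[i] << 8;     // S1, X an array of unsigned char
     short r = (short) t;   // S2

   S2 is visited first.  Being a conversion, it only needs as many bits
   of T as its own users need of R, so it records a min_input_precision
   of 16 for T.  When S1 is visited afterwards, its min_output_precision
   therefore becomes 16, and the shift handling above then gives an
   operation_precision of 16 and a min_input_precision of 16 - 8 = 8,
   matching the 8-bit loads from X.  */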
4702 
4703 typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);
4704 
4705 struct vect_recog_func
4706 {
4707   vect_recog_func_ptr fn;
4708   const char *name;
4709 };
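
/* For reference, a recognizer matching the signature above has this
   shape (a hypothetical skeleton, not one of the table entries below).
   On failure it returns NULL; on success it adds any earlier
   replacement statements to STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
   sets *TYPE_OUT to the vector type of the final statement after
   checking target support, and returns that final statement:

     static gimple *
     vect_recog_example_pattern (stmt_vec_info stmt_info, tree *type_out)
     {
       ...
     }  */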
4710 
4711 /* Note that ordering matters - the first pattern that matches a stmt is
4712    taken, which means the more complex patterns usually need to precede the
4713    less complex ones (widen_sum only after dot_prod or sad, for example).  */
4714 static vect_recog_func vect_vect_recog_func_ptrs[] = {
4715   { vect_recog_over_widening_pattern, "over_widening" },
4716   /* Must come after over_widening, which narrows the shift as much as
4717      possible beforehand.  */
4718   { vect_recog_average_pattern, "average" },
4719   { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
4720   { vect_recog_widen_mult_pattern, "widen_mult" },
4721   { vect_recog_dot_prod_pattern, "dot_prod" },
4722   { vect_recog_sad_pattern, "sad" },
4723   { vect_recog_widen_sum_pattern, "widen_sum" },
4724   { vect_recog_pow_pattern, "pow" },
4725   { vect_recog_widen_shift_pattern, "widen_shift" },
4726   { vect_recog_rotate_pattern, "rotate" },
4727   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
4728   { vect_recog_divmod_pattern, "divmod" },
4729   { vect_recog_mult_pattern, "mult" },
4730   { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
4731   { vect_recog_bool_pattern, "bool" },
4732   /* This must come before mask conversion, and includes the parts
4733      of mask conversion that are needed for gather and scatter
4734      internal functions.  */
4735   { vect_recog_gather_scatter_pattern, "gather_scatter" },
4736   { vect_recog_mask_conversion_pattern, "mask_conversion" }
4737 };
4738 
4739 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
4740 
4741 /* Mark statements that are involved in a pattern.  */
4742 
4743 static inline void
4744 vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
4745                          tree pattern_vectype)
4746 {
4747   gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4748 
4749   gimple *orig_pattern_stmt = NULL;
4750   if (is_pattern_stmt_p (orig_stmt_info))
4751     {
4752       /* We're replacing a statement in an existing pattern definition
4753 	 sequence.  */
4754       orig_pattern_stmt = orig_stmt_info->stmt;
4755       if (dump_enabled_p ())
4756 	dump_printf_loc (MSG_NOTE, vect_location,
4757 			 "replacing earlier pattern %G", orig_pattern_stmt);
4758 
4759       /* To keep the book-keeping simple, just swap the lhs of the
4760 	 old and new statements, so that the old one has a valid but
4761 	 unused lhs.  */
4762       tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
4763       gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
4764       gimple_set_lhs (pattern_stmt, old_lhs);
4765 
4766       if (dump_enabled_p ())
4767 	dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
4768 
4769       /* Switch to the statement that ORIG replaces.  */
4770       orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
4771 
4772       /* We shouldn't be replacing the main pattern statement.  */
4773       gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
4774 		  != orig_pattern_stmt);
4775     }
4776 
4777   if (def_seq)
4778     for (gimple_stmt_iterator si = gsi_start (def_seq);
4779 	 !gsi_end_p (si); gsi_next (&si))
4780       {
4781 	stmt_vec_info pattern_stmt_info
4782 	  = vect_init_pattern_stmt (gsi_stmt (si),
4783 				    orig_stmt_info, pattern_vectype);
4784 	/* Stmts in the def sequence are not vectorizable cycle or
4785 	   induction defs, instead they should all be vect_internal_def
4786 	   feeding the main pattern stmt which retains this def type.  */
4787 	STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
4788       }
4789 
4790   if (orig_pattern_stmt)
4791     {
4792       vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4793 
4794       /* Insert all the new pattern statements before the original one.  */
4795       gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4796       gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
4797 					       orig_def_seq);
4798       gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
4799       gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
4800 
4801       /* Remove the pattern statement that this new pattern replaces.  */
4802       gsi_remove (&gsi, false);
4803     }
4804   else
4805     vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4806 }
4807 
4808 /* Function vect_pattern_recog_1
4809 
4810    Input:
4811    PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4812         computation pattern.
4813    STMT_INFO: A stmt from which the pattern search should start.
4814 
4815    If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
4816    a sequence of statements that has the same functionality and can be
4817    used to replace STMT_INFO.  It returns the last statement in the sequence
4818    and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
4819    PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
4820    statement, having first checked that the target supports the new operation
4821    in that type.
4822 
4823    This function also does some bookkeeping, as explained in the documentation
4824    for vect_pattern_recog.  */
4825 
4826 static void
4827 vect_pattern_recog_1 (vect_recog_func *recog_func, stmt_vec_info stmt_info)
4828 {
4829   vec_info *vinfo = stmt_info->vinfo;
4830   gimple *pattern_stmt;
4831   loop_vec_info loop_vinfo;
4832   tree pattern_vectype;
4833 
4834   /* If this statement has already been replaced with pattern statements,
4835      leave the original statement alone, since the first match wins.
4836      Instead try to match against the definition statements that feed
4837      the main pattern statement.  */
4838   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
4839     {
4840       gimple_stmt_iterator gsi;
4841       for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4842 	   !gsi_end_p (gsi); gsi_next (&gsi))
4843 	vect_pattern_recog_1 (recog_func, vinfo->lookup_stmt (gsi_stmt (gsi)));
4844       return;
4845     }
4846 
4847   gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4848   pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
4849   if (!pattern_stmt)
4850     {
4851       /* Clear any half-formed pattern definition sequence.  */
4852       STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
4853       return;
4854     }
4855 
4856   loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4857   gcc_assert (pattern_vectype);
4858 
4859   /* Found a vectorizable pattern.  */
4860   if (dump_enabled_p ())
4861     dump_printf_loc (MSG_NOTE, vect_location,
4862 		     "%s pattern recognized: %G",
4863 		     recog_func->name, pattern_stmt);
4864 
4865   /* Mark the stmts that are involved in the pattern. */
4866   vect_mark_pattern_stmts (stmt_info, pattern_stmt, pattern_vectype);
4867 
4868   /* Patterns cannot be vectorized using SLP, because they change the order of
4869      computation.  */
4870   if (loop_vinfo)
4871     {
4872       unsigned ix, ix2;
4873       stmt_vec_info *elem_ptr;
4874       VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
4875 			     elem_ptr, *elem_ptr == stmt_info);
4876     }
4877 }
4878 
4879 
4880 /* Function vect_pattern_recog
4881 
4882    Input:
4883    VINFO - vectorization info for the loop or basic-block region in which
4884         we want to look for computation idioms.
4885 
4886    Output - for each computation idiom that is detected we create a new stmt
4887         that provides the same functionality and that can be vectorized.  We
4888         also record some information in the stmt_vec_info of the relevant
4889         stmts, as explained below:
4890 
4891    At the entry to this function we have the following stmts, with the
4892    following initial value in the STMT_VINFO fields:
4893 
4894          stmt                     in_pattern_p  related_stmt    vec_stmt
4895          S1: a_i = ....                 -       -               -
4896          S2: a_2 = ..use(a_i)..         -       -               -
4897          S3: a_1 = ..use(a_2)..         -       -               -
4898          S4: a_0 = ..use(a_1)..         -       -               -
4899          S5: ... = ..use(a_0)..         -       -               -
4900 
4901    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4902    represented by a single stmt.  We then:
4903    - create a new stmt S6 equivalent to the pattern (the stmt is not
4904      inserted into the code)
4905    - fill in the STMT_VINFO fields as follows:
4906 
4907                                   in_pattern_p  related_stmt    vec_stmt
4908          S1: a_i = ....                 -       -               -
4909          S2: a_2 = ..use(a_i)..         -       -               -
4910          S3: a_1 = ..use(a_2)..         -       -               -
4911          S4: a_0 = ..use(a_1)..         true    S6              -
4912           '---> S6: a_new = ....        -       S4              -
4913          S5: ... = ..use(a_0)..         -       -               -
4914 
4915    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4916    to each other through the RELATED_STMT field).
4917 
4918    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4919    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
4920    remain irrelevant unless used by stmts other than S4.
4921 
4922    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4923    (because they are marked as irrelevant).  It will vectorize S6, and record
4924    a pointer to the new vector stmt VS6 from S6 (as usual).
4925    S4 will be skipped, and S5 will be vectorized as usual:
4926 
4927                                   in_pattern_p  related_stmt    vec_stmt
4928          S1: a_i = ....                 -       -               -
4929          S2: a_2 = ..use(a_i)..         -       -               -
4930          S3: a_1 = ..use(a_2)..         -       -               -
4931        > VS6: va_new = ....             -       -               -
4932          S4: a_0 = ..use(a_1)..         true    S6              VS6
4933           '---> S6: a_new = ....        -       S4              VS6
4934        > VS5: ... = ..vuse(va_new)..    -       -               -
4935          S5: ... = ..use(a_0)..         -       -               -
4936 
4937    DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4938    elsewhere), and we'll end up with:
4939 
4940         VS6: va_new = ....
4941         VS5: ... = ..vuse(va_new)..
4942 
4943    If the pattern consists of more than one statement, e.g., widen-mult with
4944    an intermediate type:
4945 
4946      S1  a_t = ;
4947      S2  a_T = (TYPE) a_t;
4948            '--> S3: a_it = (interm_type) a_t;
4949      S4  prod_T = a_T * CONST;
4950            '--> S5: prod_T' = a_it w* CONST;
4951 
4952    there may be other users of a_T outside the pattern.  In that case S2 will
4953    be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4954    and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
4955    be recorded in S3.  */
4956 
4957 void
4958 vect_pattern_recog (vec_info *vinfo)
4959 {
4960   struct loop *loop;
4961   basic_block *bbs;
4962   unsigned int nbbs;
4963   gimple_stmt_iterator si;
4964   unsigned int i, j;
4965 
4966   vect_determine_precisions (vinfo);
4967 
4968   DUMP_VECT_SCOPE ("vect_pattern_recog");
4969 
4970   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4971     {
4972       loop = LOOP_VINFO_LOOP (loop_vinfo);
4973       bbs = LOOP_VINFO_BBS (loop_vinfo);
4974       nbbs = loop->num_nodes;
4975 
4976       /* Scan through the loop stmts, applying the pattern recognition
4977 	 functions starting at each stmt visited:  */
4978       for (i = 0; i < nbbs; i++)
4979 	{
4980 	  basic_block bb = bbs[i];
4981 	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4982 	    {
4983 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
4984 	      /* Scan over all generic vect_recog_xxx_pattern functions.  */
4985 	      for (j = 0; j < NUM_PATTERNS; j++)
4986 		vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j],
4987 				      stmt_info);
4988 	    }
4989 	}
4990     }
4991   else
4992     {
4993       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4994       for (si = bb_vinfo->region_begin;
4995 	   gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4996 	{
4997 	  gimple *stmt = gsi_stmt (si);
4998 	  stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
4999 	  if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
5000 	    continue;
5001 
5002 	  /* Scan over all generic vect_recog_xxx_pattern functions.  */
5003 	  for (j = 0; j < NUM_PATTERNS; j++)
5004 	    vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], stmt_info);
5005 	}
5006     }
5007 }
5008