1 /* Analysis Utilities for Loop Vectorization.
2    Copyright (C) 2006-2021 Free Software Foundation, Inc.
3    Contributed by Dorit Nuzman <dorit@il.ibm.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h"		/* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
49 #include "tree-vector-builder.h"
50 #include "vec-perm-indices.h"
51 
52 /* Return true if we have a useful VR_RANGE range for VAR, storing it
53    in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */
54 
55 static bool
56 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
57 {
58   value_range_kind vr_type = get_range_info (var, min_value, max_value);
59   wide_int nonzero = get_nonzero_bits (var);
60   signop sgn = TYPE_SIGN (TREE_TYPE (var));
61   if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
62 					 nonzero, sgn) == VR_RANGE)
63     {
64       if (dump_enabled_p ())
65 	{
66 	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
67 	  dump_printf (MSG_NOTE, " has range [");
68 	  dump_hex (MSG_NOTE, *min_value);
69 	  dump_printf (MSG_NOTE, ", ");
70 	  dump_hex (MSG_NOTE, *max_value);
71 	  dump_printf (MSG_NOTE, "]\n");
72 	}
73       return true;
74     }
75   else
76     {
77       if (dump_enabled_p ())
78 	{
79 	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
80 	  dump_printf (MSG_NOTE, " has no range info\n");
81 	}
82       return false;
83     }
84 }
85 
86 /* Report that we've found an instance of the pattern named NAME in
87    statement STMT.  */
88 
89 static void
90 vect_pattern_detected (const char *name, gimple *stmt)
91 {
92   if (dump_enabled_p ())
93     dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
94 }
95 
96 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
97    return the pattern statement's stmt_vec_info.  Set its vector type to
98    VECTYPE if it doesn't have one already.  */
99 
100 static stmt_vec_info
101 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
102 			stmt_vec_info orig_stmt_info, tree vectype)
103 {
104   stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
105   if (pattern_stmt_info == NULL)
106     pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
107   gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
108 
109   pattern_stmt_info->pattern_stmt_p = true;
110   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
111   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
112     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
113   if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
114     {
115       gcc_assert (VECTOR_BOOLEAN_TYPE_P (vectype)
116 		  == vect_use_mask_type_p (orig_stmt_info));
117       STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
118       pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
119     }
120   return pattern_stmt_info;
121 }
122 
123 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
124    Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
125    have one already.  */
126 
127 static void
128 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
129 		       stmt_vec_info orig_stmt_info, tree vectype)
130 {
131   STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
132   STMT_VINFO_RELATED_STMT (orig_stmt_info)
133     = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
134 }
135 
136 /* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
137    is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
138    be different from the vector type of the final pattern statement.
139    If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
140    from which it was derived.  */
141 
142 static inline void
143 append_pattern_def_seq (vec_info *vinfo,
144 			stmt_vec_info stmt_info, gimple *new_stmt,
145 			tree vectype = NULL_TREE,
146 			tree scalar_type_for_mask = NULL_TREE)
147 {
148   gcc_assert (!scalar_type_for_mask
149 	      == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
150   if (vectype)
151     {
152       stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
153       STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
154       if (scalar_type_for_mask)
155 	new_stmt_info->mask_precision
156 	  = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
157     }
158   gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
159 				      new_stmt);
160 }
161 
162 /* The caller wants to perform new operations on vect_external variable
163    VAR, so that the result of the operations would also be vect_external.
164    Return the edge on which the operations can be performed, if one exists.
165    Return null if the operations should instead be treated as part of
166    the pattern that needs them.  */
167 
168 static edge
169 vect_get_external_def_edge (vec_info *vinfo, tree var)
170 {
171   edge e = NULL;
172   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
173     {
174       e = loop_preheader_edge (loop_vinfo->loop);
175       if (!SSA_NAME_IS_DEFAULT_DEF (var))
176 	{
177 	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
178 	  if (bb == NULL
179 	      || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
180 	    e = NULL;
181 	}
182     }
183   return e;
184 }
185 
186 /* Return true if the target supports a vector version of CODE,
187    where CODE is known to map to a direct optab.  ITYPE specifies
188    the type of (some of) the scalar inputs and OTYPE specifies the
189    type of the scalar result.
190 
191    If CODE allows the inputs and outputs to have different types
192    (such as for WIDEN_SUM_EXPR), it is the input mode rather
193    than the output mode that determines the appropriate target pattern.
194    Operand 0 of the target pattern then specifies the mode that the output
195    must have.
196 
197    When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
198    Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
199    is nonnull.  */
200 
201 static bool
202 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
203 				 tree itype, tree *vecotype_out,
204 				 tree *vecitype_out = NULL)
205 {
206   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
207   if (!vecitype)
208     return false;
209 
210   tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
211   if (!vecotype)
212     return false;
213 
214   optab optab = optab_for_tree_code (code, vecitype, optab_default);
215   if (!optab)
216     return false;
217 
218   insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
219   if (icode == CODE_FOR_nothing
220       || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
221     return false;
222 
223   *vecotype_out = vecotype;
224   if (vecitype_out)
225     *vecitype_out = vecitype;
226   return true;
227 }
228 
229 /* Round bit precision PRECISION up to a full element.  */
230 
231 static unsigned int
232 vect_element_precision (unsigned int precision)
233 {
234   precision = 1 << ceil_log2 (precision);
235   return MAX (precision, BITS_PER_UNIT);
236 }
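
/* For example (assuming BITS_PER_UNIT is 8): precisions 1 and 5 are both
   rounded up to 8, 12 is rounded up to 16, and 17 is rounded up to 32;
   the precision is first rounded up to a power of two and then clamped
   to at least one byte.  */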
237 
238 /* If OP is defined by a statement that's being considered for vectorization,
239    return information about that statement, otherwise return NULL.  */
240 
241 static stmt_vec_info
242 vect_get_internal_def (vec_info *vinfo, tree op)
243 {
244   stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
245   if (def_stmt_info
246       && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
247     return def_stmt_info;
248   return NULL;
249 }
250 
251 /* Check whether NAME, an ssa-name used in STMT_VINFO,
252    is a result of a type promotion, such that:
253      DEF_STMT: NAME = NOP (name0)
254    If CHECK_SIGN is TRUE, check that either both types are signed or both are
255    unsigned.  */
256 
257 static bool
258 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
259 		   tree *orig_type, gimple **def_stmt, bool *promotion)
260 {
261   tree type = TREE_TYPE (name);
262   tree oprnd0;
263   enum vect_def_type dt;
264 
265   stmt_vec_info def_stmt_info;
266   if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
267     return false;
268 
269   if (dt != vect_internal_def
270       && dt != vect_external_def && dt != vect_constant_def)
271     return false;
272 
273   if (!*def_stmt)
274     return false;
275 
276   if (!is_gimple_assign (*def_stmt))
277     return false;
278 
279   if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
280     return false;
281 
282   oprnd0 = gimple_assign_rhs1 (*def_stmt);
283 
284   *orig_type = TREE_TYPE (oprnd0);
285   if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
286       || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
287     return false;
288 
289   if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
290     *promotion = true;
291   else
292     *promotion = false;
293 
294   if (!vect_is_simple_use (oprnd0, vinfo, &dt))
295     return false;
296 
297   return true;
298 }
299 
300 /* Holds information about an input operand after some sign changes
301    and type promotions have been peeled away.  */
302 class vect_unpromoted_value {
303 public:
304   vect_unpromoted_value ();
305 
306   void set_op (tree, vect_def_type, stmt_vec_info = NULL);
307 
308   /* The value obtained after peeling away zero or more casts.  */
309   tree op;
310 
311   /* The type of OP.  */
312   tree type;
313 
314   /* The definition type of OP.  */
315   vect_def_type dt;
316 
317   /* If OP is the result of peeling at least one cast, and if the cast
318      of OP itself is a vectorizable statement, CASTER identifies that
319      statement, otherwise it is null.  */
320   stmt_vec_info caster;
321 };
322 
323 inline vect_unpromoted_value::vect_unpromoted_value ()
324   : op (NULL_TREE),
325     type (NULL_TREE),
326     dt (vect_uninitialized_def),
327     caster (NULL)
328 {
329 }
330 
331 /* Set the operand to OP_IN, its definition type to DT_IN, and the
332    statement that casts it to CASTER_IN.  */
333 
334 inline void
335 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
336 			       stmt_vec_info caster_in)
337 {
338   op = op_in;
339   type = TREE_TYPE (op);
340   dt = dt_in;
341   caster = caster_in;
342 }
343 
344 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
345    to reach some vectorizable inner operand OP', continuing as long as it
346    is possible to convert OP' back to OP using a possible sign change
347    followed by a possible promotion P.  Return this OP', or null if OP is
348    not a vectorizable SSA name.  If there is a promotion P, describe its
349    input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
350    is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
351    have more than one user.
352 
353    A successful return means that it is possible to go from OP' to OP
354    via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
355    whereas the cast from UNPROM to OP might be a promotion, a sign
356    change, or a nop.
357 
358    E.g. say we have:
359 
360        signed short *ptr = ...;
361        signed short C = *ptr;
362        unsigned short B = (unsigned short) C;    // sign change
363        signed int A = (signed int) B;            // unsigned promotion
364        ...possible other uses of A...
365        unsigned int OP = (unsigned int) A;       // sign change
366 
367    In this case it's possible to go directly from C to OP using:
368 
369        OP = (unsigned int) (unsigned short) C;
370 	    +------------+ +--------------+
371 	       promotion      sign change
372 
373    so OP' would be C.  The input to the promotion is B, so UNPROM
374    would describe B.  */
375 
376 static tree
377 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
378 				      vect_unpromoted_value *unprom,
379 				      bool *single_use_p = NULL)
380 {
381   tree res = NULL_TREE;
382   tree op_type = TREE_TYPE (op);
383   unsigned int orig_precision = TYPE_PRECISION (op_type);
384   unsigned int min_precision = orig_precision;
385   stmt_vec_info caster = NULL;
386   while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
387     {
388       /* See whether OP is simple enough to vectorize.  */
389       stmt_vec_info def_stmt_info;
390       gimple *def_stmt;
391       vect_def_type dt;
392       if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
393 	break;
394 
395       /* If OP is the input of a demotion, skip over it to see whether
396 	 OP is itself the result of a promotion.  If so, the combined
397 	 effect of the promotion and the demotion might fit the required
398 	 pattern, otherwise neither operation fits.
399 
400 	 This copes with cases such as the result of an arithmetic
401 	 operation being truncated before being stored, and where that
402 	 arithmetic operation has been recognized as an over-widened one.  */
403       if (TYPE_PRECISION (op_type) <= min_precision)
404 	{
405 	  /* Use OP as the UNPROM described above if we haven't yet
406 	     found a promotion, or if using the new input preserves the
407 	     sign of the previous promotion.  */
408 	  if (!res
409 	      || TYPE_PRECISION (unprom->type) == orig_precision
410 	      || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
411 	    {
412 	      unprom->set_op (op, dt, caster);
413 	      min_precision = TYPE_PRECISION (op_type);
414 	    }
415 	  /* Stop if we've already seen a promotion and if this
416 	     conversion does more than change the sign.  */
417 	  else if (TYPE_PRECISION (op_type)
418 		   != TYPE_PRECISION (unprom->type))
419 	    break;
420 
421 	  /* The sequence now extends to OP.  */
422 	  res = op;
423 	}
424 
425       /* See whether OP is defined by a cast.  Record it as CASTER if
426 	 the cast is potentially vectorizable.  */
427       if (!def_stmt)
428 	break;
429       caster = def_stmt_info;
430 
431       /* Ignore pattern statements, since we don't link uses for them.  */
432       if (caster
433 	  && single_use_p
434 	  && !STMT_VINFO_RELATED_STMT (caster)
435 	  && !has_single_use (res))
436 	*single_use_p = false;
437 
438       gassign *assign = dyn_cast <gassign *> (def_stmt);
439       if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
440 	break;
441 
442       /* Continue with the input to the cast.  */
443       op = gimple_assign_rhs1 (def_stmt);
444       op_type = TREE_TYPE (op);
445     }
446   return res;
447 }
448 
449 /* OP is an integer operand to an operation that returns TYPE, and we
450    want to treat the operation as a widening one.  So far we can treat
451    it as widening from *COMMON_TYPE.
452 
453    Return true if OP is suitable for such a widening operation,
454    either widening from *COMMON_TYPE or from some supertype of it.
455    Update *COMMON_TYPE to the supertype in the latter case.
456 
457    SHIFT_P is true if OP is a shift amount.  */
458 
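/* As an illustrative walk-through (hypothetical values, not from a
   testcase): if TYPE is a 32-bit integer type, *COMMON_TYPE is currently
   an 8-bit unsigned type and OP is the constant 1000, then OP needs 10
   bits, 10 * 2 <= 32 holds, the precision is rounded up to 16, and
   *COMMON_TYPE is widened to a 16-bit unsigned type.  */
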
459 static bool
460 vect_joust_widened_integer (tree type, bool shift_p, tree op,
461 			    tree *common_type)
462 {
463   /* Calculate the minimum precision required by OP, without changing
464      the sign of either operand.  */
465   unsigned int precision;
466   if (shift_p)
467     {
468       if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
469 	return false;
470       precision = TREE_INT_CST_LOW (op);
471     }
472   else
473     {
474       precision = wi::min_precision (wi::to_widest (op),
475 				     TYPE_SIGN (*common_type));
476       if (precision * 2 > TYPE_PRECISION (type))
477 	return false;
478     }
479 
480   /* If OP requires a wider type, switch to that type.  The checks
481      above ensure that this is still narrower than the result.  */
482   precision = vect_element_precision (precision);
483   if (TYPE_PRECISION (*common_type) < precision)
484     *common_type = build_nonstandard_integer_type
485       (precision, TYPE_UNSIGNED (*common_type));
486   return true;
487 }
488 
489 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
490    is narrower than TYPE, storing the supertype in *COMMON_TYPE if so.  */
491 
492 static bool
493 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
494 {
495   if (types_compatible_p (*common_type, new_type))
496     return true;
497 
498   /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
499   if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
500       && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
501     return true;
502 
503   /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
504   if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
505       && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
506     {
507       *common_type = new_type;
508       return true;
509     }
510 
511   /* We have mismatched signs, with the signed type being
512      no wider than the unsigned type.  In this case we need
513      a wider signed type.  */
514   unsigned int precision = MAX (TYPE_PRECISION (*common_type),
515 				TYPE_PRECISION (new_type));
516   precision *= 2;
517   if (precision * 2 > TYPE_PRECISION (type))
518     return false;
519 
520   *common_type = build_nonstandard_integer_type (precision, false);
521   return true;
522 }
523 
524 /* Check whether STMT_INFO can be viewed as a tree of integer operations
525    in which each node either performs CODE or WIDENED_CODE, and where
526    each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
527    specifies the maximum number of leaf operands.  SHIFT_P says whether
528    CODE and WIDENED_CODE are some sort of shift.
529 
530    If STMT_INFO is such a tree, return the number of leaf operands
531    and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
532    to a type that (a) is narrower than the result of STMT_INFO and
533    (b) can hold all leaf operand values.
534 
535    Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
536    exists.  */
537 
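/* For example (an illustrative sketch): for unsigned char inputs A, B, C
   and D promoted to int and combined as ((int) A + (int) B)
   + ((int) C + (int) D), calling this function on the outermost addition
   with CODE == PLUS_EXPR, WIDENED_CODE == WIDEN_PLUS_EXPR and
   MAX_NOPS == 4 would return 4, describe A, B, C and D in UNPROM[0..3],
   and set *COMMON_TYPE to the unsigned char type.  */
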
538 static unsigned int
539 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
540 		      tree_code widened_code, bool shift_p,
541 		      unsigned int max_nops,
542 		      vect_unpromoted_value *unprom, tree *common_type)
543 {
544   /* Check for an integer operation with the right code.  */
545   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
546   if (!assign)
547     return 0;
548 
549   tree_code rhs_code = gimple_assign_rhs_code (assign);
550   if (rhs_code != code && rhs_code != widened_code)
551     return 0;
552 
553   tree type = gimple_expr_type (assign);
554   if (!INTEGRAL_TYPE_P (type))
555     return 0;
556 
557   /* Assume that both operands will be leaf operands.  */
558   max_nops -= 2;
559 
560   /* Check the operands.  */
561   unsigned int next_op = 0;
562   for (unsigned int i = 0; i < 2; ++i)
563     {
564       vect_unpromoted_value *this_unprom = &unprom[next_op];
565       unsigned int nops = 1;
566       tree op = gimple_op (assign, i + 1);
567       if (i == 1 && TREE_CODE (op) == INTEGER_CST)
568 	{
569 	  /* We already have a common type from earlier operands.
570 	     Update it to account for OP.  */
571 	  this_unprom->set_op (op, vect_constant_def);
572 	  if (!vect_joust_widened_integer (type, shift_p, op, common_type))
573 	    return 0;
574 	}
575       else
576 	{
577 	  /* Only allow shifts by constants.  */
578 	  if (shift_p && i == 1)
579 	    return 0;
580 
581 	  if (!vect_look_through_possible_promotion (vinfo, op, this_unprom))
582 	    return 0;
583 
584 	  if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
585 	    {
586 	      /* The operand isn't widened.  If STMT_INFO has the code
587 		 for an unwidened operation, recursively check whether
588 		 this operand is a node of the tree.  */
589 	      if (rhs_code != code
590 		  || max_nops == 0
591 		  || this_unprom->dt != vect_internal_def)
592 		return 0;
593 
594 	      /* Give back the leaf slot allocated above now that we're
595 		 not treating this as a leaf operand.  */
596 	      max_nops += 1;
597 
598 	      /* Recursively process the definition of the operand.  */
599 	      stmt_vec_info def_stmt_info
600 		= vinfo->lookup_def (this_unprom->op);
601 	      nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
602 					   widened_code, shift_p, max_nops,
603 					   this_unprom, common_type);
604 	      if (nops == 0)
605 		return 0;
606 
607 	      max_nops -= nops;
608 	    }
609 	  else
610 	    {
611 	      /* Make sure that the operand is narrower than the result.  */
612 	      if (TYPE_PRECISION (this_unprom->type) * 2
613 		  > TYPE_PRECISION (type))
614 		return 0;
615 
616 	      /* Update COMMON_TYPE for the new operand.  */
617 	      if (i == 0)
618 		*common_type = this_unprom->type;
619 	      else if (!vect_joust_widened_type (type, this_unprom->type,
620 						 common_type))
621 		return 0;
622 	    }
623 	}
624       next_op += nops;
625     }
626   return next_op;
627 }
628 
629 /* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
630    is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
631 
632 static tree
633 vect_recog_temp_ssa_var (tree type, gimple *stmt)
634 {
635   return make_temp_ssa_name (type, stmt, "patt");
636 }
637 
638 /* STMT2_INFO describes a type conversion that could be split into STMT1
639    followed by a version of STMT2_INFO that takes NEW_RHS as its first
640    input.  Try to do this using pattern statements, returning true on
641    success.  */
642 
643 static bool
644 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
645 		      gimple *stmt1, tree vectype)
646 {
647   if (is_pattern_stmt_p (stmt2_info))
648     {
649       /* STMT2_INFO is part of a pattern.  Get the statement to which
650 	 the pattern is attached.  */
651       stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
652       vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
653 
654       if (dump_enabled_p ())
655 	dump_printf_loc (MSG_NOTE, vect_location,
656 			 "Splitting pattern statement: %G", stmt2_info->stmt);
657 
658       /* Since STMT2_INFO is a pattern statement, we can change it
659 	 in-situ without worrying about changing the code for the
660 	 containing block.  */
661       gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
662 
663       if (dump_enabled_p ())
664 	{
665 	  dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
666 	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
667 			   stmt2_info->stmt);
668 	}
669 
670       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
671       if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
672 	/* STMT2_INFO is the actual pattern statement.  Add STMT1
673 	   to the end of the definition sequence.  */
674 	gimple_seq_add_stmt_without_update (def_seq, stmt1);
675       else
676 	{
677 	  /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
678 	     before it.  */
679 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
680 	  gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
681 	}
682       return true;
683     }
684   else
685     {
686       /* STMT2_INFO doesn't yet have a pattern.  Try to create a
687 	 two-statement pattern now.  */
688       gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
689       tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
690       tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
691       if (!lhs_vectype)
692 	return false;
693 
694       if (dump_enabled_p ())
695 	dump_printf_loc (MSG_NOTE, vect_location,
696 			 "Splitting statement: %G", stmt2_info->stmt);
697 
698       /* Add STMT1 as a singleton pattern definition sequence.  */
699       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
700       vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
701       gimple_seq_add_stmt_without_update (def_seq, stmt1);
702 
703       /* Build the second of the two pattern statements.  */
704       tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
705       gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
706       vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
707 
708       if (dump_enabled_p ())
709 	{
710 	  dump_printf_loc (MSG_NOTE, vect_location,
711 			   "into pattern statements: %G", stmt1);
712 	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
713 	}
714 
715       return true;
716     }
717 }
718 
719 /* Convert UNPROM to TYPE and return the result, adding new statements
720    to STMT_INFO's pattern definition statements if no better way is
721    available.  VECTYPE is the vector form of TYPE.  */
722 
723 static tree
724 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
725 		    vect_unpromoted_value *unprom, tree vectype)
726 {
727   /* Check for a no-op conversion.  */
728   if (types_compatible_p (type, TREE_TYPE (unprom->op)))
729     return unprom->op;
730 
731   /* Allow the caller to create constant vect_unpromoted_values.  */
732   if (TREE_CODE (unprom->op) == INTEGER_CST)
733     return wide_int_to_tree (type, wi::to_widest (unprom->op));
734 
735   tree input = unprom->op;
736   if (unprom->caster)
737     {
738       tree lhs = gimple_get_lhs (unprom->caster->stmt);
739       tree lhs_type = TREE_TYPE (lhs);
740 
741       /* If the result of the existing cast is the right width, use it
742 	 instead of the source of the cast.  */
743       if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
744 	input = lhs;
745       /* If the precision we want is between the source and result
746 	 precisions of the existing cast, try splitting the cast into
747 	 two and tapping into a mid-way point.  */
748       else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
749 	       && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
750 	{
751 	  /* In order to preserve the semantics of the original cast,
752 	     give the mid-way point the same signedness as the input value.
753 
754 	     It would be possible to use a signed type here instead if
755 	     TYPE is signed and UNPROM->TYPE is unsigned, but that would
756 	     make the sign of the midtype sensitive to the order in
757 	     which we process the statements, since the signedness of
758 	     TYPE is the signedness required by just one of possibly
759 	     many users.  Also, unsigned promotions are usually as cheap
760 	     as or cheaper than signed ones, so it's better to keep an
761 	     unsigned promotion.  */
762 	  tree midtype = build_nonstandard_integer_type
763 	    (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
764 	  tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
765 	  if (vec_midtype)
766 	    {
767 	      input = vect_recog_temp_ssa_var (midtype, NULL);
768 	      gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
769 						       unprom->op);
770 	      if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
771 					 vec_midtype))
772 		append_pattern_def_seq (vinfo, stmt_info,
773 					new_stmt, vec_midtype);
774 	    }
775 	}
776 
777       /* See if we can reuse an existing result.  */
778       if (types_compatible_p (type, TREE_TYPE (input)))
779 	return input;
780     }
781 
782   /* We need a new conversion statement.  */
783   tree new_op = vect_recog_temp_ssa_var (type, NULL);
784   gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
785 
786   /* If OP is an external value, see if we can insert the new statement
787      on an incoming edge.  */
788   if (input == unprom->op && unprom->dt == vect_external_def)
789     if (edge e = vect_get_external_def_edge (vinfo, input))
790       {
791 	basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
792 	gcc_assert (!new_bb);
793 	return new_op;
794       }
795 
796   /* As a (common) last resort, add the statement to the pattern itself.  */
797   append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
798   return new_op;
799 }
800 
801 /* Invoke vect_convert_input for N elements of UNPROM and store the
802    result in the corresponding elements of RESULT.  */
803 
804 static void
805 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
806 		     tree *result, tree type, vect_unpromoted_value *unprom,
807 		     tree vectype)
808 {
809   for (unsigned int i = 0; i < n; ++i)
810     {
811       unsigned int j;
812       for (j = 0; j < i; ++j)
813 	if (unprom[j].op == unprom[i].op)
814 	  break;
815       if (j < i)
816 	result[i] = result[j];
817       else
818 	result[i] = vect_convert_input (vinfo, stmt_info,
819 					type, &unprom[i], vectype);
820     }
821 }
822 
823 /* The caller has created a (possibly empty) sequence of pattern definition
824    statements followed by a single statement PATTERN_STMT.  Cast the result
825    of this final statement to TYPE.  If a new statement is needed, add
826    PATTERN_STMT to the end of STMT_INFO's pattern definition statements
827    and return the new statement, otherwise return PATTERN_STMT as-is.
828    VECITYPE is the vector form of PATTERN_STMT's result type.  */
829 
830 static gimple *
831 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
832 		     gimple *pattern_stmt, tree vecitype)
833 {
834   tree lhs = gimple_get_lhs (pattern_stmt);
835   if (!types_compatible_p (type, TREE_TYPE (lhs)))
836     {
837       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
838       tree cast_var = vect_recog_temp_ssa_var (type, NULL);
839       pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
840     }
841   return pattern_stmt;
842 }
843 
844 /* Return true if STMT_VINFO describes a reduction for which reassociation
845    is allowed.  If STMT_INFO is part of a group, assume that it's part of
846    a reduction chain and optimistically assume that all statements
847    except the last allow reassociation.
848    Also require it to have code CODE and to be a reduction
849    in the outermost loop.  When returning true, store the operands in
850    *OP0_OUT and *OP1_OUT.  */
851 
852 static bool
853 vect_reassociating_reduction_p (vec_info *vinfo,
854 				stmt_vec_info stmt_info, tree_code code,
855 				tree *op0_out, tree *op1_out)
856 {
857   loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
858   if (!loop_info)
859     return false;
860 
861   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
862   if (!assign || gimple_assign_rhs_code (assign) != code)
863     return false;
864 
865   /* We don't allow changing the order of the computation in the inner-loop
866      when doing outer-loop vectorization.  */
867   class loop *loop = LOOP_VINFO_LOOP (loop_info);
868   if (loop && nested_in_vect_loop_p (loop, stmt_info))
869     return false;
870 
871   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
872     {
873       if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
874 				       code))
875 	return false;
876     }
877   else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
878     return false;
879 
880   *op0_out = gimple_assign_rhs1 (assign);
881   *op1_out = gimple_assign_rhs2 (assign);
882   if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
883     std::swap (*op0_out, *op1_out);
884   return true;
885 }
886 
887 /* Function vect_recog_dot_prod_pattern
888 
889    Try to find the following pattern:
890 
891      type x_t, y_t;
892      TYPE1 prod;
893      TYPE2 sum = init;
894    loop:
895      sum_0 = phi <init, sum_1>
896      S1  x_t = ...
897      S2  y_t = ...
898      S3  x_T = (TYPE1) x_t;
899      S4  y_T = (TYPE1) y_t;
900      S5  prod = x_T * y_T;
901      [S6  prod = (TYPE2) prod;  #optional]
902      S7  sum_1 = prod + sum_0;
903 
904    where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
905    same size as 'TYPE1' or bigger. This is a special case of a reduction
906    computation.
907 
908    Input:
909 
910    * STMT_VINFO: The stmt from which the pattern search begins.  In the
911    example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
912    will be detected.
913 
914    Output:
915 
916    * TYPE_OUT: The type of the output  of this pattern.
917 
918    * Return value: A new stmt that will be used to replace the sequence of
919    stmts that constitute the pattern. In this case it will be:
920         WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
921 
922    Note: The dot-prod idiom is a widening reduction pattern that is
923          vectorized without preserving all the intermediate results. It
924          produces only N/2 (widened) results (by summing up pairs of
925          intermediate results) rather than all N results.  Therefore, we
926          cannot allow this pattern when we want to get all the results and in
927          the correct order (as is the case when this computation is in an
928          inner-loop nested in an outer-loop that is being vectorized).  */
929 
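/* As an illustrative example (a sketch, not taken from any particular
   testcase), the kind of source loop this matcher targets is:

     short *x, *y;
     int sum = 0;
     for (i = 0; i < n; i++)
       sum += (int) x[i] * (int) y[i];

   Here the casts of x[i] and y[i] correspond to S3/S4 above, the
   multiplication to S5 and the accumulation to S7; on targets that
   provide a DOT_PROD_EXPR optab the whole sequence becomes a single
   dot-product reduction step.  */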
930 static gimple *
931 vect_recog_dot_prod_pattern (vec_info *vinfo,
932 			     stmt_vec_info stmt_vinfo, tree *type_out)
933 {
934   tree oprnd0, oprnd1;
935   gimple *last_stmt = stmt_vinfo->stmt;
936   tree type, half_type;
937   gimple *pattern_stmt;
938   tree var;
939 
940   /* Look for the following pattern
941           DX = (TYPE1) X;
942           DY = (TYPE1) Y;
943           DPROD = DX * DY;
944           DDPROD = (TYPE2) DPROD;
945           sum_1 = DDPROD + sum_0;
946      In which
947      - DX is double the size of X
948      - DY is double the size of Y
949      - DX, DY, DPROD all have the same type
950      - sum is the same size as DPROD or bigger
951      - sum has been recognized as a reduction variable.
952 
953      This is equivalent to:
954        DPROD = X w* Y;          #widen mult
955        sum_1 = DPROD w+ sum_0;  #widen summation
956      or
957        DPROD = X w* Y;          #widen mult
958        sum_1 = DPROD + sum_0;   #summation
959    */
960 
961   /* Starting from LAST_STMT, follow the defs of its uses in search
962      of the above pattern.  */
963 
964   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
965 				       &oprnd0, &oprnd1))
966     return NULL;
967 
968   type = gimple_expr_type (last_stmt);
969 
970   vect_unpromoted_value unprom_mult;
971   oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
972 
973   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
974      we know that oprnd1 is the reduction variable (defined by a loop-header
975      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
976      It remains to check that oprnd0 is defined by a (widen_)mult_expr.  */
977   if (!oprnd0)
978     return NULL;
979 
980   stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
981   if (!mult_vinfo)
982     return NULL;
983 
984   /* FORNOW.  We can continue analyzing the def-use chain when this stmt is in
985      a phi inside the loop (in case we are analyzing an outer-loop).  */
986   vect_unpromoted_value unprom0[2];
987   if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
988 			     false, 2, unprom0, &half_type))
989     return NULL;
990 
991   /* If there are two widening operations, make sure they agree on
992      the sign of the extension.  */
993   if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
994       && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
995     return NULL;
996 
997   vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
998 
999   tree half_vectype;
1000   if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1001 					type_out, &half_vectype))
1002     return NULL;
1003 
1004   /* Get the inputs in the appropriate types.  */
1005   tree mult_oprnd[2];
1006   vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1007 		       unprom0, half_vectype);
1008 
1009   var = vect_recog_temp_ssa_var (type, NULL);
1010   pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1011 				      mult_oprnd[0], mult_oprnd[1], oprnd1);
1012 
1013   return pattern_stmt;
1014 }
1015 
1016 
1017 /* Function vect_recog_sad_pattern
1018 
1019    Try to find the following Sum of Absolute Difference (SAD) pattern:
1020 
1021      type x_t, y_t;
1022      signed TYPE1 diff, abs_diff;
1023      TYPE2 sum = init;
1024    loop:
1025      sum_0 = phi <init, sum_1>
1026      S1  x_t = ...
1027      S2  y_t = ...
1028      S3  x_T = (TYPE1) x_t;
1029      S4  y_T = (TYPE1) y_t;
1030      S5  diff = x_T - y_T;
1031      S6  abs_diff = ABS_EXPR <diff>;
1032      [S7  abs_diff = (TYPE2) abs_diff;  #optional]
1033      S8  sum_1 = abs_diff + sum_0;
1034 
1035    where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1036    same size as 'TYPE1' or bigger. This is a special case of a reduction
1037    computation.
1038 
1039    Input:
1040 
1041    * STMT_VINFO: The stmt from which the pattern search begins.  In the
1042    example, when this function is called with S8, the pattern
1043    {S3,S4,S5,S6,S7,S8} will be detected.
1044 
1045    Output:
1046 
1047    * TYPE_OUT: The type of the output of this pattern.
1048 
1049    * Return value: A new stmt that will be used to replace the sequence of
1050    stmts that constitute the pattern. In this case it will be:
1051         SAD_EXPR <x_t, y_t, sum_0>
1052   */
1053 
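/* An illustrative source loop for this matcher (a sketch, not from a
   testcase):

     unsigned char *x, *y;
     unsigned int sum = 0;
     for (i = 0; i < n; i++)
       sum += abs ((int) x[i] - (int) y[i]);

   The subtraction happens in a wider signed type, the absolute value
   keeps the summand nonnegative, and the additions accumulate it into
   SUM; targets that provide a SAD_EXPR optab can then compute several
   absolute differences and their sum in one operation.  */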
1054 static gimple *
1055 vect_recog_sad_pattern (vec_info *vinfo,
1056 			stmt_vec_info stmt_vinfo, tree *type_out)
1057 {
1058   gimple *last_stmt = stmt_vinfo->stmt;
1059   tree half_type;
1060 
1061   /* Look for the following pattern
1062           DX = (TYPE1) X;
1063           DY = (TYPE1) Y;
1064           DDIFF = DX - DY;
1065           DAD = ABS_EXPR <DDIFF>;
1066           [DAD = (TYPE2) DAD;   #optional]
1067           sum_1 = DAD + sum_0;
1068      In which
1069      - DX is at least double the size of X
1070      - DY is at least double the size of Y
1071      - DX, DY, DDIFF, DAD all have the same type
1072      - sum is the same size as DAD or bigger
1073      - sum has been recognized as a reduction variable.
1074 
1075      This is equivalent to:
1076        DDIFF = X w- Y;          #widen sub
1077        DAD = ABS_EXPR <DDIFF>;
1078        sum_1 = DAD w+ sum_0;    #widen summation
1079      or
1080        DDIFF = X w- Y;          #widen sub
1081        DAD = ABS_EXPR <DDIFF>;
1082        sum_1 = DAD + sum_0;     #summation
1083    */
1084 
1085   /* Starting from LAST_STMT, follow the defs of its uses in search
1086      of the above pattern.  */
1087 
1088   tree plus_oprnd0, plus_oprnd1;
1089   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1090 				       &plus_oprnd0, &plus_oprnd1))
1091     return NULL;
1092 
1093   tree sum_type = gimple_expr_type (last_stmt);
1094 
1095   /* Any non-truncating sequence of conversions is OK here, since
1096      with a successful match, the result of the ABS(U) is known to fit
1097      within the nonnegative range of the result type.  (It cannot be the
1098      negative of the minimum signed value due to the range of the widening
1099      MINUS_EXPR.)  */
1100   vect_unpromoted_value unprom_abs;
1101   plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1102 						      &unprom_abs);
1103 
1104   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
1105      we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1106      phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1107      Then check that plus_oprnd0 is defined by an abs_expr.  */
1108 
1109   if (!plus_oprnd0)
1110     return NULL;
1111 
1112   stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1113   if (!abs_stmt_vinfo)
1114     return NULL;
1115 
1116   /* FORNOW.  We can continue analyzing the def-use chain when this stmt is in
1117      a phi inside the loop (in case we are analyzing an outer-loop).  */
1118   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1119   if (!abs_stmt
1120       || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1121 	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1122     return NULL;
1123 
1124   tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1125   tree abs_type = TREE_TYPE (abs_oprnd);
1126   if (TYPE_UNSIGNED (abs_type))
1127     return NULL;
1128 
1129   /* Peel off conversions from the ABS input.  This can involve sign
1130      changes (e.g. from an unsigned subtraction to a signed ABS input)
1131      or signed promotion, but it can't include unsigned promotion.
1132      (Note that ABS of an unsigned promotion should have been folded
1133      away before now anyway.)  */
1134   vect_unpromoted_value unprom_diff;
1135   abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1136 						    &unprom_diff);
1137   if (!abs_oprnd)
1138     return NULL;
1139   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1140       && TYPE_UNSIGNED (unprom_diff.type))
1141     return NULL;
1142 
1143   /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
1144   stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1145   if (!diff_stmt_vinfo)
1146     return NULL;
1147 
1148   /* FORNOW.  We can continue analyzing the def-use chain when this stmt is in
1149      a phi inside the loop (in case we are analyzing an outer-loop).  */
1150   vect_unpromoted_value unprom[2];
1151   if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
1152 			     false, 2, unprom, &half_type))
1153     return NULL;
1154 
1155   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1156 
1157   tree half_vectype;
1158   if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1159 					type_out, &half_vectype))
1160     return NULL;
1161 
1162   /* Get the inputs to the SAD_EXPR in the appropriate types.  */
1163   tree sad_oprnd[2];
1164   vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1165 		       unprom, half_vectype);
1166 
1167   tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1168   gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1169 					      sad_oprnd[1], plus_oprnd1);
1170 
1171   return pattern_stmt;
1172 }
1173 
1174 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1175    so that it can be treated as though it had the form:
1176 
1177       A_TYPE a;
1178       B_TYPE b;
1179       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
1180       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
1181     | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
1182     | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
1183     | RES_TYPE res = a_extend ORIG_CODE b_extend;
1184 
1185    Try to replace the pattern with:
1186 
1187       A_TYPE a;
1188       B_TYPE b;
1189       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
1190       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
1191     | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1192     | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op
1193 
1194    where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1195 
1196    SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
1197    name of the pattern being matched, for dump purposes.  */
1198 
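/* For instance (an illustrative sketch), with ORIG_CODE == MULT_EXPR and
   WIDE_CODE == WIDEN_MULT_EXPR this turns

     short a, b;
     int res = (int) a * (int) b;

   into a single widening multiplication of the narrow inputs, provided
   supportable_widening_operation reports that the target can widen from
   the chosen narrow vector type to the wider one.  */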
1199 static gimple *
1200 vect_recog_widen_op_pattern (vec_info *vinfo,
1201 			     stmt_vec_info last_stmt_info, tree *type_out,
1202 			     tree_code orig_code, tree_code wide_code,
1203 			     bool shift_p, const char *name)
1204 {
1205   gimple *last_stmt = last_stmt_info->stmt;
1206 
1207   vect_unpromoted_value unprom[2];
1208   tree half_type;
1209   if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1210 			     shift_p, 2, unprom, &half_type))
1211     return NULL;
1212 
1213   /* Pattern detected.  */
1214   vect_pattern_detected (name, last_stmt);
1215 
1216   tree type = gimple_expr_type (last_stmt);
1217   tree itype = type;
1218   if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1219       || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1220     itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1221 					    TYPE_UNSIGNED (half_type));
1222 
1223   /* Check target support  */
1224   tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1225   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1226   tree ctype = itype;
1227   tree vecctype = vecitype;
1228   if (orig_code == MINUS_EXPR
1229       && TYPE_UNSIGNED (itype)
1230       && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1231     {
1232       /* Subtraction is special, even if half_type is unsigned and no matter
1233 	 whether type is signed or unsigned, if type is wider than itype,
1234 	 we need to sign-extend from the widening operation result to the
1235 	 result type.
1236 	 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1237 	 itype unsigned short and type either int or unsigned int.
1238 	 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1239 	 (unsigned short) 0xffff, but for type int we want the result -1
1240 	 and for type unsigned int 0xffffffff rather than 0xffff.  */
1241       ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1242       vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1243     }
1244 
1245   enum tree_code dummy_code;
1246   int dummy_int;
1247   auto_vec<tree> dummy_vec;
1248   if (!vectype
1249       || !vecitype
1250       || !vecctype
1251       || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1252 					  vecitype, vectype,
1253 					  &dummy_code, &dummy_code,
1254 					  &dummy_int, &dummy_vec))
1255     return NULL;
1256 
1257   *type_out = get_vectype_for_scalar_type (vinfo, type);
1258   if (!*type_out)
1259     return NULL;
1260 
1261   tree oprnd[2];
1262   vect_convert_inputs (vinfo, last_stmt_info,
1263 		       2, oprnd, half_type, unprom, vectype);
1264 
1265   tree var = vect_recog_temp_ssa_var (itype, NULL);
1266   gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1267 					      oprnd[0], oprnd[1]);
1268 
1269   if (vecctype != vecitype)
1270     pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1271 					pattern_stmt, vecitype);
1272 
1273   return vect_convert_output (vinfo, last_stmt_info,
1274 			      type, pattern_stmt, vecctype);
1275 }
1276 
1277 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1278    to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */
1279 
1280 static gimple *
1281 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1282 			       tree *type_out)
1283 {
1284   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1285 				      MULT_EXPR, WIDEN_MULT_EXPR, false,
1286 				      "vect_recog_widen_mult_pattern");
1287 }
1288 
1289 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1290    to WIDEN_PLUS_EXPR.  See vect_recog_widen_op_pattern for details.  */
1291 
1292 static gimple *
1293 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1294 			       tree *type_out)
1295 {
1296   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1297 				      PLUS_EXPR, WIDEN_PLUS_EXPR, false,
1298 				      "vect_recog_widen_plus_pattern");
1299 }
1300 
1301 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1302    to WIDEN_MINUS_EXPR.  See vect_recog_widen_op_pattern for details.  */
1303 static gimple *
1304 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1305 			       tree *type_out)
1306 {
1307   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1308 				      MINUS_EXPR, WIDEN_MINUS_EXPR, false,
1309 				      "vect_recog_widen_minus_pattern");
1310 }
1311 
1312 /* Function vect_recog_pow_pattern
1313 
1314    Try to find the following pattern:
1315 
1316      x = POW (y, N);
1317 
1318    with POW being one of pow, powf, powi, powif and N being
1319    either 2 or 0.5.
1320 
1321    Input:
1322 
1323    * STMT_VINFO: The stmt from which the pattern search begins.
1324 
1325    Output:
1326 
1327    * TYPE_OUT: The type of the output of this pattern.
1328 
1329    * Return value: A new stmt that will be used to replace the sequence of
1330    stmts that constitute the pattern. In this case it will be:
1331         x = x * x
1332    or
1333 	x = sqrt (x)
1334 */
1335 
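/* Illustrative rewrites performed by this matcher (sketches only):

     x = pow (y, 2.0);   ->   x = y * y;
     x = pow (y, 0.5);   ->   x = sqrt (y);   (when IFN_SQRT is supported)
     x = pow (C, y);     ->   x = exp (log (C) * y);
                              (C a real constant, with
                               -funsafe-math-optimizations and a SIMD
                               clone of exp available)  */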
1336 static gimple *
1337 vect_recog_pow_pattern (vec_info *vinfo,
1338 			stmt_vec_info stmt_vinfo, tree *type_out)
1339 {
1340   gimple *last_stmt = stmt_vinfo->stmt;
1341   tree base, exp;
1342   gimple *stmt;
1343   tree var;
1344 
1345   if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1346     return NULL;
1347 
1348   switch (gimple_call_combined_fn (last_stmt))
1349     {
1350     CASE_CFN_POW:
1351     CASE_CFN_POWI:
1352       break;
1353 
1354     default:
1355       return NULL;
1356     }
1357 
1358   base = gimple_call_arg (last_stmt, 0);
1359   exp = gimple_call_arg (last_stmt, 1);
1360   if (TREE_CODE (exp) != REAL_CST
1361       && TREE_CODE (exp) != INTEGER_CST)
1362     {
1363       if (flag_unsafe_math_optimizations
1364 	  && TREE_CODE (base) == REAL_CST
1365 	  && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
1366 	{
1367 	  combined_fn log_cfn;
1368 	  built_in_function exp_bfn;
1369 	  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1370 	    {
1371 	    case BUILT_IN_POW:
1372 	      log_cfn = CFN_BUILT_IN_LOG;
1373 	      exp_bfn = BUILT_IN_EXP;
1374 	      break;
1375 	    case BUILT_IN_POWF:
1376 	      log_cfn = CFN_BUILT_IN_LOGF;
1377 	      exp_bfn = BUILT_IN_EXPF;
1378 	      break;
1379 	    case BUILT_IN_POWL:
1380 	      log_cfn = CFN_BUILT_IN_LOGL;
1381 	      exp_bfn = BUILT_IN_EXPL;
1382 	      break;
1383 	    default:
1384 	      return NULL;
1385 	    }
1386 	  tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1387 	  tree exp_decl = builtin_decl_implicit (exp_bfn);
1388 	  /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
1389 	     does that, but if C is a power of 2, we want to use
1390 	     exp2 (log2 (C) * x) in the non-vectorized version, but for
1391 	     vectorization we don't have vectorized exp2.  */
1392 	  if (logc
1393 	      && TREE_CODE (logc) == REAL_CST
1394 	      && exp_decl
1395 	      && lookup_attribute ("omp declare simd",
1396 				   DECL_ATTRIBUTES (exp_decl)))
1397 	    {
1398 	      cgraph_node *node = cgraph_node::get_create (exp_decl);
1399 	      if (node->simd_clones == NULL)
1400 		{
1401 		  if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1402 		      || node->definition)
1403 		    return NULL;
1404 		  expand_simd_clones (node);
1405 		  if (node->simd_clones == NULL)
1406 		    return NULL;
1407 		}
1408 	      *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1409 	      if (!*type_out)
1410 		return NULL;
1411 	      tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1412 	      gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1413 	      append_pattern_def_seq (vinfo, stmt_vinfo, g);
1414 	      tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1415 	      g = gimple_build_call (exp_decl, 1, def);
1416 	      gimple_call_set_lhs (g, res);
1417 	      return g;
1418 	    }
1419 	}
1420 
1421       return NULL;
1422     }
1423 
1424   /* We now have a pow or powi builtin function call with a constant
1425      exponent.  */
1426 
1427   /* Catch squaring.  */
1428   if ((tree_fits_shwi_p (exp)
1429        && tree_to_shwi (exp) == 2)
1430       || (TREE_CODE (exp) == REAL_CST
1431           && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1432     {
1433       if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
1434 					    TREE_TYPE (base), type_out))
1435 	return NULL;
1436 
1437       var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1438       stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1439       return stmt;
1440     }
1441 
1442   /* Catch square root.  */
1443   if (TREE_CODE (exp) == REAL_CST
1444       && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1445     {
1446       *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1447       if (*type_out
1448 	  && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1449 					     OPTIMIZE_FOR_SPEED))
1450 	{
1451 	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1452 	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1453 	  gimple_call_set_lhs (stmt, var);
1454 	  gimple_call_set_nothrow (stmt, true);
1455 	  return stmt;
1456 	}
1457     }
1458 
1459   return NULL;
1460 }
1461 
1462 
1463 /* Function vect_recog_widen_sum_pattern
1464 
1465    Try to find the following pattern:
1466 
1467      type x_t;
1468      TYPE x_T, sum = init;
1469    loop:
1470      sum_0 = phi <init, sum_1>
1471      S1  x_t = *p;
1472      S2  x_T = (TYPE) x_t;
1473      S3  sum_1 = x_T + sum_0;
1474 
1475    where type 'TYPE' is at least double the size of type 'type', i.e. we're
1476    summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1477    a special case of a reduction computation.
1478 
1479    Input:
1480 
1481    * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1482    when this function is called with S3, the pattern {S2,S3} will be detected.
1483 
1484    Output:
1485 
1486    * TYPE_OUT: The type of the output of this pattern.
1487 
1488    * Return value: A new stmt that will be used to replace the sequence of
1489    stmts that constitute the pattern. In this case it will be:
1490         WIDEN_SUM <x_t, sum_0>
1491 
1492    Note: The widening-sum idiom is a widening reduction pattern that is
1493 	 vectorized without preserving all the intermediate results. It
1494          produces only N/2 (widened) results (by summing up pairs of
1495 	 intermediate results) rather than all N results.  Therefore, we
1496 	 cannot allow this pattern when we want to get all the results and in
1497 	 the correct order (as is the case when this computation is in an
1498 	 inner-loop nested in an outer-loop that is being vectorized).  */
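/* Illustrative scalar source for the pattern above (an assumption about
   typical input, not part of the original comment):

     short *p;
     int sum = 0;
     for (int i = 0; i < n; i++)
       sum += p[i];   // gimplifies to S1/S2/S3 above

   i.e. narrow elements accumulated into a wider sum.  */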
1499 
1500 static gimple *
1501 vect_recog_widen_sum_pattern (vec_info *vinfo,
1502 			      stmt_vec_info stmt_vinfo, tree *type_out)
1503 {
1504   gimple *last_stmt = stmt_vinfo->stmt;
1505   tree oprnd0, oprnd1;
1506   tree type;
1507   gimple *pattern_stmt;
1508   tree var;
1509 
1510   /* Look for the following pattern
1511           DX = (TYPE) X;
1512           sum_1 = DX + sum_0;
1513      In which DX is at least double the size of X, and sum_1 has been
1514      recognized as a reduction variable.
1515    */
1516 
1517   /* Starting from LAST_STMT, follow the defs of its uses in search
1518      of the above pattern.  */
1519 
1520   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1521 				       &oprnd0, &oprnd1))
1522     return NULL;
1523 
1524   type = gimple_expr_type (last_stmt);
1525 
1526   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
1527      we know that oprnd1 is the reduction variable (defined by a loop-header
1528      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1529      Left to check that oprnd0 is defined by a cast from type 'type' to type
1530      'TYPE'.  */
1531 
1532   vect_unpromoted_value unprom0;
1533   if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1534       || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1535     return NULL;
1536 
1537   vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1538 
1539   if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
1540 					unprom0.type, type_out))
1541     return NULL;
1542 
1543   var = vect_recog_temp_ssa_var (type, NULL);
1544   pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1545 
1546   return pattern_stmt;
1547 }
1548 
1549 /* Recognize cases in which an operation is performed in one type WTYPE
1550    but could be done more efficiently in a narrower type NTYPE.  For example,
1551    if we have:
1552 
1553      ATYPE a;  // narrower than NTYPE
1554      BTYPE b;  // narrower than NTYPE
1555      WTYPE aw = (WTYPE) a;
1556      WTYPE bw = (WTYPE) b;
1557      WTYPE res = aw + bw;  // only uses of aw and bw
1558 
1559    then it would be more efficient to do:
1560 
1561      NTYPE an = (NTYPE) a;
1562      NTYPE bn = (NTYPE) b;
1563      NTYPE resn = an + bn;
1564      WTYPE res = (WTYPE) resn;
1565 
1566    Other situations include things like:
1567 
1568      ATYPE a;  // NTYPE or narrower
1569      WTYPE aw = (WTYPE) a;
1570      WTYPE res = aw + b;
1571 
1572    when only "(NTYPE) res" is significant.  In that case it's more efficient
1573    to truncate "b" and do the operation on NTYPE instead:
1574 
1575      NTYPE an = (NTYPE) a;
1576      NTYPE bn = (NTYPE) b;  // truncation
1577      NTYPE resn = an + bn;
1578      WTYPE res = (WTYPE) resn;
1579 
1580    All users of "res" should then use "resn" instead, making the final
1581    statement dead (not marked as relevant).  The final statement is still
1582    needed to maintain the type correctness of the IR.
1583 
1584    vect_determine_precisions has already determined the minimum
1585    precision of the operation and the minimum precision required
1586    by users of the result.  */
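/* A concrete instance (illustrative only):

     unsigned char a, b, *out;
     *out = (unsigned char) ((int) a + (int) b);

   Only the low 8 bits of the int addition are ever used, so the addition
   can be carried out in a narrower type, letting each vector statement
   cover correspondingly more elements.  */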
1587 
1588 static gimple *
1589 vect_recog_over_widening_pattern (vec_info *vinfo,
1590 				  stmt_vec_info last_stmt_info, tree *type_out)
1591 {
1592   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1593   if (!last_stmt)
1594     return NULL;
1595 
1596   /* See whether we have found that this operation can be done on a
1597      narrower type without changing its semantics.  */
1598   unsigned int new_precision = last_stmt_info->operation_precision;
1599   if (!new_precision)
1600     return NULL;
1601 
1602   tree lhs = gimple_assign_lhs (last_stmt);
1603   tree type = TREE_TYPE (lhs);
1604   tree_code code = gimple_assign_rhs_code (last_stmt);
1605 
1606   /* Punt for reductions where we don't handle the type conversions.  */
1607   if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
1608     return NULL;
1609 
1610   /* Keep the first operand of a COND_EXPR as-is: only the other two
1611      operands are interesting.  */
1612   unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1613 
1614   /* Check the operands.  */
1615   unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1616   auto_vec <vect_unpromoted_value, 3> unprom (nops);
1617   unprom.quick_grow (nops);
1618   unsigned int min_precision = 0;
1619   bool single_use_p = false;
1620   for (unsigned int i = 0; i < nops; ++i)
1621     {
1622       tree op = gimple_op (last_stmt, first_op + i);
1623       if (TREE_CODE (op) == INTEGER_CST)
1624 	unprom[i].set_op (op, vect_constant_def);
1625       else if (TREE_CODE (op) == SSA_NAME)
1626 	{
1627 	  bool op_single_use_p = true;
1628 	  if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1629 						     &op_single_use_p))
1630 	    return NULL;
1631 	  /* If:
1632 
1633 	     (1) N bits of the result are needed;
1634 	     (2) all inputs are widened from M<N bits; and
1635 	     (3) one operand OP is a single-use SSA name
1636 
1637 	     we can shift the M->N widening from OP to the output
1638 	     without changing the number or type of extensions involved.
1639 	     This then reduces the number of copies of STMT_INFO.
1640 
1641 	     If instead of (3) more than one operand is a single-use SSA name,
1642 	     shifting the extension to the output is even more of a win.
1643 
1644 	     If instead:
1645 
1646 	     (1) N bits of the result are needed;
1647 	     (2) one operand OP2 is widened from M2<N bits;
1648 	     (3) another operand OP1 is widened from M1<M2 bits; and
1649 	     (4) both OP1 and OP2 are single-use
1650 
1651 	     the choice is between:
1652 
1653 	     (a) truncating OP2 to M1, doing the operation on M1,
1654 		 and then widening the result to N
1655 
1656 	     (b) widening OP1 to M2, doing the operation on M2, and then
1657 		 widening the result to N
1658 
1659 	     Both shift the M2->N widening of the inputs to the output.
1660 	     (a) additionally shifts the M1->M2 widening to the output;
1661 	     it requires fewer copies of STMT_INFO but requires an extra
1662 	     M2->M1 truncation.
1663 
1664 	     Which is better will depend on the complexity and cost of
1665 	     STMT_INFO, which is hard to predict at this stage.  However,
1666 	     a clear tie-breaker in favor of (b) is the fact that the
1667 	     truncation in (a) increases the length of the operation chain.
1668 
1669 	     If instead of (4) only one of OP1 or OP2 is single-use,
1670 	     (b) is still a win over doing the operation in N bits:
1671 	     it still shifts the M2->N widening on the single-use operand
1672 	     to the output and reduces the number of STMT_INFO copies.
1673 
1674 	     If neither operand is single-use then operating on fewer than
1675 	     N bits might lead to more extensions overall.  Whether it does
1676 	     or not depends on global information about the vectorization
1677 	     region, and whether that's a good trade-off would again
1678 	     depend on the complexity and cost of the statements involved,
1679 	     as well as things like register pressure that are not normally
1680 	     modelled at this stage.  We therefore ignore these cases
1681 	     and just optimize the clear single-use wins above.
1682 
1683 	     Thus we take the maximum precision of the unpromoted operands
1684 	     and record whether any operand is single-use.  */
1685 	  if (unprom[i].dt == vect_internal_def)
1686 	    {
1687 	      min_precision = MAX (min_precision,
1688 				   TYPE_PRECISION (unprom[i].type));
1689 	      single_use_p |= op_single_use_p;
1690 	    }
1691 	}
1692       else
1693 	return NULL;
1694     }
1695 
1696   /* Although the operation could be done in operation_precision, we have
1697      to balance that against introducing extra truncations or extensions.
1698      Calculate the minimum precision that can be handled efficiently.
1699 
1700      The loop above determined that the operation could be handled
1701      efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1702      extension from the inputs to the output without introducing more
1703      instructions, and would reduce the number of instructions required
1704      for STMT_INFO itself.
1705 
1706      vect_determine_precisions has also determined that the result only
1707      needs min_output_precision bits.  Truncating by a factor of N times
1708      requires a tree of N - 1 instructions, so if TYPE is N times wider
1709      than min_output_precision, doing the operation in TYPE and truncating
1710      the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1711      In contrast:
1712 
1713      - truncating the input to a unary operation and doing the operation
1714        in the new type requires at most N - 1 + 1 = N instructions per
1715        output vector
1716 
1717      - doing the same for a binary operation requires at most
1718        (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1719 
1720      Both unary and binary operations require fewer instructions than
1721      this if the operands were extended from a suitable truncated form.
1722      Thus there is usually nothing to lose by doing operations in
1723      min_output_precision bits, but there can be something to gain.  */
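  /* Worked instance of the counts above (illustrative only): for N == 2,
     a binary operation done in TYPE costs 2 copies of the operation plus
     1 truncation = 3 statements per output vector, the same as truncating
     both inputs first (2 + 1 = 3); whenever an input was already extended
     from a narrower type, the narrow version is strictly cheaper.  */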
1724   if (!single_use_p)
1725     min_precision = last_stmt_info->min_output_precision;
1726   else
1727     min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1728 
1729   /* Apply the minimum efficient precision we just calculated.  */
1730   if (new_precision < min_precision)
1731     new_precision = min_precision;
1732   new_precision = vect_element_precision (new_precision);
1733   if (new_precision >= TYPE_PRECISION (type))
1734     return NULL;
1735 
1736   vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1737 
1738   *type_out = get_vectype_for_scalar_type (vinfo, type);
1739   if (!*type_out)
1740     return NULL;
1741 
1742   /* We've found a viable pattern.  Get the new type of the operation.  */
1743   bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1744   tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1745 
1746   /* If we're truncating an operation, we need to make sure that we
1747      don't introduce new undefined overflow.  The codes tested here are
1748      a subset of those accepted by vect_truncatable_operation_p.  */
1749   tree op_type = new_type;
1750   if (TYPE_OVERFLOW_UNDEFINED (new_type)
1751       && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
1752     op_type = build_nonstandard_integer_type (new_precision, true);
1753 
1754   /* We specifically don't check here whether the target supports the
1755      new operation, since it might be something that a later pattern
1756      wants to rewrite anyway.  If targets have a minimum element size
1757      for some optabs, we should pattern-match smaller ops to larger ops
1758      where beneficial.  */
1759   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1760   tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
1761   if (!new_vectype || !op_vectype)
1762     return NULL;
1763 
1764   if (dump_enabled_p ())
1765     dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1766 		     type, new_type);
1767 
1768   /* Calculate the rhs operands for an operation on OP_TYPE.  */
1769   tree ops[3] = {};
1770   for (unsigned int i = 1; i < first_op; ++i)
1771     ops[i - 1] = gimple_op (last_stmt, i);
1772   vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
1773 		       op_type, &unprom[0], op_vectype);
1774 
1775   /* Use the operation to produce a result of type OP_TYPE.  */
1776   tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
1777   gimple *pattern_stmt = gimple_build_assign (new_var, code,
1778 					      ops[0], ops[1], ops[2]);
1779   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1780 
1781   if (dump_enabled_p ())
1782     dump_printf_loc (MSG_NOTE, vect_location,
1783 		     "created pattern stmt: %G", pattern_stmt);
1784 
1785   /* Convert back to the original signedness, if OP_TYPE is different
1786      from NEW_TYPE.  */
1787   if (op_type != new_type)
1788     pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
1789 					pattern_stmt, op_vectype);
1790 
1791   /* Promote the result to the original type.  */
1792   pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
1793 				      pattern_stmt, new_vectype);
1794 
1795   return pattern_stmt;
1796 }
1797 
1798 /* Recognize the following patterns:
1799 
1800      ATYPE a;  // narrower than TYPE
1801      BTYPE b;  // narrower than TYPE
1802 
1803    1) Multiply high with scaling
1804      TYPE res = ((TYPE) a * (TYPE) b) >> c;
1805    2) ... or also with rounding
1806      TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
1807 
1808    where only the bottom half of res is used.  */
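/* A typical instance (illustrative only) is a Q15 fixed-point multiply:

     short a, b;
     short res = (short) (((int) a * (int) b) >> 15);

   which matches form (1) with c == 15 and maps to IFN_MULHS; the rounding
   form with d == 14 maps to IFN_MULHRS.  */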
1809 
1810 static gimple *
1811 vect_recog_mulhs_pattern (vec_info *vinfo,
1812 			  stmt_vec_info last_stmt_info, tree *type_out)
1813 {
1814   /* Check for a right shift.  */
1815   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1816   if (!last_stmt
1817       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
1818     return NULL;
1819 
1820   /* Check that the shift result is wider than the users of the
1821      result need (i.e. that narrowing would be a natural choice).  */
1822   tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1823   unsigned int target_precision
1824     = vect_element_precision (last_stmt_info->min_output_precision);
1825   if (!INTEGRAL_TYPE_P (lhs_type)
1826       || target_precision >= TYPE_PRECISION (lhs_type))
1827     return NULL;
1828 
1829   /* Look through any change in sign on the outer shift input.  */
1830   vect_unpromoted_value unprom_rshift_input;
1831   tree rshift_input = vect_look_through_possible_promotion
1832     (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
1833   if (!rshift_input
1834       || TYPE_PRECISION (TREE_TYPE (rshift_input))
1835 	   != TYPE_PRECISION (lhs_type))
1836     return NULL;
1837 
1838   /* Get the definition of the shift input.  */
1839   stmt_vec_info rshift_input_stmt_info
1840     = vect_get_internal_def (vinfo, rshift_input);
1841   if (!rshift_input_stmt_info)
1842     return NULL;
1843   gassign *rshift_input_stmt
1844     = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
1845   if (!rshift_input_stmt)
1846     return NULL;
1847 
1848   stmt_vec_info mulh_stmt_info;
1849   tree scale_term;
1850   internal_fn ifn;
1851   unsigned int expect_offset;
1852 
1853   /* Check for the presence of the rounding term.  */
1854   if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
1855     {
1856       /* Check that the outer shift was by 1.  */
1857       if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
1858 	return NULL;
1859 
1860       /* Check that the second operand of the PLUS_EXPR is 1.  */
1861       if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
1862 	return NULL;
1863 
1864       /* Look through any change in sign on the addition input.  */
1865       vect_unpromoted_value unprom_plus_input;
1866       tree plus_input = vect_look_through_possible_promotion
1867 	(vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
1868       if (!plus_input
1869 	   || TYPE_PRECISION (TREE_TYPE (plus_input))
1870 		!= TYPE_PRECISION (TREE_TYPE (rshift_input)))
1871 	return NULL;
1872 
1873       /* Get the definition of the multiply-high-scale part.  */
1874       stmt_vec_info plus_input_stmt_info
1875 	= vect_get_internal_def (vinfo, plus_input);
1876       if (!plus_input_stmt_info)
1877 	return NULL;
1878       gassign *plus_input_stmt
1879 	= dyn_cast <gassign *> (plus_input_stmt_info->stmt);
1880       if (!plus_input_stmt
1881 	  || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
1882 	return NULL;
1883 
1884       /* Look through any change in sign on the scaling input.  */
1885       vect_unpromoted_value unprom_scale_input;
1886       tree scale_input = vect_look_through_possible_promotion
1887 	(vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
1888       if (!scale_input
1889 	  || TYPE_PRECISION (TREE_TYPE (scale_input))
1890 	       != TYPE_PRECISION (TREE_TYPE (plus_input)))
1891 	return NULL;
1892 
1893       /* Get the definition of the multiply-high part.  */
1894       mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
1895       if (!mulh_stmt_info)
1896 	return NULL;
1897 
1898       /* Get the scaling term.  */
1899       scale_term = gimple_assign_rhs2 (plus_input_stmt);
1900 
1901       expect_offset = target_precision + 2;
1902       ifn = IFN_MULHRS;
1903     }
1904   else
1905     {
1906       mulh_stmt_info = rshift_input_stmt_info;
1907       scale_term = gimple_assign_rhs2 (last_stmt);
1908 
1909       expect_offset = target_precision + 1;
1910       ifn = IFN_MULHS;
1911     }
1912 
1913   /* Check that the scaling factor is correct.  */
1914   if (TREE_CODE (scale_term) != INTEGER_CST
1915       || wi::to_widest (scale_term) + expect_offset
1916 	   != TYPE_PRECISION (lhs_type))
1917     return NULL;
1918 
1919   /* Check whether the scaling input term can be seen as two widened
1920      inputs multiplied together.  */
1921   vect_unpromoted_value unprom_mult[2];
1922   tree new_type;
1923   unsigned int nops
1924     = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
1925 			    false, 2, unprom_mult, &new_type);
1926   if (nops != 2)
1927     return NULL;
1928 
1929   vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
1930 
1931   /* Adjust output precision.  */
1932   if (TYPE_PRECISION (new_type) < target_precision)
1933     new_type = build_nonstandard_integer_type
1934       (target_precision, TYPE_UNSIGNED (new_type));
1935 
1936   /* Check for target support.  */
1937   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1938   if (!new_vectype
1939       || !direct_internal_fn_supported_p
1940 	    (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
1941     return NULL;
1942 
1943   /* The IR requires a valid vector type for the cast result, even though
1944      it's likely to be discarded.  */
1945   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
1946   if (!*type_out)
1947     return NULL;
1948 
1949   /* Generate the IFN_MULHRS call.  */
1950   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1951   tree new_ops[2];
1952   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
1953 		       unprom_mult, new_vectype);
1954   gcall *mulhrs_stmt
1955     = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
1956   gimple_call_set_lhs (mulhrs_stmt, new_var);
1957   gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
1958 
1959   if (dump_enabled_p ())
1960     dump_printf_loc (MSG_NOTE, vect_location,
1961 		     "created pattern stmt: %G", mulhrs_stmt);
1962 
1963   return vect_convert_output (vinfo, last_stmt_info, lhs_type,
1964 			      mulhrs_stmt, new_vectype);
1965 }
1966 
1967 /* Recognize the patterns:
1968 
1969 	    ATYPE a;  // narrower than TYPE
1970 	    BTYPE b;  // narrower than TYPE
1971 	(1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1972      or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1973 
1974    where only the bottom half of avg is used.  Try to transform them into:
1975 
1976 	(1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1977      or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1978 
1979   followed by:
1980 
1981 	    TYPE avg = (TYPE) avg';
1982 
1983   where NTYPE is no wider than half of TYPE.  Since only the bottom half
1984   of avg is used, all or part of the cast of avg' should become redundant.
1985 
1986   If there is no target support available, generate code to distribute rshift
1987   over plus and add a carry.  */
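/* Illustrative scalar source (an assumption about typical input):

     unsigned char a, b, *out;
     *out = (unsigned char) (((int) a + (int) b + 1) >> 1);

   matches form (2) and becomes .AVG_CEIL on vectors of unsigned char.  */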
1988 
1989 static gimple *
1990 vect_recog_average_pattern (vec_info *vinfo,
1991 			    stmt_vec_info last_stmt_info, tree *type_out)
1992 {
1993   /* Check for a shift right by one bit.  */
1994   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1995   if (!last_stmt
1996       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1997       || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1998     return NULL;
1999 
2000   /* Check that the shift result is wider than the users of the
2001      result need (i.e. that narrowing would be a natural choice).  */
2002   tree lhs = gimple_assign_lhs (last_stmt);
2003   tree type = TREE_TYPE (lhs);
2004   unsigned int target_precision
2005     = vect_element_precision (last_stmt_info->min_output_precision);
2006   if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
2007     return NULL;
2008 
2009   /* Look through any change in sign on the shift input.  */
2010   tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
2011   vect_unpromoted_value unprom_plus;
2012   rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
2013 						     &unprom_plus);
2014   if (!rshift_rhs
2015       || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
2016     return NULL;
2017 
2018   /* Get the definition of the shift input.  */
2019   stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
2020   if (!plus_stmt_info)
2021     return NULL;
2022 
2023   /* Check whether the shift input can be seen as a tree of additions on
2024      2 or 3 widened inputs.
2025 
2026      Note that the pattern should be a win even if the result of one or
2027      more additions is reused elsewhere: if the pattern matches, we'd be
2028      replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s.  */
2029   internal_fn ifn = IFN_AVG_FLOOR;
2030   vect_unpromoted_value unprom[3];
2031   tree new_type;
2032   unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
2033 					    WIDEN_PLUS_EXPR, false, 3,
2034 					    unprom, &new_type);
2035   if (nops == 0)
2036     return NULL;
2037   if (nops == 3)
2038     {
2039       /* Check that one operand is 1.  */
2040       unsigned int i;
2041       for (i = 0; i < 3; ++i)
2042 	if (integer_onep (unprom[i].op))
2043 	  break;
2044       if (i == 3)
2045 	return NULL;
2046       /* Throw away the 1 operand and keep the other two.  */
2047       if (i < 2)
2048 	unprom[i] = unprom[2];
2049       ifn = IFN_AVG_CEIL;
2050     }
2051 
2052   vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
2053 
2054   /* We know that:
2055 
2056      (a) the operation can be viewed as:
2057 
2058 	   TYPE widened0 = (TYPE) UNPROM[0];
2059 	   TYPE widened1 = (TYPE) UNPROM[1];
2060 	   TYPE tmp1 = widened0 + widened1 {+ 1};
2061 	   TYPE tmp2 = tmp1 >> 1;   // LAST_STMT_INFO
2062 
2063      (b) the first two statements are equivalent to:
2064 
2065 	   TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2066 	   TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2067 
2068      (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2069 	 where sensible;
2070 
2071      (d) all the operations can be performed correctly at twice the width of
2072 	 NEW_TYPE, due to the nature of the average operation; and
2073 
2074      (e) users of the result of the right shift need only TARGET_PRECISION
2075 	 bits, where TARGET_PRECISION is no more than half of TYPE's
2076 	 precision.
2077 
2078      Under these circumstances, the only situation in which NEW_TYPE
2079      could be narrower than TARGET_PRECISION is if widened0, widened1
2080      and an addition result are all used more than once.  Thus we can
2081      treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2082      as "free", whereas widening the result of the average instruction
2083      from NEW_TYPE to TARGET_PRECISION would be a new operation.  It's
2084      therefore better not to go narrower than TARGET_PRECISION.  */
2085   if (TYPE_PRECISION (new_type) < target_precision)
2086     new_type = build_nonstandard_integer_type (target_precision,
2087 					       TYPE_UNSIGNED (new_type));
2088 
2089   /* Check for target support.  */
2090   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2091   if (!new_vectype)
2092     return NULL;
2093 
2094   bool fallback_p = false;
2095 
2096   if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2097     ;
2098   else if (TYPE_UNSIGNED (new_type)
2099 	   && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2100 	   && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2101 	   && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2102 	   && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2103     fallback_p = true;
2104   else
2105     return NULL;
2106 
2107   /* The IR requires a valid vector type for the cast result, even though
2108      it's likely to be discarded.  */
2109   *type_out = get_vectype_for_scalar_type (vinfo, type);
2110   if (!*type_out)
2111     return NULL;
2112 
2113   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2114   tree new_ops[2];
2115   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2116 		       unprom, new_vectype);
2117 
2118   if (fallback_p)
2119     {
2120       /* As a fallback, generate code for the following sequence:
2121 
2122 	 shifted_op0 = new_ops[0] >> 1;
2123 	 shifted_op1 = new_ops[1] >> 1;
2124 	 sum_of_shifted = shifted_op0 + shifted_op1;
2125 	 unmasked_carry = new_ops[0] and/or new_ops[1];
2126 	 carry = unmasked_carry & 1;
2127 	 new_var = sum_of_shifted + carry;
2128       */
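      /* The sequence relies on the identities (for unsigned values):
	   floor ((a + b) / 2) == (a >> 1) + (b >> 1) + ((a & b) & 1)
	   ceil  ((a + b) / 2) == (a >> 1) + (b >> 1) + ((a | b) & 1)
	 so no intermediate value needs more bits than NEW_TYPE provides.  */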
2129 
2130       tree one_cst = build_one_cst (new_type);
2131       gassign *g;
2132 
2133       tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2134       g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2135       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2136 
2137       tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2138       g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2139       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2140 
2141       tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2142       g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2143 			       shifted_op0, shifted_op1);
2144       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2145 
2146       tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2147       tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2148       g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2149       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2150 
2151       tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2152       g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2153       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2154 
2155       g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2156       return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
2157     }
2158 
2159   /* Generate the IFN_AVG* call.  */
2160   gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2161 						    new_ops[1]);
2162   gimple_call_set_lhs (average_stmt, new_var);
2163   gimple_set_location (average_stmt, gimple_location (last_stmt));
2164 
2165   if (dump_enabled_p ())
2166     dump_printf_loc (MSG_NOTE, vect_location,
2167 		     "created pattern stmt: %G", average_stmt);
2168 
2169   return vect_convert_output (vinfo, last_stmt_info,
2170 			      type, average_stmt, new_vectype);
2171 }
2172 
2173 /* Recognize cases in which the input to a cast is wider than its
2174    output, and the input is fed by a widening operation.  Fold this
2175    by removing the unnecessary intermediate widening.  E.g.:
2176 
2177      unsigned char a;
2178      unsigned int b = (unsigned int) a;
2179      unsigned short c = (unsigned short) b;
2180 
2181    -->
2182 
2183      unsigned short c = (unsigned short) a;
2184 
2185    Although this is rare in input IR, it is an expected side-effect
2186    of the over-widening pattern above.
2187 
2188    This is beneficial also for integer-to-float conversions, if the
2189    widened integer has more bits than the float, and if the unwidened
2190    input doesn't.  */
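/* An integer-to-float instance (illustrative only):

     unsigned char a;
     unsigned long long b = (unsigned long long) a;
     float f = (float) b;

   can convert 'a' to float directly: the 64-bit intermediate has more bits
   than float, but the original 8-bit value does not.  */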
2191 
2192 static gimple *
2193 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
2194 				  stmt_vec_info last_stmt_info, tree *type_out)
2195 {
2196   /* Check for a cast, including an integer-to-float conversion.  */
2197   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2198   if (!last_stmt)
2199     return NULL;
2200   tree_code code = gimple_assign_rhs_code (last_stmt);
2201   if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2202     return NULL;
2203 
2204   /* Make sure that the rhs is a scalar with a natural bitsize.  */
2205   tree lhs = gimple_assign_lhs (last_stmt);
2206   if (!lhs)
2207     return NULL;
2208   tree lhs_type = TREE_TYPE (lhs);
2209   scalar_mode lhs_mode;
2210   if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2211       || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2212     return NULL;
2213 
2214   /* Check for a narrowing operation (from a vector point of view).  */
2215   tree rhs = gimple_assign_rhs1 (last_stmt);
2216   tree rhs_type = TREE_TYPE (rhs);
2217   if (!INTEGRAL_TYPE_P (rhs_type)
2218       || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2219       || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2220     return NULL;
2221 
2222   /* Try to find an unpromoted input.  */
2223   vect_unpromoted_value unprom;
2224   if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2225       || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2226     return NULL;
2227 
2228   /* If the bits above RHS_TYPE matter, make sure that they're the
2229      same when extending from UNPROM as they are when extending from RHS.  */
2230   if (!INTEGRAL_TYPE_P (lhs_type)
2231       && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2232     return NULL;
2233 
2234   /* We can get the same result by casting UNPROM directly, to avoid
2235      the unnecessary widening and narrowing.  */
2236   vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2237 
2238   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2239   if (!*type_out)
2240     return NULL;
2241 
2242   tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2243   gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2244   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2245 
2246   return pattern_stmt;
2247 }
2248 
2249 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2250    to WIDEN_LSHIFT_EXPR.  See vect_recog_widen_op_pattern for details.  */
2251 
2252 static gimple *
2253 vect_recog_widen_shift_pattern (vec_info *vinfo,
2254 				stmt_vec_info last_stmt_info, tree *type_out)
2255 {
2256   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
2257 				      LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
2258 				      "vect_recog_widen_shift_pattern");
2259 }
2260 
2261 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
2262 
2263    type a_t, b_t, c_t;
2264 
2265    S0 a_t = b_t r<< c_t;
2266 
2267   Input/Output:
2268 
2269   * STMT_VINFO: The stmt from which the pattern search begins,
2270     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
2271     with a sequence:
2272 
2273    S1 d_t = -c_t;
2274    S2 e_t = d_t & (B - 1);
2275    S3 f_t = b_t << c_t;
2276    S4 g_t = b_t >> e_t;
2277    S0 a_t = f_t | g_t;
2278 
2279     where B is element bitsize of type.
2280 
2281   Output:
2282 
2283   * TYPE_OUT: The type of the output of this pattern.
2284 
2285   * Return value: A new stmt that will be used to replace the rotate
2286     S0 stmt.  */
2287 
2288 static gimple *
2289 vect_recog_rotate_pattern (vec_info *vinfo,
2290 			   stmt_vec_info stmt_vinfo, tree *type_out)
2291 {
2292   gimple *last_stmt = stmt_vinfo->stmt;
2293   tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2294   gimple *pattern_stmt, *def_stmt;
2295   enum tree_code rhs_code;
2296   enum vect_def_type dt;
2297   optab optab1, optab2;
2298   edge ext_def = NULL;
2299   bool bswap16_p = false;
2300 
2301   if (is_gimple_assign (last_stmt))
2302     {
2303       rhs_code = gimple_assign_rhs_code (last_stmt);
2304       switch (rhs_code)
2305 	{
2306 	case LROTATE_EXPR:
2307 	case RROTATE_EXPR:
2308 	  break;
2309 	default:
2310 	  return NULL;
2311 	}
2312 
2313       lhs = gimple_assign_lhs (last_stmt);
2314       oprnd0 = gimple_assign_rhs1 (last_stmt);
2315       type = TREE_TYPE (oprnd0);
2316       oprnd1 = gimple_assign_rhs2 (last_stmt);
2317     }
2318   else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2319     {
2320       /* __builtin_bswap16 (x) is another form of x r>> 8.
2321 	 The vectorizer has bswap support, but only if the argument isn't
2322 	 promoted.  */
2323       lhs = gimple_call_lhs (last_stmt);
2324       oprnd0 = gimple_call_arg (last_stmt, 0);
2325       type = TREE_TYPE (oprnd0);
2326       if (!lhs
2327 	  || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2328 	  || TYPE_PRECISION (type) <= 16
2329 	  || TREE_CODE (oprnd0) != SSA_NAME
2330 	  || BITS_PER_UNIT != 8
2331 	  || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
2332 	return NULL;
2333 
2334       stmt_vec_info def_stmt_info;
2335       if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2336 	return NULL;
2337 
2338       if (dt != vect_internal_def)
2339 	return NULL;
2340 
2341       if (gimple_assign_cast_p (def_stmt))
2342 	{
2343 	  def = gimple_assign_rhs1 (def_stmt);
2344 	  if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2345 	      && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2346 	    oprnd0 = def;
2347 	}
2348 
2349       type = TREE_TYPE (lhs);
2350       vectype = get_vectype_for_scalar_type (vinfo, type);
2351       if (vectype == NULL_TREE)
2352 	return NULL;
2353 
2354       if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2355 	{
2356 	  /* The encoding uses one stepped pattern for each byte in the
2357 	     16-bit word.  */
2358 	  vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2359 	  for (unsigned i = 0; i < 3; ++i)
2360 	    for (unsigned j = 0; j < 2; ++j)
2361 	      elts.quick_push ((i + 1) * 2 - j - 1);
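	  /* The pushed indices are 1, 0, 3, 2, 5, 4, ...: each pair of
	     adjacent bytes is swapped, i.e. a lane-wise byte permutation
	     implementing bswap16.  */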
2362 
2363 	  vec_perm_indices indices (elts, 1,
2364 				    TYPE_VECTOR_SUBPARTS (char_vectype));
2365 	  if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2366 	    {
2367 	      /* vectorizable_bswap can handle the __builtin_bswap16 if we
2368 		 undo the argument promotion.  */
2369 	      if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2370 		{
2371 		  def = vect_recog_temp_ssa_var (type, NULL);
2372 		  def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2373 		  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2374 		  oprnd0 = def;
2375 		}
2376 
2377 	      /* Pattern detected.  */
2378 	      vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2379 
2380 	      *type_out = vectype;
2381 
2382 	      /* Pattern supported.  Create a stmt to be used to replace the
2383 		 pattern, with the unpromoted argument.  */
2384 	      var = vect_recog_temp_ssa_var (type, NULL);
2385 	      pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
2386 						1, oprnd0);
2387 	      gimple_call_set_lhs (pattern_stmt, var);
2388 	      gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
2389 				      gimple_call_fntype (last_stmt));
2390 	      return pattern_stmt;
2391 	    }
2392 	}
2393 
2394       oprnd1 = build_int_cst (integer_type_node, 8);
2395       rhs_code = LROTATE_EXPR;
2396       bswap16_p = true;
2397     }
2398   else
2399     return NULL;
2400 
2401   if (TREE_CODE (oprnd0) != SSA_NAME
2402       || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2403       || !INTEGRAL_TYPE_P (type)
2404       || !TYPE_UNSIGNED (type))
2405     return NULL;
2406 
2407   stmt_vec_info def_stmt_info;
2408   if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2409     return NULL;
2410 
2411   if (dt != vect_internal_def
2412       && dt != vect_constant_def
2413       && dt != vect_external_def)
2414     return NULL;
2415 
2416   vectype = get_vectype_for_scalar_type (vinfo, type);
2417   if (vectype == NULL_TREE)
2418     return NULL;
2419 
2420   /* If vector/vector or vector/scalar rotate is supported by the target,
2421      don't do anything here.  */
2422   optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2423   if (optab1
2424       && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2425     {
2426      use_rotate:
2427       if (bswap16_p)
2428 	{
2429 	  if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2430 	    {
2431 	      def = vect_recog_temp_ssa_var (type, NULL);
2432 	      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2433 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2434 	      oprnd0 = def;
2435 	    }
2436 
2437 	  /* Pattern detected.  */
2438 	  vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2439 
2440 	  *type_out = vectype;
2441 
2442 	  /* Pattern supported.  Create a stmt to be used to replace the
2443 	     pattern.  */
2444 	  var = vect_recog_temp_ssa_var (type, NULL);
2445 	  pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
2446 					      oprnd1);
2447 	  return pattern_stmt;
2448 	}
2449       return NULL;
2450     }
2451 
2452   if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2453     {
2454       optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2455       if (optab2
2456 	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2457 	goto use_rotate;
2458     }
2459 
2460   /* If vector/vector or vector/scalar shifts aren't supported by the target,
2461      don't do anything here either.  */
2462   optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2463   optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2464   if (!optab1
2465       || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2466       || !optab2
2467       || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2468     {
2469       if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2470 	return NULL;
2471       optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2472       optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2473       if (!optab1
2474 	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2475 	  || !optab2
2476 	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2477 	return NULL;
2478     }
2479 
2480   *type_out = vectype;
2481 
2482   if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2483     {
2484       def = vect_recog_temp_ssa_var (type, NULL);
2485       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2486       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2487       oprnd0 = def;
2488     }
2489 
2490   if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
2491     ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2492 
2493   def = NULL_TREE;
2494   scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2495   if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2496     def = oprnd1;
2497   else if (def_stmt && gimple_assign_cast_p (def_stmt))
2498     {
2499       tree rhs1 = gimple_assign_rhs1 (def_stmt);
2500       if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2501 	  && TYPE_PRECISION (TREE_TYPE (rhs1))
2502 	     == TYPE_PRECISION (type))
2503 	def = rhs1;
2504     }
2505 
2506   if (def == NULL_TREE)
2507     {
2508       def = vect_recog_temp_ssa_var (type, NULL);
2509       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2510       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2511     }
2512   stype = TREE_TYPE (def);
2513 
2514   if (TREE_CODE (def) == INTEGER_CST)
2515     {
2516       if (!tree_fits_uhwi_p (def)
2517 	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2518 	  || integer_zerop (def))
2519 	return NULL;
2520       def2 = build_int_cst (stype,
2521 			    GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2522     }
2523   else
2524     {
2525       tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
2526 
2527       if (vecstype == NULL_TREE)
2528 	return NULL;
2529       def2 = vect_recog_temp_ssa_var (stype, NULL);
2530       def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2531       if (ext_def)
2532 	{
2533 	  basic_block new_bb
2534 	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2535 	  gcc_assert (!new_bb);
2536 	}
2537       else
2538 	append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2539 
2540       def2 = vect_recog_temp_ssa_var (stype, NULL);
2541       tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
2542       def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2543 				      gimple_assign_lhs (def_stmt), mask);
2544       if (ext_def)
2545 	{
2546 	  basic_block new_bb
2547 	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2548 	  gcc_assert (!new_bb);
2549 	}
2550       else
2551 	append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2552     }
2553 
2554   var1 = vect_recog_temp_ssa_var (type, NULL);
2555   def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2556 					? LSHIFT_EXPR : RSHIFT_EXPR,
2557 				  oprnd0, def);
2558   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2559 
2560   var2 = vect_recog_temp_ssa_var (type, NULL);
2561   def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2562 					? RSHIFT_EXPR : LSHIFT_EXPR,
2563 				  oprnd0, def2);
2564   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2565 
2566   /* Pattern detected.  */
2567   vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2568 
2569   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
2570   var = vect_recog_temp_ssa_var (type, NULL);
2571   pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2572 
2573   return pattern_stmt;
2574 }
2575 
2576 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2577    vectorized:
2578 
2579    type a_t;
2580    TYPE b_T, res_T;
2581 
2582    S1 a_t = ;
2583    S2 b_T = ;
2584    S3 res_T = b_T op a_t;
2585 
2586   where type 'TYPE' is a type with different size than 'type',
2587   and op is <<, >> or rotate.
2588 
2589   Also detect cases:
2590 
2591    type a_t;
2592    TYPE b_T, c_T, res_T;
2593 
2594    S0 c_T = ;
2595    S1 a_t = (type) c_T;
2596    S2 b_T = ;
2597    S3 res_T = b_T op a_t;
2598 
2599   Input/Output:
2600 
2601   * STMT_VINFO: The stmt from which the pattern search begins,
2602     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
2603     with a shift/rotate which has same type on both operands, in the
2604     second case just b_T op c_T, in the first case with added cast
2605     from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2606 
2607   Output:
2608 
2609   * TYPE_OUT: The type of the output of this pattern.
2610 
2611   * Return value: A new stmt that will be used to replace the shift/rotate
2612     S3 stmt.  */
2613 
2614 static gimple *
2615 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
2616 					stmt_vec_info stmt_vinfo,
2617 					tree *type_out)
2618 {
2619   gimple *last_stmt = stmt_vinfo->stmt;
2620   tree oprnd0, oprnd1, lhs, var;
2621   gimple *pattern_stmt;
2622   enum tree_code rhs_code;
2623 
2624   if (!is_gimple_assign (last_stmt))
2625     return NULL;
2626 
2627   rhs_code = gimple_assign_rhs_code (last_stmt);
2628   switch (rhs_code)
2629     {
2630     case LSHIFT_EXPR:
2631     case RSHIFT_EXPR:
2632     case LROTATE_EXPR:
2633     case RROTATE_EXPR:
2634       break;
2635     default:
2636       return NULL;
2637     }
2638 
2639   lhs = gimple_assign_lhs (last_stmt);
2640   oprnd0 = gimple_assign_rhs1 (last_stmt);
2641   oprnd1 = gimple_assign_rhs2 (last_stmt);
2642   if (TREE_CODE (oprnd0) != SSA_NAME
2643       || TREE_CODE (oprnd1) != SSA_NAME
2644       || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2645       || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2646       || TYPE_PRECISION (TREE_TYPE (lhs))
2647 	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2648     return NULL;
2649 
2650   stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2651   if (!def_vinfo)
2652     return NULL;
2653 
2654   *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
2655   if (*type_out == NULL_TREE)
2656     return NULL;
2657 
2658   tree def = NULL_TREE;
2659   gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2660   if (def_stmt && gimple_assign_cast_p (def_stmt))
2661     {
2662       tree rhs1 = gimple_assign_rhs1 (def_stmt);
2663       if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2664 	  && TYPE_PRECISION (TREE_TYPE (rhs1))
2665 	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2666 	{
2667 	  if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2668 	      >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2669 	    def = rhs1;
2670 	  else
2671 	    {
2672 	      tree mask
2673 		= build_low_bits_mask (TREE_TYPE (rhs1),
2674 				       TYPE_PRECISION (TREE_TYPE (oprnd1)));
2675 	      def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2676 	      def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2677 	      tree vecstype = get_vectype_for_scalar_type (vinfo,
2678 							   TREE_TYPE (rhs1));
2679 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2680 	    }
2681 	}
2682     }
2683 
2684   if (def == NULL_TREE)
2685     {
2686       def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2687       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2688       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2689     }
2690 
2691   /* Pattern detected.  */
2692   vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2693 
2694   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
2695   var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2696   pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2697 
2698   return pattern_stmt;
2699 }
2700 
2701 /* Return true iff the target has a vector optab implementing the operation
2702    CODE on type VECTYPE.  */
2703 
2704 static bool
2705 target_has_vecop_for_code (tree_code code, tree vectype)
2706 {
2707   optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2708   return voptab
2709 	 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2710 }
2711 
2712 /* Verify that the target has optabs of VECTYPE to perform all the steps
2713    needed by the multiplication-by-immediate synthesis algorithm described by
2714    ALG and VAR.  If SYNTH_SHIFT_P is true, ensure that vector addition is
2715    present.  Return true iff the target supports all the steps.  */
2716 
2717 static bool
2718 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2719 				 tree vectype, bool synth_shift_p)
2720 {
2721   if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2722     return false;
2723 
2724   bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2725   bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2726 
2727   if (var == negate_variant
2728       && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2729     return false;
2730 
2731   /* If we must synthesize shifts with additions make sure that vector
2732      addition is available.  */
2733   if ((var == add_variant || synth_shift_p) && !supports_vplus)
2734     return false;
2735 
2736   for (int i = 1; i < alg->ops; i++)
2737     {
2738       switch (alg->op[i])
2739 	{
2740 	case alg_shift:
2741 	  break;
2742 	case alg_add_t_m2:
2743 	case alg_add_t2_m:
2744 	case alg_add_factor:
2745 	  if (!supports_vplus)
2746 	    return false;
2747 	  break;
2748 	case alg_sub_t_m2:
2749 	case alg_sub_t2_m:
2750 	case alg_sub_factor:
2751 	  if (!supports_vminus)
2752 	    return false;
2753 	  break;
2754 	case alg_unknown:
2755 	case alg_m:
2756 	case alg_zero:
2757 	case alg_impossible:
2758 	  return false;
2759 	default:
2760 	  gcc_unreachable ();
2761 	}
2762     }
2763 
2764   return true;
2765 }
2766 
2767 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2768    putting the final result in DEST.  Append all statements but the last
2769    to STMT_INFO's pattern definition sequence.  Return the last statement.  */
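/* For example (illustrative only), OP << 3 is emitted as

     t1 = OP + OP;  t2 = t1 + t1;  DEST = t2 + t2;

   i.e. AMNT self-additions, each one doubling the running value.  */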
2770 
2771 static gimple *
2772 synth_lshift_by_additions (vec_info *vinfo,
2773 			   tree dest, tree op, HOST_WIDE_INT amnt,
2774 			   stmt_vec_info stmt_info)
2775 {
2776   HOST_WIDE_INT i;
2777   tree itype = TREE_TYPE (op);
2778   tree prev_res = op;
2779   gcc_assert (amnt >= 0);
2780   for (i = 0; i < amnt; i++)
2781     {
2782       tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2783 		      : dest;
2784       gimple *stmt
2785         = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2786       prev_res = tmp_var;
2787       if (i < amnt - 1)
2788 	append_pattern_def_seq (vinfo, stmt_info, stmt);
2789       else
2790 	return stmt;
2791     }
2792   gcc_unreachable ();
2793   return NULL;
2794 }
2795 
2796 /* Helper for vect_synth_mult_by_constant.  Apply a binary operation
2797    CODE to operands OP1 and OP2, creating a new temporary SSA var in
2798    the process if necessary.  Append the resulting assignment statements
2799    to the sequence in STMT_VINFO.  Return the SSA variable that holds the
2800    result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
2801    left shifts using additions.  */
2802 
2803 static tree
2804 apply_binop_and_append_stmt (vec_info *vinfo,
2805 			     tree_code code, tree op1, tree op2,
2806 			     stmt_vec_info stmt_vinfo, bool synth_shift_p)
2807 {
2808   if (integer_zerop (op2)
2809       && (code == LSHIFT_EXPR
2810 	  || code == PLUS_EXPR))
2811     {
2812       gcc_assert (TREE_CODE (op1) == SSA_NAME);
2813       return op1;
2814     }
2815 
2816   gimple *stmt;
2817   tree itype = TREE_TYPE (op1);
2818   tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2819 
2820   if (code == LSHIFT_EXPR
2821       && synth_shift_p)
2822     {
2823       stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
2824 					TREE_INT_CST_LOW (op2), stmt_vinfo);
2825       append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2826       return tmp_var;
2827     }
2828 
2829   stmt = gimple_build_assign (tmp_var, code, op1, op2);
2830   append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2831   return tmp_var;
2832 }
2833 
2834 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2835    and simple arithmetic operations to be vectorized.  Record the statements
2836    produced in STMT_VINFO and return the last statement in the sequence or
2837    NULL if it's not possible to synthesize such a multiplication.
2838    This function mirrors the behavior of expand_mult_const in expmed.c but
2839    works on tree-ssa form.  */
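/* For example (illustrative only), OP * 5 can be synthesized as
   ((OP << 2) + OP) and OP * 7 as ((OP << 3) - OP); the decomposition
   actually used is whatever choose_mult_variant selects for the target.  */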
2840 
2841 static gimple *
2842 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
2843 			     stmt_vec_info stmt_vinfo)
2844 {
2845   tree itype = TREE_TYPE (op);
2846   machine_mode mode = TYPE_MODE (itype);
2847   struct algorithm alg;
2848   mult_variant variant;
2849   if (!tree_fits_shwi_p (val))
2850     return NULL;
2851 
2852   /* Multiplication synthesis by shifts, adds and subs can introduce
2853      signed overflow where the original operation didn't.  Perform the
2854      operations on an unsigned type and cast back to avoid this.
2855      In the future we may want to relax this for synthesis algorithms
2856      that we can prove do not cause unexpected overflow.  */
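  /* For instance (illustrative only), x * 7 synthesized as (x << 3) - x
     overflows a signed int for INT_MAX / 8 < x <= INT_MAX / 7, even though
     the original multiplication does not.  */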
2857   bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2858 
2859   tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2860 
2861   /* Targets that don't support vector shifts but support vector additions
2862      can synthesize shifts that way.  */
2863   bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
2864 
2865   HOST_WIDE_INT hwval = tree_to_shwi (val);
2866   /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2867      The vectorizer's benefit analysis will decide whether it's beneficial
2868      to do this.  */
2869   bool possible = choose_mult_variant (mode, hwval, &alg,
2870 					&variant, MAX_COST);
2871   if (!possible)
2872     return NULL;
2873 
2874   tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
2875 
2876   if (!vectype
2877       || !target_supports_mult_synth_alg (&alg, variant,
2878 					   vectype, synth_shift_p))
2879     return NULL;
2880 
2881   tree accumulator;
2882 
2883   /* Clear out the sequence of statements so we can populate it below.  */
2884   gimple *stmt = NULL;
2885 
2886   if (cast_to_unsigned_p)
2887     {
2888       tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2889       stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2890       append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2891       op = tmp_op;
2892     }
2893 
2894   if (alg.op[0] == alg_zero)
2895     accumulator = build_int_cst (multtype, 0);
2896   else
2897     accumulator = op;
2898 
2899   bool needs_fixup = (variant == negate_variant)
2900 		      || (variant == add_variant);
2901 
2902   for (int i = 1; i < alg.ops; i++)
2903     {
2904       tree shft_log = build_int_cst (multtype, alg.log[i]);
2905       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2906       tree tmp_var = NULL_TREE;
2907 
2908       switch (alg.op[i])
2909 	{
2910 	case alg_shift:
2911 	  if (synth_shift_p)
2912 	    stmt
2913 	      = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
2914 					   alg.log[i], stmt_vinfo);
2915 	  else
2916 	    stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2917 					 shft_log);
2918 	  break;
2919 	case alg_add_t_m2:
2920 	  tmp_var
2921 	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
2922 					   stmt_vinfo, synth_shift_p);
2923 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2924 				       tmp_var);
2925 	  break;
2926 	case alg_sub_t_m2:
2927 	  tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
2928 						 shft_log, stmt_vinfo,
2929 						 synth_shift_p);
2930 	  /* In some algorithms the first step involves zeroing the
2931 	     accumulator.  If subtracting from such an accumulator
2932 	     just emit the negation directly.  */
2933 	  if (integer_zerop (accumulator))
2934 	    stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2935 	  else
2936 	    stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2937 					tmp_var);
2938 	  break;
2939 	case alg_add_t2_m:
2940 	  tmp_var
2941 	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2942 					   shft_log, stmt_vinfo, synth_shift_p);
2943 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2944 	  break;
2945 	case alg_sub_t2_m:
2946 	  tmp_var
2947 	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2948 					   shft_log, stmt_vinfo, synth_shift_p);
2949 	  stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2950 	  break;
2951 	case alg_add_factor:
2952 	  tmp_var
2953 	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2954 					   shft_log, stmt_vinfo, synth_shift_p);
2955 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2956 				       tmp_var);
2957 	  break;
2958 	case alg_sub_factor:
2959 	  tmp_var
2960 	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2961 					   shft_log, stmt_vinfo, synth_shift_p);
2962 	  stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2963 				      accumulator);
2964 	  break;
2965 	default:
2966 	  gcc_unreachable ();
2967 	}
2968       /* We don't want to append the last stmt in the sequence to stmt_vinfo
2969 	 but rather return it directly.  */
2970 
2971       if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2972 	append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2973       accumulator = accum_tmp;
2974     }
2975   if (variant == negate_variant)
2976     {
2977       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2978       stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2979       accumulator = accum_tmp;
2980       if (cast_to_unsigned_p)
2981 	append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2982     }
2983   else if (variant == add_variant)
2984     {
2985       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2986       stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2987       accumulator = accum_tmp;
2988       if (cast_to_unsigned_p)
2989 	append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2990     }
2991   /* Move back to a signed type if needed.  */
2992   if (cast_to_unsigned_p)
2993     {
2994       tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2995       stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2996     }
2997 
2998   return stmt;
2999 }
3000 
3001 /* Detect multiplication by constant and convert it into a sequence of
3002    shifts and additions, subtractions, negations.  We reuse the
3003    choose_mult_variant algorithms from expmed.c
3004 
3005    Input/Output:
3006 
3007    STMT_VINFO: The stmt from which the pattern search begins,
3008    i.e. the mult stmt.
3009 
3010  Output:
3011 
3012   * TYPE_OUT: The type of the output of this pattern.
3013 
3014   * Return value: A new stmt that will be used to replace
3015     the multiplication.  */
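/* Illustrative sketch: for

     S1  a_t = b_t * 6;

   and a target without a native vector multiply, one possible
   replacement (the exact one depends on choose_mult_variant) is

     patt1 = b_t << 1;           b_t * 2
     patt2 = patt1 + b_t;        b_t * 3
     patt3 = patt2 << 1;         b_t * 6

   where the first two statements go into the pattern def sequence and
   the last one is the returned pattern statement.  */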
3016 
3017 static gimple *
3018 vect_recog_mult_pattern (vec_info *vinfo,
3019 			 stmt_vec_info stmt_vinfo, tree *type_out)
3020 {
3021   gimple *last_stmt = stmt_vinfo->stmt;
3022   tree oprnd0, oprnd1, vectype, itype;
3023   gimple *pattern_stmt;
3024 
3025   if (!is_gimple_assign (last_stmt))
3026     return NULL;
3027 
3028   if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
3029     return NULL;
3030 
3031   oprnd0 = gimple_assign_rhs1 (last_stmt);
3032   oprnd1 = gimple_assign_rhs2 (last_stmt);
3033   itype = TREE_TYPE (oprnd0);
3034 
3035   if (TREE_CODE (oprnd0) != SSA_NAME
3036       || TREE_CODE (oprnd1) != INTEGER_CST
3037       || !INTEGRAL_TYPE_P (itype)
3038       || !type_has_mode_precision_p (itype))
3039     return NULL;
3040 
3041   vectype = get_vectype_for_scalar_type (vinfo, itype);
3042   if (vectype == NULL_TREE)
3043     return NULL;
3044 
3045   /* If the target can handle vectorized multiplication natively,
3046      don't attempt to optimize this.  */
3047   optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
3048   if (mul_optab != unknown_optab)
3049     {
3050       machine_mode vec_mode = TYPE_MODE (vectype);
3051       int icode = (int) optab_handler (mul_optab, vec_mode);
3052       if (icode != CODE_FOR_nothing)
3053        return NULL;
3054     }
3055 
3056   pattern_stmt = vect_synth_mult_by_constant (vinfo,
3057 					      oprnd0, oprnd1, stmt_vinfo);
3058   if (!pattern_stmt)
3059     return NULL;
3060 
3061   /* Pattern detected.  */
3062   vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3063 
3064   *type_out = vectype;
3065 
3066   return pattern_stmt;
3067 }
3068 
3069 /* Detect a signed division by a constant that wouldn't be
3070    otherwise vectorized:
3071 
3072    type a_t, b_t;
3073 
3074    S1 a_t = b_t / N;
3075 
3076   where type 'type' is an integral type and N is a constant.
3077 
3078   Similarly handle modulo by a constant:
3079 
3080    S4 a_t = b_t % N;
3081 
3082   Input/Output:
3083 
3084   * STMT_VINFO: The stmt from which the pattern search begins,
3085     i.e. the division stmt.  S1 is replaced, if N is a power of two
3086     constant and type is signed, by:
3087   S3  y_t = b_t < 0 ? N - 1 : 0;
3088   S2  x_t = b_t + y_t;
3089   S1' a_t = x_t >> log2 (N);
3090 
3091     S4 is replaced, if N is a power of two constant and type is
3092     signed, by (where *_T temporaries have unsigned type):
3093   S9  y_T = b_t < 0 ? -1U : 0U;
3094   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3095   S7  z_t = (type) z_T;
3096   S6  w_t = b_t + z_t;
3097   S5  x_t = w_t & (N - 1);
3098   S4' a_t = x_t - z_t;
3099 
3100   Output:
3101 
3102   * TYPE_OUT: The type of the output of this pattern.
3103 
3104   * Return value: A new stmt that will be used to replace the division
3105     S1 or modulo S4 stmt.  */
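/* Worked example (signed 32-bit 'type', N == 4): the division form
   becomes

     S3  y_t = b_t < 0 ? 3 : 0;
     S2  x_t = b_t + y_t;
     S1' a_t = x_t >> 2;

   so b_t == -5 gives y_t == 3, x_t == -2 and a_t == -1, matching the
   truncating -5 / 4.  For the modulo form with the same inputs,
   z_t == 3, w_t == -2, x_t == (-2 & 3) == 2 and a_t == 2 - 3 == -1,
   matching -5 % 4.  */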
3106 
3107 static gimple *
3108 vect_recog_divmod_pattern (vec_info *vinfo,
3109 			   stmt_vec_info stmt_vinfo, tree *type_out)
3110 {
3111   gimple *last_stmt = stmt_vinfo->stmt;
3112   tree oprnd0, oprnd1, vectype, itype, cond;
3113   gimple *pattern_stmt, *def_stmt;
3114   enum tree_code rhs_code;
3115   optab optab;
3116   tree q;
3117   int dummy_int, prec;
3118 
3119   if (!is_gimple_assign (last_stmt))
3120     return NULL;
3121 
3122   rhs_code = gimple_assign_rhs_code (last_stmt);
3123   switch (rhs_code)
3124     {
3125     case TRUNC_DIV_EXPR:
3126     case EXACT_DIV_EXPR:
3127     case TRUNC_MOD_EXPR:
3128       break;
3129     default:
3130       return NULL;
3131     }
3132 
3133   oprnd0 = gimple_assign_rhs1 (last_stmt);
3134   oprnd1 = gimple_assign_rhs2 (last_stmt);
3135   itype = TREE_TYPE (oprnd0);
3136   if (TREE_CODE (oprnd0) != SSA_NAME
3137       || TREE_CODE (oprnd1) != INTEGER_CST
3138       || TREE_CODE (itype) != INTEGER_TYPE
3139       || !type_has_mode_precision_p (itype))
3140     return NULL;
3141 
3142   scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3143   vectype = get_vectype_for_scalar_type (vinfo, itype);
3144   if (vectype == NULL_TREE)
3145     return NULL;
3146 
3147   if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3148     {
3149       /* If the target can handle vectorized division or modulo natively,
3150 	 don't attempt to optimize this, since native division is likely
3151 	 to give smaller code.  */
3152       optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3153       if (optab != unknown_optab)
3154 	{
3155 	  machine_mode vec_mode = TYPE_MODE (vectype);
3156 	  int icode = (int) optab_handler (optab, vec_mode);
3157 	  if (icode != CODE_FOR_nothing)
3158 	    return NULL;
3159 	}
3160     }
3161 
3162   prec = TYPE_PRECISION (itype);
3163   if (integer_pow2p (oprnd1))
3164     {
3165       if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3166 	return NULL;
3167 
3168       /* Pattern detected.  */
3169       vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3170 
3171       *type_out = vectype;
3172 
3173       /* Check if the target supports this internal function.  */
3174       internal_fn ifn = IFN_DIV_POW2;
3175       if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3176 	{
3177 	  tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3178 
3179 	  tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3180 	  gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3181 	  gimple_call_set_lhs (div_stmt, var_div);
3182 
3183 	  if (rhs_code == TRUNC_MOD_EXPR)
3184 	    {
3185 	      append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
3186 	      def_stmt
3187 		= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3188 				       LSHIFT_EXPR, var_div, shift);
3189 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3190 	      pattern_stmt
3191 		= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3192 				       MINUS_EXPR, oprnd0,
3193 				       gimple_assign_lhs (def_stmt));
3194 	    }
3195 	  else
3196 	    pattern_stmt = div_stmt;
3197 	  gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3198 
3199 	  return pattern_stmt;
3200 	}
3201 
3202       cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3203 		     build_int_cst (itype, 0));
3204       if (rhs_code == TRUNC_DIV_EXPR
3205 	  || rhs_code == EXACT_DIV_EXPR)
3206 	{
3207 	  tree var = vect_recog_temp_ssa_var (itype, NULL);
3208 	  tree shift;
3209 	  def_stmt
3210 	    = gimple_build_assign (var, COND_EXPR, cond,
3211 				   fold_build2 (MINUS_EXPR, itype, oprnd1,
3212 						build_int_cst (itype, 1)),
3213 				   build_int_cst (itype, 0));
3214 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3215 	  var = vect_recog_temp_ssa_var (itype, NULL);
3216 	  def_stmt
3217 	    = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3218 				   gimple_assign_lhs (def_stmt));
3219 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3220 
3221 	  shift = build_int_cst (itype, tree_log2 (oprnd1));
3222 	  pattern_stmt
3223 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3224 				   RSHIFT_EXPR, var, shift);
3225 	}
3226       else
3227 	{
3228 	  tree signmask;
3229 	  if (compare_tree_int (oprnd1, 2) == 0)
3230 	    {
3231 	      signmask = vect_recog_temp_ssa_var (itype, NULL);
3232 	      def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3233 					      build_int_cst (itype, 1),
3234 					      build_int_cst (itype, 0));
3235 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3236 	    }
3237 	  else
3238 	    {
3239 	      tree utype
3240 		= build_nonstandard_integer_type (prec, 1);
3241 	      tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3242 	      tree shift
3243 		= build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3244 					- tree_log2 (oprnd1));
3245 	      tree var = vect_recog_temp_ssa_var (utype, NULL);
3246 
3247 	      def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3248 					      build_int_cst (utype, -1),
3249 					      build_int_cst (utype, 0));
3250 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3251 	      var = vect_recog_temp_ssa_var (utype, NULL);
3252 	      def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3253 					      gimple_assign_lhs (def_stmt),
3254 					      shift);
3255 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3256 	      signmask = vect_recog_temp_ssa_var (itype, NULL);
3257 	      def_stmt
3258 		= gimple_build_assign (signmask, NOP_EXPR, var);
3259 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3260 	    }
3261 	  def_stmt
3262 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3263 				   PLUS_EXPR, oprnd0, signmask);
3264 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3265 	  def_stmt
3266 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3267 				   BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3268 				   fold_build2 (MINUS_EXPR, itype, oprnd1,
3269 						build_int_cst (itype, 1)));
3270 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3271 
3272 	  pattern_stmt
3273 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3274 				   MINUS_EXPR, gimple_assign_lhs (def_stmt),
3275 				   signmask);
3276 	}
3277 
3278       return pattern_stmt;
3279     }
3280 
3281   if (prec > HOST_BITS_PER_WIDE_INT
3282       || integer_zerop (oprnd1))
3283     return NULL;
3284 
3285   if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3286     return NULL;
3287 
3288   if (TYPE_UNSIGNED (itype))
3289     {
3290       unsigned HOST_WIDE_INT mh, ml;
3291       int pre_shift, post_shift;
3292       unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3293 				  & GET_MODE_MASK (itype_mode));
3294       tree t1, t2, t3, t4;
3295 
3296       if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3297 	/* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
3298 	return NULL;
3299 
3300       /* Find a suitable multiplier and right shift count
3301 	 instead of multiplying with D.  */
3302       mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
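      /* Illustrative values: for prec == 32 and d == 3 this is expected
	 to return mh == 0, ml == 0xAAAAAAAB and post_shift == 1, so the
	 quotient below becomes q = (oprnd0 h* 0xAAAAAAAB) >> 1.  */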
3303 
3304       /* If the suggested multiplier is more than SIZE bits, we can do better
3305 	 for even divisors, using an initial right shift.  */
3306       if (mh != 0 && (d & 1) == 0)
3307 	{
3308 	  pre_shift = ctz_or_zero (d);
3309 	  mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3310 				  &ml, &post_shift, &dummy_int);
3311 	  gcc_assert (!mh);
3312 	}
3313       else
3314 	pre_shift = 0;
3315 
3316       if (mh != 0)
3317 	{
3318 	  if (post_shift - 1 >= prec)
3319 	    return NULL;
3320 
3321 	  /* t1 = oprnd0 h* ml;
3322 	     t2 = oprnd0 - t1;
3323 	     t3 = t2 >> 1;
3324 	     t4 = t1 + t3;
3325 	     q = t4 >> (post_shift - 1);  */
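	  /* Illustrative values: for prec == 32 and d == 7, ml is expected
	     to be 0x24924925 with post_shift == 3, so e.g. oprnd0 == 14
	     gives t1 = 2, t2 = 12, t3 = 6, t4 = 8 and q = 8 >> 2 == 2.  */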
3326 	  t1 = vect_recog_temp_ssa_var (itype, NULL);
3327 	  def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3328 					  build_int_cst (itype, ml));
3329 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3330 
3331 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
3332 	  def_stmt
3333 	    = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3334 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3335 
3336 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
3337 	  def_stmt
3338 	    = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3339 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3340 
3341 	  t4 = vect_recog_temp_ssa_var (itype, NULL);
3342 	  def_stmt
3343 	    = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3344 
3345 	  if (post_shift != 1)
3346 	    {
3347 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3348 
3349 	      q = vect_recog_temp_ssa_var (itype, NULL);
3350 	      pattern_stmt
3351 		= gimple_build_assign (q, RSHIFT_EXPR, t4,
3352 				       build_int_cst (itype, post_shift - 1));
3353 	    }
3354 	  else
3355 	    {
3356 	      q = t4;
3357 	      pattern_stmt = def_stmt;
3358 	    }
3359 	}
3360       else
3361 	{
3362 	  if (pre_shift >= prec || post_shift >= prec)
3363 	    return NULL;
3364 
3365 	  /* t1 = oprnd0 >> pre_shift;
3366 	     t2 = t1 h* ml;
3367 	     q = t2 >> post_shift;  */
3368 	  if (pre_shift)
3369 	    {
3370 	      t1 = vect_recog_temp_ssa_var (itype, NULL);
3371 	      def_stmt
3372 		= gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3373 				       build_int_cst (NULL, pre_shift));
3374 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3375 	    }
3376 	  else
3377 	    t1 = oprnd0;
3378 
3379 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
3380 	  def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3381 					  build_int_cst (itype, ml));
3382 
3383 	  if (post_shift)
3384 	    {
3385 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3386 
3387 	      q = vect_recog_temp_ssa_var (itype, NULL);
3388 	      def_stmt
3389 		= gimple_build_assign (q, RSHIFT_EXPR, t2,
3390 				       build_int_cst (itype, post_shift));
3391 	    }
3392 	  else
3393 	    q = t2;
3394 
3395 	  pattern_stmt = def_stmt;
3396 	}
3397     }
3398   else
3399     {
3400       unsigned HOST_WIDE_INT ml;
3401       int post_shift;
3402       HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
3403       unsigned HOST_WIDE_INT abs_d;
3404       bool add = false;
3405       tree t1, t2, t3, t4;
3406 
3407       /* Give up for -1.  */
3408       if (d == -1)
3409 	return NULL;
3410 
3411       /* Since d might be INT_MIN, we have to cast to
3412 	 unsigned HOST_WIDE_INT before negating to avoid
3413 	 undefined signed overflow.  */
3414       abs_d = (d >= 0
3415 	       ? (unsigned HOST_WIDE_INT) d
3416 	       : - (unsigned HOST_WIDE_INT) d);
3417 
3418       /* n rem d = n rem -d */
3419       if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3420 	{
3421 	  d = abs_d;
3422 	  oprnd1 = build_int_cst (itype, abs_d);
3423 	}
3424       if (HOST_BITS_PER_WIDE_INT >= prec
3425 	  && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3426 	/* This case is not handled correctly below.  */
3427 	return NULL;
3428 
3429       choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3430       if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3431 	{
3432 	  add = true;
3433 	  ml |= HOST_WIDE_INT_M1U << (prec - 1);
3434 	}
3435       if (post_shift >= prec)
3436 	return NULL;
3437 
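      /* Illustrative values: for prec == 32 and d == 3 the expected
	 result is ml == 0x55555556 with post_shift == 0 and add == false,
	 giving q = (oprnd0 h* 0x55555556) - (oprnd0 >> 31).  */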
3438       /* t1 = oprnd0 h* ml;  */
3439       t1 = vect_recog_temp_ssa_var (itype, NULL);
3440       def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3441 				      build_int_cst (itype, ml));
3442 
3443       if (add)
3444 	{
3445 	  /* t2 = t1 + oprnd0;  */
3446 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3447 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
3448 	  def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3449 	}
3450       else
3451 	t2 = t1;
3452 
3453       if (post_shift)
3454 	{
3455 	  /* t3 = t2 >> post_shift;  */
3456 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3457 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
3458 	  def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3459 					  build_int_cst (itype, post_shift));
3460 	}
3461       else
3462 	t3 = t2;
3463 
3464       wide_int oprnd0_min, oprnd0_max;
3465       int msb = 1;
3466       if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
3467 	{
3468 	  if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
3469 	    msb = 0;
3470 	  else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
3471 	    msb = -1;
3472 	}
3473 
3474       if (msb == 0 && d >= 0)
3475 	{
3476 	  /* q = t3;  */
3477 	  q = t3;
3478 	  pattern_stmt = def_stmt;
3479 	}
3480       else
3481 	{
3482 	  /* t4 = oprnd0 >> (prec - 1);
3483 	     or if we know from VRP that oprnd0 >= 0
3484 	     t4 = 0;
3485 	     or if we know from VRP that oprnd0 < 0
3486 	     t4 = -1;  */
3487 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3488 	  t4 = vect_recog_temp_ssa_var (itype, NULL);
3489 	  if (msb != 1)
3490 	    def_stmt = gimple_build_assign (t4, INTEGER_CST,
3491 					    build_int_cst (itype, msb));
3492 	  else
3493 	    def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3494 					    build_int_cst (itype, prec - 1));
3495 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3496 
3497 	  /* q = t3 - t4;  or q = t4 - t3;  */
3498 	  q = vect_recog_temp_ssa_var (itype, NULL);
3499 	  pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3500 					      d < 0 ? t3 : t4);
3501 	}
3502     }
3503 
3504   if (rhs_code == TRUNC_MOD_EXPR)
3505     {
3506       tree r, t1;
3507 
3508       /* We divided.  Now finish by:
3509 	 t1 = q * oprnd1;
3510 	 r = oprnd0 - t1;  */
3511       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
3512 
3513       t1 = vect_recog_temp_ssa_var (itype, NULL);
3514       def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3515       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3516 
3517       r = vect_recog_temp_ssa_var (itype, NULL);
3518       pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3519     }
3520 
3521   /* Pattern detected.  */
3522   vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3523 
3524   *type_out = vectype;
3525   return pattern_stmt;
3526 }
3527 
3528 /* Function vect_recog_mixed_size_cond_pattern
3529 
3530    Try to find the following pattern:
3531 
3532      type x_t, y_t;
3533      TYPE a_T, b_T, c_T;
3534    loop:
3535      S1  a_T = x_t CMP y_t ? b_T : c_T;
3536 
3537    where type 'TYPE' is an integral type which has different size
3538    from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
3539    than 'type', the constants need to fit into an integer type
3540    with the same width as 'type') or results of conversion from 'type'.
3541 
3542    Input:
3543 
3544    * STMT_VINFO: The stmt from which the pattern search begins.
3545 
3546    Output:
3547 
3548    * TYPE_OUT: The type of the output of this pattern.
3549 
3550    * Return value: A new stmt that will be used to replace the pattern.
3551 	Additionally a def_stmt is added.
3552 
3553 	a_it = x_t CMP y_t ? b_it : c_it;
3554 	a_T = (TYPE) a_it;  */
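/* Illustrative sketch with 16-bit 'type' and 32-bit 'TYPE':

     short x_t, y_t;
     int a_T;
     S1  a_T = x_t < y_t ? 5 : 0;

   is rewritten as

     a_it = x_t < y_t ? 5 : 0;
     a_T  = (int) a_it;

   so the comparison and the selection use the same 16-bit vector
   element size and only the final conversion widens to 32 bits.  */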
3555 
3556 static gimple *
3557 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
3558 				    stmt_vec_info stmt_vinfo, tree *type_out)
3559 {
3560   gimple *last_stmt = stmt_vinfo->stmt;
3561   tree cond_expr, then_clause, else_clause;
3562   tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3563   gimple *pattern_stmt, *def_stmt;
3564   tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3565   gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3566   bool promotion;
3567   tree comp_scalar_type;
3568 
3569   if (!is_gimple_assign (last_stmt)
3570       || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3571       || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3572     return NULL;
3573 
3574   cond_expr = gimple_assign_rhs1 (last_stmt);
3575   then_clause = gimple_assign_rhs2 (last_stmt);
3576   else_clause = gimple_assign_rhs3 (last_stmt);
3577 
3578   if (!COMPARISON_CLASS_P (cond_expr))
3579     return NULL;
3580 
3581   comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3582   comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
3583   if (comp_vectype == NULL_TREE)
3584     return NULL;
3585 
3586   type = gimple_expr_type (last_stmt);
3587   if (types_compatible_p (type, comp_scalar_type)
3588       || ((TREE_CODE (then_clause) != INTEGER_CST
3589 	   || TREE_CODE (else_clause) != INTEGER_CST)
3590 	  && !INTEGRAL_TYPE_P (comp_scalar_type))
3591       || !INTEGRAL_TYPE_P (type))
3592     return NULL;
3593 
3594   if ((TREE_CODE (then_clause) != INTEGER_CST
3595        && !type_conversion_p (vinfo, then_clause, false,
3596 			      &orig_type0, &def_stmt0, &promotion))
3597       || (TREE_CODE (else_clause) != INTEGER_CST
3598 	  && !type_conversion_p (vinfo, else_clause, false,
3599 				 &orig_type1, &def_stmt1, &promotion)))
3600     return NULL;
3601 
3602   if (orig_type0 && orig_type1
3603       && !types_compatible_p (orig_type0, orig_type1))
3604     return NULL;
3605 
3606   if (orig_type0)
3607     {
3608       if (!types_compatible_p (orig_type0, comp_scalar_type))
3609 	return NULL;
3610       then_clause = gimple_assign_rhs1 (def_stmt0);
3611       itype = orig_type0;
3612     }
3613 
3614   if (orig_type1)
3615     {
3616       if (!types_compatible_p (orig_type1, comp_scalar_type))
3617 	return NULL;
3618       else_clause = gimple_assign_rhs1 (def_stmt1);
3619       itype = orig_type1;
3620     }
3621 
3622 
3623   HOST_WIDE_INT cmp_mode_size
3624     = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3625 
3626   scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3627   if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3628     return NULL;
3629 
3630   vectype = get_vectype_for_scalar_type (vinfo, type);
3631   if (vectype == NULL_TREE)
3632     return NULL;
3633 
3634   if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3635     return NULL;
3636 
3637   if (itype == NULL_TREE)
3638     itype = build_nonstandard_integer_type (cmp_mode_size,
3639   					    TYPE_UNSIGNED (type));
3640 
3641   if (itype == NULL_TREE
3642       || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3643     return NULL;
3644 
3645   vecitype = get_vectype_for_scalar_type (vinfo, itype);
3646   if (vecitype == NULL_TREE)
3647     return NULL;
3648 
3649   if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3650     return NULL;
3651 
3652   if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3653     {
3654       if ((TREE_CODE (then_clause) == INTEGER_CST
3655 	   && !int_fits_type_p (then_clause, itype))
3656 	  || (TREE_CODE (else_clause) == INTEGER_CST
3657 	      && !int_fits_type_p (else_clause, itype)))
3658 	return NULL;
3659     }
3660 
3661   def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3662 				  COND_EXPR, unshare_expr (cond_expr),
3663 				  fold_convert (itype, then_clause),
3664 				  fold_convert (itype, else_clause));
3665   pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3666 				      NOP_EXPR, gimple_assign_lhs (def_stmt));
3667 
3668   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
3669   *type_out = vectype;
3670 
3671   vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3672 
3673   return pattern_stmt;
3674 }
3675 
3676 
3677 /* Helper function of vect_recog_bool_pattern.  Called recursively, return
3678    true if bool VAR can and should be optimized that way.  Assume it shouldn't
3679    in case it's a result of a comparison which can be directly vectorized into
3680    a vector comparison.  Fills in STMTS with all stmts visited during the
3681    walk.  */
3682 
3683 static bool
3684 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3685 {
3686   tree rhs1;
3687   enum tree_code rhs_code;
3688 
3689   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3690   if (!def_stmt_info)
3691     return false;
3692 
3693   gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3694   if (!def_stmt)
3695     return false;
3696 
3697   if (stmts.contains (def_stmt))
3698     return true;
3699 
3700   rhs1 = gimple_assign_rhs1 (def_stmt);
3701   rhs_code = gimple_assign_rhs_code (def_stmt);
3702   switch (rhs_code)
3703     {
3704     case SSA_NAME:
3705       if (! check_bool_pattern (rhs1, vinfo, stmts))
3706 	return false;
3707       break;
3708 
3709     CASE_CONVERT:
3710       if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3711 	return false;
3712       if (! check_bool_pattern (rhs1, vinfo, stmts))
3713 	return false;
3714       break;
3715 
3716     case BIT_NOT_EXPR:
3717       if (! check_bool_pattern (rhs1, vinfo, stmts))
3718 	return false;
3719       break;
3720 
3721     case BIT_AND_EXPR:
3722     case BIT_IOR_EXPR:
3723     case BIT_XOR_EXPR:
3724       if (! check_bool_pattern (rhs1, vinfo, stmts)
3725 	  || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3726 	return false;
3727       break;
3728 
3729     default:
3730       if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3731 	{
3732 	  tree vecitype, comp_vectype;
3733 
3734 	  /* If the comparison can throw, then is_gimple_condexpr will be
3735 	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
3736 	  if (stmt_could_throw_p (cfun, def_stmt))
3737 	    return false;
3738 
3739 	  comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
3740 	  if (comp_vectype == NULL_TREE)
3741 	    return false;
3742 
3743 	  tree mask_type = get_mask_type_for_scalar_type (vinfo,
3744 							  TREE_TYPE (rhs1));
3745 	  if (mask_type
3746 	      && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3747 	    return false;
3748 
3749 	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3750 	    {
3751 	      scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3752 	      tree itype
3753 		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3754 	      vecitype = get_vectype_for_scalar_type (vinfo, itype);
3755 	      if (vecitype == NULL_TREE)
3756 		return false;
3757 	    }
3758 	  else
3759 	    vecitype = comp_vectype;
3760 	  if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3761 	    return false;
3762 	}
3763       else
3764 	return false;
3765       break;
3766     }
3767 
3768   bool res = stmts.add (def_stmt);
3769   /* We can't end up recursing when just visiting SSA defs but not PHIs.  */
3770   gcc_assert (!res);
3771 
3772   return true;
3773 }
3774 
3775 
3776 /* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
3777    stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
3778    pattern sequence.  */
3779 
3780 static tree
3781 adjust_bool_pattern_cast (vec_info *vinfo,
3782 			  tree type, tree var, stmt_vec_info stmt_info)
3783 {
3784   gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3785 					   NOP_EXPR, var);
3786   append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
3787 			  get_vectype_for_scalar_type (vinfo, type));
3788   return gimple_assign_lhs (cast_stmt);
3789 }
3790 
3791 /* Helper function of vect_recog_bool_pattern.  Do the actual transformations.
3792    VAR is an SSA_NAME that should be transformed from bool to a wider integer
3793    type, OUT_TYPE is the desired final integer type of the whole pattern.
3794    STMT_INFO is the info of the pattern root and is where pattern stmts should
3795    be associated with.  DEFS is a map of pattern defs.  */
3796 
3797 static void
3798 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
3799 		     stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3800 {
3801   gimple *stmt = SSA_NAME_DEF_STMT (var);
3802   enum tree_code rhs_code, def_rhs_code;
3803   tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3804   location_t loc;
3805   gimple *pattern_stmt, *def_stmt;
3806   tree trueval = NULL_TREE;
3807 
3808   rhs1 = gimple_assign_rhs1 (stmt);
3809   rhs2 = gimple_assign_rhs2 (stmt);
3810   rhs_code = gimple_assign_rhs_code (stmt);
3811   loc = gimple_location (stmt);
3812   switch (rhs_code)
3813     {
3814     case SSA_NAME:
3815     CASE_CONVERT:
3816       irhs1 = *defs.get (rhs1);
3817       itype = TREE_TYPE (irhs1);
3818       pattern_stmt
3819 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3820 			       SSA_NAME, irhs1);
3821       break;
3822 
3823     case BIT_NOT_EXPR:
3824       irhs1 = *defs.get (rhs1);
3825       itype = TREE_TYPE (irhs1);
3826       pattern_stmt
3827 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3828 			       BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3829       break;
3830 
3831     case BIT_AND_EXPR:
3832       /* Try to optimize x = y & (a < b ? 1 : 0); into
3833 	 x = (a < b ? y : 0);
3834 
3835 	 E.g. for:
3836 	   bool a_b, b_b, c_b;
3837 	   TYPE d_T;
3838 
3839 	   S1  a_b = x1 CMP1 y1;
3840 	   S2  b_b = x2 CMP2 y2;
3841 	   S3  c_b = a_b & b_b;
3842 	   S4  d_T = (TYPE) c_b;
3843 
3844 	 we would normally emit:
3845 
3846 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
3847 	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
3848 	   S3'  c_T = a_T & b_T;
3849 	   S4'  d_T = c_T;
3850 
3851 	 but we can save one stmt by using the
3852 	 result of one of the COND_EXPRs in the other COND_EXPR and leave
3853 	 BIT_AND_EXPR stmt out:
3854 
3855 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
3856 	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
3857 	   S4'  d_T = c_T;
3858 
3859 	 At least when VEC_COND_EXPR is implemented using masks
3860 	 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3861 	 computes the comparison masks and ands it, in one case with
3862 	 all ones vector, in the other case with a vector register.
3863 	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3864 	 often more expensive.  */
3865       def_stmt = SSA_NAME_DEF_STMT (rhs2);
3866       def_rhs_code = gimple_assign_rhs_code (def_stmt);
3867       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3868 	{
3869 	  irhs1 = *defs.get (rhs1);
3870 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3871 	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
3872 	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3873 	    {
3874 	      rhs_code = def_rhs_code;
3875 	      rhs1 = def_rhs1;
3876 	      rhs2 = gimple_assign_rhs2 (def_stmt);
3877 	      trueval = irhs1;
3878 	      goto do_compare;
3879 	    }
3880 	  else
3881 	    irhs2 = *defs.get (rhs2);
3882 	  goto and_ior_xor;
3883 	}
3884       def_stmt = SSA_NAME_DEF_STMT (rhs1);
3885       def_rhs_code = gimple_assign_rhs_code (def_stmt);
3886       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3887 	{
3888 	  irhs2 = *defs.get (rhs2);
3889 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3890 	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
3891 	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3892 	    {
3893 	      rhs_code = def_rhs_code;
3894 	      rhs1 = def_rhs1;
3895 	      rhs2 = gimple_assign_rhs2 (def_stmt);
3896 	      trueval = irhs2;
3897 	      goto do_compare;
3898 	    }
3899 	  else
3900 	    irhs1 = *defs.get (rhs1);
3901 	  goto and_ior_xor;
3902 	}
3903       /* FALLTHRU */
3904     case BIT_IOR_EXPR:
3905     case BIT_XOR_EXPR:
3906       irhs1 = *defs.get (rhs1);
3907       irhs2 = *defs.get (rhs2);
3908     and_ior_xor:
3909       if (TYPE_PRECISION (TREE_TYPE (irhs1))
3910 	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
3911 	{
3912 	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3913 	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3914 	  int out_prec = TYPE_PRECISION (out_type);
3915 	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3916 	    irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
3917 					      stmt_info);
3918 	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3919 	    irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
3920 					      stmt_info);
3921 	  else
3922 	    {
3923 	      irhs1 = adjust_bool_pattern_cast (vinfo,
3924 						out_type, irhs1, stmt_info);
3925 	      irhs2 = adjust_bool_pattern_cast (vinfo,
3926 						out_type, irhs2, stmt_info);
3927 	    }
3928 	}
3929       itype = TREE_TYPE (irhs1);
3930       pattern_stmt
3931 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3932 			       rhs_code, irhs1, irhs2);
3933       break;
3934 
3935     default:
3936     do_compare:
3937       gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3938       if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3939 	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3940 	  || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3941 		       GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3942 	{
3943 	  scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3944 	  itype
3945 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3946 	}
3947       else
3948 	itype = TREE_TYPE (rhs1);
3949       cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3950       if (trueval == NULL_TREE)
3951 	trueval = build_int_cst (itype, 1);
3952       else
3953 	gcc_checking_assert (useless_type_conversion_p (itype,
3954 							TREE_TYPE (trueval)));
3955       pattern_stmt
3956 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3957 			       COND_EXPR, cond_expr, trueval,
3958 			       build_int_cst (itype, 0));
3959       break;
3960     }
3961 
3962   gimple_set_location (pattern_stmt, loc);
3963   append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
3964 			  get_vectype_for_scalar_type (vinfo, itype));
3965   defs.put (var, gimple_assign_lhs (pattern_stmt));
3966 }
3967 
3968 /* Comparison function to qsort a vector of gimple stmts after UID.  */
3969 
3970 static int
3971 sort_after_uid (const void *p1, const void *p2)
3972 {
3973   const gimple *stmt1 = *(const gimple * const *)p1;
3974   const gimple *stmt2 = *(const gimple * const *)p2;
3975   return gimple_uid (stmt1) - gimple_uid (stmt2);
3976 }
3977 
3978 /* Create pattern stmts for all stmts participating in the bool pattern
3979    specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3980    OUT_TYPE.  Return the def of the pattern root.  */
3981 
3982 static tree
3983 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
3984 		   tree out_type, stmt_vec_info stmt_info)
3985 {
3986   /* Gather original stmts in the bool pattern in their order of appearance
3987      in the IL.  */
3988   auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3989   for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3990        i != bool_stmt_set.end (); ++i)
3991     bool_stmts.quick_push (*i);
3992   bool_stmts.qsort (sort_after_uid);
3993 
3994   /* Now process them in that order, producing pattern stmts.  */
3995   hash_map <tree, tree> defs;
3996   for (unsigned i = 0; i < bool_stmts.length (); ++i)
3997     adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
3998 			 out_type, stmt_info, defs);
3999 
4000   /* Pop the last pattern seq stmt and install it as pattern root for STMT.  */
4001   gimple *pattern_stmt
4002     = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4003   return gimple_assign_lhs (pattern_stmt);
4004 }
4005 
4006 /* Return the proper type for converting bool VAR into
4007    an integer value or NULL_TREE if no such type exists.
4008    The type is chosen so that the converted value has the
4009    same number of elements as VAR's vector type.  */
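/* For instance, if VAR is defined by a comparison of two 32-bit
   integers, the recorded mask precision is expected to be 32, so a
   32-bit unsigned type is returned and an integer vector of that type
   has the same number of lanes as the mask vector.  */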
4010 
4011 static tree
4012 integer_type_for_mask (tree var, vec_info *vinfo)
4013 {
4014   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4015     return NULL_TREE;
4016 
4017   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
4018   if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
4019     return NULL_TREE;
4020 
4021   return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
4022 }
4023 
4024 /* Function vect_recog_bool_pattern
4025 
4026    Try to find pattern like following:
4027 
4028      bool a_b, b_b, c_b, d_b, e_b;
4029      TYPE f_T;
4030    loop:
4031      S1  a_b = x1 CMP1 y1;
4032      S2  b_b = x2 CMP2 y2;
4033      S3  c_b = a_b & b_b;
4034      S4  d_b = x3 CMP3 y3;
4035      S5  e_b = c_b | d_b;
4036      S6  f_T = (TYPE) e_b;
4037 
4038    where type 'TYPE' is an integral type.  Or a similar pattern
4039    ending in
4040 
4041      S6  f_Y = e_b ? r_Y : s_Y;
4042 
4043    as results from if-conversion of a complex condition.
4044 
4045    Input:
4046 
4047    * STMT_VINFO: The stmt at the end from which the pattern
4048 		 search begins, i.e. cast of a bool to
4049 		 an integer type.
4050 
4051    Output:
4052 
4053    * TYPE_OUT: The type of the output of this pattern.
4054 
4055    * Return value: A new stmt that will be used to replace the pattern.
4056 
4057 	Assuming size of TYPE is the same as size of all comparisons
4058 	(otherwise some casts would be added where needed), for the above
4059 	sequence we create the following related pattern stmts:
4060 	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
4061 	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
4062 	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
4063 	S5'  e_T = c_T | d_T;
4064 	S6'  f_T = e_T;
4065 
4066 	Instead of the above S3' we could emit:
4067 	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
4068 	S3'  c_T = a_T | b_T;
4069 	but the above is more efficient.  */
4070 
4071 static gimple *
4072 vect_recog_bool_pattern (vec_info *vinfo,
4073 			 stmt_vec_info stmt_vinfo, tree *type_out)
4074 {
4075   gimple *last_stmt = stmt_vinfo->stmt;
4076   enum tree_code rhs_code;
4077   tree var, lhs, rhs, vectype;
4078   gimple *pattern_stmt;
4079 
4080   if (!is_gimple_assign (last_stmt))
4081     return NULL;
4082 
4083   var = gimple_assign_rhs1 (last_stmt);
4084   lhs = gimple_assign_lhs (last_stmt);
4085   rhs_code = gimple_assign_rhs_code (last_stmt);
4086 
4087   if (rhs_code == VIEW_CONVERT_EXPR)
4088     var = TREE_OPERAND (var, 0);
4089 
4090   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4091     return NULL;
4092 
4093   hash_set<gimple *> bool_stmts;
4094 
4095   if (CONVERT_EXPR_CODE_P (rhs_code)
4096       || rhs_code == VIEW_CONVERT_EXPR)
4097     {
4098       if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4099 	  || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4100 	return NULL;
4101       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4102       if (vectype == NULL_TREE)
4103 	return NULL;
4104 
4105       if (check_bool_pattern (var, vinfo, bool_stmts))
4106 	{
4107 	  rhs = adjust_bool_stmts (vinfo, bool_stmts,
4108 				   TREE_TYPE (lhs), stmt_vinfo);
4109 	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4110 	  if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4111 	    pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4112 	  else
4113 	    pattern_stmt
4114 	      = gimple_build_assign (lhs, NOP_EXPR, rhs);
4115 	}
4116       else
4117 	{
4118 	  tree type = integer_type_for_mask (var, vinfo);
4119 	  tree cst0, cst1, tmp;
4120 
4121 	  if (!type)
4122 	    return NULL;
4123 
4124 	  /* We may directly use the cond with a narrowed type to avoid
4125 	     multiple cond exprs with subsequent result packing and
4126 	     perform a single cond with a packed mask instead.  In case
4127 	     of widening it is better to create the cond first and then
4128 	     extract the results.  */
4129 	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4130 	    type = TREE_TYPE (lhs);
4131 
4132 	  cst0 = build_int_cst (type, 0);
4133 	  cst1 = build_int_cst (type, 1);
4134 	  tmp = vect_recog_temp_ssa_var (type, NULL);
4135 	  pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4136 
4137 	  if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4138 	    {
4139 	      tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4140 	      append_pattern_def_seq (vinfo, stmt_vinfo,
4141 				      pattern_stmt, new_vectype);
4142 
4143 	      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4144 	      pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4145 	    }
4146 	}
4147 
4148       *type_out = vectype;
4149       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4150 
4151       return pattern_stmt;
4152     }
4153   else if (rhs_code == COND_EXPR
4154 	   && TREE_CODE (var) == SSA_NAME)
4155     {
4156       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4157       if (vectype == NULL_TREE)
4158 	return NULL;
4159 
4160       /* Build a scalar type for the boolean result that when
4161          vectorized matches the vector type of the result in
4162 	 size and number of elements.  */
4163       unsigned prec
4164 	= vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4165 			       TYPE_VECTOR_SUBPARTS (vectype));
4166 
4167       tree type
4168 	= build_nonstandard_integer_type (prec,
4169 					  TYPE_UNSIGNED (TREE_TYPE (var)));
4170       if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4171 	return NULL;
4172 
4173       if (!check_bool_pattern (var, vinfo, bool_stmts))
4174 	return NULL;
4175 
4176       rhs = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
4177 
4178       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4179       pattern_stmt
4180 	  = gimple_build_assign (lhs, COND_EXPR,
4181 				 build2 (NE_EXPR, boolean_type_node,
4182 					 rhs, build_int_cst (type, 0)),
4183 				 gimple_assign_rhs2 (last_stmt),
4184 				 gimple_assign_rhs3 (last_stmt));
4185       *type_out = vectype;
4186       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4187 
4188       return pattern_stmt;
4189     }
4190   else if (rhs_code == SSA_NAME
4191 	   && STMT_VINFO_DATA_REF (stmt_vinfo))
4192     {
4193       stmt_vec_info pattern_stmt_info;
4194       tree nunits_vectype;
4195       if (!vect_get_vector_types_for_stmt (vinfo, stmt_vinfo, &vectype,
4196 					   &nunits_vectype)
4197 	  || !VECTOR_MODE_P (TYPE_MODE (vectype)))
4198 	return NULL;
4199 
4200       if (check_bool_pattern (var, vinfo, bool_stmts))
4201 	rhs = adjust_bool_stmts (vinfo, bool_stmts,
4202 				 TREE_TYPE (vectype), stmt_vinfo);
4203       else
4204 	{
4205 	  tree type = integer_type_for_mask (var, vinfo);
4206 	  tree cst0, cst1, new_vectype;
4207 
4208 	  if (!type)
4209 	    return NULL;
4210 
4211 	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4212 	    type = TREE_TYPE (vectype);
4213 
4214 	  cst0 = build_int_cst (type, 0);
4215 	  cst1 = build_int_cst (type, 1);
4216 	  new_vectype = get_vectype_for_scalar_type (vinfo, type);
4217 
4218 	  rhs = vect_recog_temp_ssa_var (type, NULL);
4219 	  pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4220 	  append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
4221 	}
4222 
4223       lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4224       if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4225 	{
4226 	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4227 	  gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4228 	  append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
4229 	  rhs = rhs2;
4230 	}
4231       pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4232       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4233       vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4234       *type_out = vectype;
4235       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4236 
4237       return pattern_stmt;
4238     }
4239   else
4240     return NULL;
4241 }
4242 
4243 
4244 /* A helper for vect_recog_mask_conversion_pattern.  Build
4245    conversion of MASK to a type suitable for masking VECTYPE.
4246    Built statement gets required vectype and is appended to
4247    a pattern sequence of STMT_VINFO.
4248 
4249    Return converted mask.  */
4250 
4251 static tree
4252 build_mask_conversion (vec_info *vinfo,
4253 		       tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4254 {
4255   gimple *stmt;
4256   tree masktype, tmp;
4257 
4258   masktype = truth_type_for (vectype);
4259   tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4260   stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4261   append_pattern_def_seq (vinfo, stmt_vinfo,
4262 			  stmt, masktype, TREE_TYPE (vectype));
4263 
4264   return tmp;
4265 }
4266 
4267 
4268 /* Function vect_recog_mask_conversion_pattern
4269 
4270    Try to find statements which require boolean type
4271    conversion.  Additional conversion statements are
4272    added to handle such cases.  For example:
4273 
4274    bool m_1, m_2, m_3;
4275    int i_4, i_5;
4276    double d_6, d_7;
4277    char c_1, c_2, c_3;
4278 
4279    S1   m_1 = i_4 > i_5;
4280    S2   m_2 = d_6 < d_7;
4281    S3   m_3 = m_1 & m_2;
4282    S4   c_1 = m_3 ? c_2 : c_3;
4283 
4284    Will be transformed into:
4285 
4286    S1   m_1 = i_4 > i_5;
4287    S2   m_2 = d_6 < d_7;
4288    S3'' m_2' = (_Bool[bitsize=32])m_2
4289    S3'  m_3' = m_1 & m_2';
4290    S4'' m_3'' = (_Bool[bitsize=8])m_3'
4291    S4'  c_1' = m_3'' ? c_2 : c_3;  */
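/* Taking 128-bit vectors as an illustration: the mask m_2 from the
   double comparison naturally has 64-bit elements, m_1 from the int
   comparison has 32-bit elements, and the mask feeding the char
   COND_EXPR needs 8-bit elements; the inserted conversions S3'' and
   S4'' give each user a mask with the element size it expects.  */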
4292 
4293 static gimple *
4294 vect_recog_mask_conversion_pattern (vec_info *vinfo,
4295 				    stmt_vec_info stmt_vinfo, tree *type_out)
4296 {
4297   gimple *last_stmt = stmt_vinfo->stmt;
4298   enum tree_code rhs_code;
4299   tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4300   tree vectype1, vectype2;
4301   stmt_vec_info pattern_stmt_info;
4302   tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
4303   tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
4304 
4305   /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion.  */
4306   if (is_gimple_call (last_stmt)
4307       && gimple_call_internal_p (last_stmt))
4308     {
4309       gcall *pattern_stmt;
4310 
4311       internal_fn ifn = gimple_call_internal_fn (last_stmt);
4312       int mask_argno = internal_fn_mask_index (ifn);
4313       if (mask_argno < 0)
4314 	return NULL;
4315 
4316       bool store_p = internal_store_fn_p (ifn);
4317       if (store_p)
4318 	{
4319 	  int rhs_index = internal_fn_stored_value_index (ifn);
4320 	  tree rhs = gimple_call_arg (last_stmt, rhs_index);
4321 	  vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4322 	}
4323       else
4324 	{
4325 	  lhs = gimple_call_lhs (last_stmt);
4326 	  vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4327 	}
4328 
4329       tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4330       tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
4331       if (!mask_arg_type)
4332 	return NULL;
4333       vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4334 
4335       if (!vectype1 || !vectype2
4336 	  || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4337 		       TYPE_VECTOR_SUBPARTS (vectype2)))
4338 	return NULL;
4339 
4340       tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
4341 
4342       auto_vec<tree, 8> args;
4343       unsigned int nargs = gimple_call_num_args (last_stmt);
4344       args.safe_grow (nargs, true);
4345       for (unsigned int i = 0; i < nargs; ++i)
4346 	args[i] = ((int) i == mask_argno
4347 		   ? tmp
4348 		   : gimple_call_arg (last_stmt, i));
4349       pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4350 
4351       if (!store_p)
4352 	{
4353 	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4354 	  gimple_call_set_lhs (pattern_stmt, lhs);
4355 	}
4356       gimple_call_set_nothrow (pattern_stmt, true);
4357 
4358       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4359       if (STMT_VINFO_DATA_REF (stmt_vinfo))
4360 	vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4361 
4362       *type_out = vectype1;
4363       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4364 
4365       return pattern_stmt;
4366     }
4367 
4368   if (!is_gimple_assign (last_stmt))
4369     return NULL;
4370 
4371   gimple *pattern_stmt;
4372   lhs = gimple_assign_lhs (last_stmt);
4373   rhs1 = gimple_assign_rhs1 (last_stmt);
4374   rhs_code = gimple_assign_rhs_code (last_stmt);
4375 
4376   /* Check for cond expression requiring mask conversion.  */
4377   if (rhs_code == COND_EXPR)
4378     {
4379       vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4380 
4381       if (TREE_CODE (rhs1) == SSA_NAME)
4382 	{
4383 	  rhs1_type = integer_type_for_mask (rhs1, vinfo);
4384 	  if (!rhs1_type)
4385 	    return NULL;
4386 	}
4387       else if (COMPARISON_CLASS_P (rhs1))
4388 	{
4389 	  /* Check whether we're comparing scalar booleans and (if so)
4390 	     whether a better mask type exists than the mask associated
4391 	     with boolean-sized elements.  This avoids unnecessary packs
4392 	     and unpacks if the booleans are set from comparisons of
4393 	     wider types.  E.g. in:
4394 
4395 	       int x1, x2, x3, x4, y1, y2;
4396 	       ...
4397 	       bool b1 = (x1 == x2);
4398 	       bool b2 = (x3 == x4);
4399 	       ... = b1 == b2 ? y1 : y2;
4400 
4401 	     it is better for b1 and b2 to use the mask type associated
4402 	     with int elements rather than bool (byte) elements.  */
4403 	  rhs1_op0 = TREE_OPERAND (rhs1, 0);
4404 	  rhs1_op1 = TREE_OPERAND (rhs1, 1);
4405 	  if (!rhs1_op0 || !rhs1_op1)
4406 	    return NULL;
4407 	  rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
4408 	  rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
4409 
4410 	  if (!rhs1_op0_type)
4411 	    rhs1_type = TREE_TYPE (rhs1_op0);
4412 	  else if (!rhs1_op1_type)
4413 	    rhs1_type = TREE_TYPE (rhs1_op1);
4414 	  else if (TYPE_PRECISION (rhs1_op0_type)
4415 		   != TYPE_PRECISION (rhs1_op1_type))
4416 	    {
4417 	      int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
4418 			 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4419 	      int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
4420 			 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4421 	      if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
4422 		{
4423 		  if (abs (tmp0) > abs (tmp1))
4424 		    rhs1_type = rhs1_op1_type;
4425 		  else
4426 		    rhs1_type = rhs1_op0_type;
4427 		}
4428 	      else
4429 		rhs1_type = build_nonstandard_integer_type
4430 		  (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
4431 	    }
4432 	  else
4433 	    rhs1_type = rhs1_op0_type;
4434 	}
4435       else
4436 	return NULL;
4437 
4438       vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4439 
4440       if (!vectype1 || !vectype2)
4441 	return NULL;
4442 
4443       /* Continue if a conversion is needed.  Also continue if we have
4444 	 a comparison whose vector type would normally be different from
4445 	 VECTYPE2 when considered in isolation.  In that case we'll
4446 	 replace the comparison with an SSA name (so that we can record
4447 	 its vector type) and behave as though the comparison was an SSA
4448 	 name from the outset.  */
4449       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4450 		    TYPE_VECTOR_SUBPARTS (vectype2))
4451 	  && !rhs1_op0_type
4452 	  && !rhs1_op1_type)
4453 	return NULL;
4454 
4455       /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4456          in place, we can handle it in vectorizable_condition.  This avoids
4457 	 unnecessary promotion stmts and increased vectorization factor.  */
4458       if (COMPARISON_CLASS_P (rhs1)
4459 	  && INTEGRAL_TYPE_P (rhs1_type)
4460 	  && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4461 		       TYPE_VECTOR_SUBPARTS (vectype2)))
4462 	{
4463 	  enum vect_def_type dt;
4464 	  if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4465 	      && dt == vect_external_def
4466 	      && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4467 	      && (dt == vect_external_def
4468 		  || dt == vect_constant_def))
4469 	    {
4470 	      tree wide_scalar_type = build_nonstandard_integer_type
4471 		(vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
4472 	      tree vectype3 = get_vectype_for_scalar_type (vinfo,
4473 							   wide_scalar_type);
4474 	      if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4475 		return NULL;
4476 	    }
4477 	}
4478 
4479       /* If rhs1 is a comparison we need to move it into a
4480 	 separate statement.  */
4481       if (TREE_CODE (rhs1) != SSA_NAME)
4482 	{
4483 	  tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4484 	  if (rhs1_op0_type
4485 	      && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
4486 	    rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
4487 					      vectype2, stmt_vinfo);
4488 	  if (rhs1_op1_type
4489 	      && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
4490 	    rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
4491 					      vectype2, stmt_vinfo);
4492 	  pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
4493 					      rhs1_op0, rhs1_op1);
4494 	  rhs1 = tmp;
4495 	  append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
4496 				  rhs1_type);
4497 	}
4498 
4499       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4500 		    TYPE_VECTOR_SUBPARTS (vectype2)))
4501 	tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4502       else
4503 	tmp = rhs1;
4504 
4505       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4506       pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4507 					  gimple_assign_rhs2 (last_stmt),
4508 					  gimple_assign_rhs3 (last_stmt));
4509 
4510       *type_out = vectype1;
4511       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4512 
4513       return pattern_stmt;
4514     }
4515 
4516   /* Now check for binary boolean operations requiring conversion for
4517      one of the operands.  */
4518   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4519     return NULL;
4520 
4521   if (rhs_code != BIT_IOR_EXPR
4522       && rhs_code != BIT_XOR_EXPR
4523       && rhs_code != BIT_AND_EXPR
4524       && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4525     return NULL;
4526 
4527   rhs2 = gimple_assign_rhs2 (last_stmt);
4528 
4529   rhs1_type = integer_type_for_mask (rhs1, vinfo);
4530   rhs2_type = integer_type_for_mask (rhs2, vinfo);
4531 
4532   if (!rhs1_type || !rhs2_type
4533       || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4534     return NULL;
4535 
4536   if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4537     {
4538       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4539       if (!vectype1)
4540 	return NULL;
4541       rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
4542     }
4543   else
4544     {
4545       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
4546       if (!vectype1)
4547 	return NULL;
4548       rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4549     }
4550 
4551   lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4552   pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4553 
4554   *type_out = vectype1;
4555   vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4556 
4557   return pattern_stmt;
4558 }
4559 
4560 /* STMT_INFO is a load or store.  If the load or store is conditional, return
4561    the boolean condition under which it occurs, otherwise return null.  */
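
/* For example (an illustrative sketch rather than a specific testcase):
   after if-conversion a conditional store such as

     if (c[i]) a[i] = x;

   is represented by an internal-function call along the lines of

     .MASK_STORE (&a[i], align, _mask, x);

   and this function returns _mask for it, whereas an unconditional
   "a[i] = x" assignment makes it return NULL_TREE.  */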
4562 
4563 static tree
4564 vect_get_load_store_mask (stmt_vec_info stmt_info)
4565 {
4566   if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4567     {
4568       gcc_assert (gimple_assign_single_p (def_assign));
4569       return NULL_TREE;
4570     }
4571 
4572   if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4573     {
4574       internal_fn ifn = gimple_call_internal_fn (def_call);
4575       int mask_index = internal_fn_mask_index (ifn);
4576       return gimple_call_arg (def_call, mask_index);
4577     }
4578 
4579   gcc_unreachable ();
4580 }
4581 
4582 /* Return MASK if MASK is suitable for masking an operation on vectors
4583    of type VECTYPE, otherwise convert it into such a form and return
4584    the result.  Associate any conversion statements with STMT_INFO's
4585    pattern.  */
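
/* For instance (a sketch): a mask computed by comparing chars has more
   elements per vector than a mask suitable for an operation on int
   vectors, so when such a mask reaches an int-element VECTYPE the
   subpart counts differ and build_mask_conversion is used to produce a
   mask with the same number of elements as VECTYPE.  */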
4586 
4587 static tree
4588 vect_convert_mask_for_vectype (tree mask, tree vectype,
4589 			       stmt_vec_info stmt_info, vec_info *vinfo)
4590 {
4591   tree mask_type = integer_type_for_mask (mask, vinfo);
4592   if (mask_type)
4593     {
4594       tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
4595       if (mask_vectype
4596 	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4597 		       TYPE_VECTOR_SUBPARTS (mask_vectype)))
4598 	mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
4599     }
4600   return mask;
4601 }
4602 
4603 /* Return the equivalent of:
4604 
4605      fold_convert (TYPE, VALUE)
4606 
4607    with the expectation that the operation will be vectorized.
4608    If new statements are needed, add them as pattern statements
4609    to STMT_INFO.  */
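
/* For instance (a sketch with illustrative names): if a gather offset
   OFF is a 32-bit SSA name but a wider OFFSET_TYPE is wanted, this adds
   a pattern statement equivalent to

     patt_off = (OFFSET_TYPE) OFF;

   and returns patt_off; if the existing type is already compatible,
   VALUE is simply returned unchanged.  */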
4610 
4611 static tree
4612 vect_add_conversion_to_pattern (vec_info *vinfo,
4613 				tree type, tree value, stmt_vec_info stmt_info)
4614 {
4615   if (useless_type_conversion_p (type, TREE_TYPE (value)))
4616     return value;
4617 
4618   tree new_value = vect_recog_temp_ssa_var (type, NULL);
4619   gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4620   append_pattern_def_seq (vinfo, stmt_info, conversion,
4621 			  get_vectype_for_scalar_type (vinfo, type));
4622   return new_value;
4623 }
4624 
4625 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4626    internal function.  Return the final statement on success and set
4627    *TYPE_OUT to the vector type being loaded or stored.
4628 
4629    This function only handles gathers and scatters that were recognized
4630    as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */
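
/* As an illustrative sketch of the result (the exact internal function
   and arguments depend on the target and on GS_INFO): an unconditional
   gather load such as

     ... = a[b[i]];

   is replaced by a pattern call of the form

     patt = .GATHER_LOAD (base, offset, scale, zero);

   with a trailing mask argument added for conditional accesses, and
   scatter stores similarly become .SCATTER_STORE or .MASK_SCATTER_STORE
   calls.  */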
4631 
4632 static gimple *
4633 vect_recog_gather_scatter_pattern (vec_info *vinfo,
4634 				   stmt_vec_info stmt_info, tree *type_out)
4635 {
4636   /* Currently we only support this for loop vectorization.  */
4637   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4638   if (!loop_vinfo)
4639     return NULL;
4640 
4641   /* Make sure that we're looking at a gather load or scatter store.  */
4642   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4643   if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4644     return NULL;
4645 
4646   /* Get the boolean that controls whether the load or store happens.
4647      This is null if the operation is unconditional.  */
4648   tree mask = vect_get_load_store_mask (stmt_info);
4649 
4650   /* Make sure that the target supports an appropriate internal
4651      function for the gather/scatter operation.  */
4652   gather_scatter_info gs_info;
4653   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4654       || gs_info.decl)
4655     return NULL;
4656 
4657   /* Convert the mask to the right form.  */
4658   tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
4659 						 gs_info.element_type);
4660   if (mask)
4661     mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4662 					  loop_vinfo);
4663 
4664   /* Get the invariant base and non-invariant offset, converting the
4665      latter to the same width as the vector elements.  */
4666   tree base = gs_info.base;
4667   tree offset_type = TREE_TYPE (gs_info.offset_vectype);
4668   tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
4669 						gs_info.offset, stmt_info);
4670 
4671   /* Build the new pattern statement.  */
4672   tree scale = size_int (gs_info.scale);
4673   gcall *pattern_stmt;
4674   if (DR_IS_READ (dr))
4675     {
4676       tree zero = build_zero_cst (gs_info.element_type);
4677       if (mask != NULL)
4678 	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
4679 						   offset, scale, zero, mask);
4680       else
4681 	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4682 						   offset, scale, zero);
4683       tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4684       gimple_call_set_lhs (pattern_stmt, load_lhs);
4685     }
4686   else
4687     {
4688       tree rhs = vect_get_store_rhs (stmt_info);
4689       if (mask != NULL)
4690 	pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4691 						   base, offset, scale, rhs,
4692 						   mask);
4693       else
4694 	pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4695 						   base, offset, scale, rhs);
4696     }
4697   gimple_call_set_nothrow (pattern_stmt, true);
4698 
4699   /* Copy across relevant vectorization info and associate DR with the
4700      new pattern statement instead of the original statement.  */
4701   stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4702   loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4703 
4704   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4705   *type_out = vectype;
4706   vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4707 
4708   return pattern_stmt;
4709 }
4710 
4711 /* Return true if TYPE is a non-boolean integer type.  These are the types
4712    that we want to consider for narrowing.  */
4713 
4714 static bool
4715 vect_narrowable_type_p (tree type)
4716 {
4717   return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4718 }
4719 
4720 /* Return true if the operation given by CODE can be truncated to N bits
4721    when only N bits of the output are needed.  This is only true if bit N+1
4722    of the inputs has no effect on the low N bits of the result.  */
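
/* For example: for PLUS_EXPR, bits above bit N of the inputs cannot
   affect the low N bits of the sum, so the addition can be done in N
   bits when only N bits of the result are needed.  The same is not true
   of right shifts or divisions, where higher input bits do influence
   lower result bits, so such codes are rejected below.  */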
4723 
4724 static bool
4725 vect_truncatable_operation_p (tree_code code)
4726 {
4727   switch (code)
4728     {
4729     case PLUS_EXPR:
4730     case MINUS_EXPR:
4731     case MULT_EXPR:
4732     case BIT_AND_EXPR:
4733     case BIT_IOR_EXPR:
4734     case BIT_XOR_EXPR:
4735     case COND_EXPR:
4736       return true;
4737 
4738     default:
4739       return false;
4740     }
4741 }
4742 
4743 /* Record that STMT_INFO could be changed from operating on TYPE to
4744    operating on a type with the precision and sign given by PRECISION
4745    and SIGN respectively.  PRECISION is an arbitrary bit precision;
4746    it might not be a whole number of bytes.  */
4747 
4748 static void
4749 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4750 			 unsigned int precision, signop sign)
4751 {
4752   /* Round the precision up to a whole number of bytes.  */
4753   precision = vect_element_precision (precision);
4754   if (precision < TYPE_PRECISION (type)
4755       && (!stmt_info->operation_precision
4756 	  || stmt_info->operation_precision > precision))
4757     {
4758       stmt_info->operation_precision = precision;
4759       stmt_info->operation_sign = sign;
4760     }
4761 }
4762 
4763 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4764    non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
4765    is an arbitrary bit precision; it might not be a whole number of bytes.  */
4766 
4767 static void
4768 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4769 			      unsigned int min_input_precision)
4770 {
4771   /* This operation in isolation only requires the inputs to have
4772      MIN_INPUT_PRECISION of precision.  However, that doesn't mean
4773      that MIN_INPUT_PRECISION is a natural precision for the chain
4774      as a whole.  E.g. consider something like:
4775 
4776 	 unsigned short *x, *y;
4777 	 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4778 
4779      The right shift can be done on unsigned chars, and only requires the
4780      result of "*x & 0xf0" to be done on unsigned chars.  But taking that
4781      approach would mean turning a natural chain of single-vector unsigned
4782      short operations into one that truncates "*x" and then extends
4783      "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4784      operation and one vector for each unsigned char operation.
4785      This would be a significant pessimization.
4786 
4787      Instead only propagate the maximum of this precision and the precision
4788      required by the users of the result.  This means that we don't pessimize
4789      the case above but continue to optimize things like:
4790 
4791 	 unsigned char *y;
4792 	 unsigned short *x;
4793 	 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4794 
4795      Here we would truncate two vectors of *x to a single vector of
4796      unsigned chars and use single-vector unsigned char operations for
4797      everything else, rather than doing two unsigned short copies of
4798      "(*x & 0xf0) >> 4" and then truncating the result.  */
4799   min_input_precision = MAX (min_input_precision,
4800 			     stmt_info->min_output_precision);
4801 
4802   if (min_input_precision < TYPE_PRECISION (type)
4803       && (!stmt_info->min_input_precision
4804 	  || stmt_info->min_input_precision > min_input_precision))
4805     stmt_info->min_input_precision = min_input_precision;
4806 }
4807 
4808 /* Subroutine of vect_determine_min_output_precision.  Return true if
4809    we can calculate a reduced number of output bits for STMT_INFO,
4810    whose result is LHS.  */
4811 
4812 static bool
4813 vect_determine_min_output_precision_1 (vec_info *vinfo,
4814 				       stmt_vec_info stmt_info, tree lhs)
4815 {
4816   /* Take the maximum precision required by users of the result.  */
4817   unsigned int precision = 0;
4818   imm_use_iterator iter;
4819   use_operand_p use;
4820   FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4821     {
4822       gimple *use_stmt = USE_STMT (use);
4823       if (is_gimple_debug (use_stmt))
4824 	continue;
4825       stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4826       if (!use_stmt_info || !use_stmt_info->min_input_precision)
4827 	return false;
4828       /* The input precision recorded for COND_EXPRs applies only to the
4829 	 "then" and "else" values.  */
4830       gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
4831       if (assign
4832 	  && gimple_assign_rhs_code (assign) == COND_EXPR
4833 	  && use->use != gimple_assign_rhs2_ptr (assign)
4834 	  && use->use != gimple_assign_rhs3_ptr (assign))
4835 	return false;
4836       precision = MAX (precision, use_stmt_info->min_input_precision);
4837     }
4838 
4839   if (dump_enabled_p ())
4840     dump_printf_loc (MSG_NOTE, vect_location,
4841 		     "only the low %d bits of %T are significant\n",
4842 		     precision, lhs);
4843   stmt_info->min_output_precision = precision;
4844   return true;
4845 }
4846 
4847 /* Calculate min_output_precision for STMT_INFO.  */
4848 
4849 static void
4850 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
4851 {
4852   /* We're only interested in statements with a narrowable result.  */
4853   tree lhs = gimple_get_lhs (stmt_info->stmt);
4854   if (!lhs
4855       || TREE_CODE (lhs) != SSA_NAME
4856       || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4857     return;
4858 
4859   if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
4860     stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4861 }
4862 
4863 /* Use range information to decide whether STMT (described by STMT_INFO)
4864    could be done in a narrower type.  This is effectively a forward
4865    propagation, since it uses context-independent information that applies
4866    to all users of an SSA name.  */
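
/* A sketch of the idea: if range information shows that in

     int tmp = x & 0x7f00;

   the result is always within [0, 0x7f00], only 15 bits are needed, so
   the AND can be recorded as doable on 16-bit unsigned elements instead
   of full ints.  */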
4867 
4868 static void
4869 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4870 {
4871   tree lhs = gimple_assign_lhs (stmt);
4872   if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4873     return;
4874 
4875   tree type = TREE_TYPE (lhs);
4876   if (!vect_narrowable_type_p (type))
4877     return;
4878 
4879   /* First see whether we have any useful range information for the result.  */
4880   unsigned int precision = TYPE_PRECISION (type);
4881   signop sign = TYPE_SIGN (type);
4882   wide_int min_value, max_value;
4883   if (!vect_get_range_info (lhs, &min_value, &max_value))
4884     return;
4885 
4886   tree_code code = gimple_assign_rhs_code (stmt);
4887   unsigned int nops = gimple_num_ops (stmt);
4888 
4889   if (!vect_truncatable_operation_p (code))
4890     /* Check that all relevant input operands are compatible, and update
4891        [MIN_VALUE, MAX_VALUE] to include their ranges.  */
4892     for (unsigned int i = 1; i < nops; ++i)
4893       {
4894 	tree op = gimple_op (stmt, i);
4895 	if (TREE_CODE (op) == INTEGER_CST)
4896 	  {
4897 	    /* Don't require the integer to have RHS_TYPE (which it might
4898 	       not for things like shift amounts, etc.), but do require it
4899 	       to fit the type.  */
4900 	    if (!int_fits_type_p (op, type))
4901 	      return;
4902 
4903 	    min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4904 	    max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4905 	  }
4906 	else if (TREE_CODE (op) == SSA_NAME)
4907 	  {
4908 	    /* Ignore codes that don't take uniform arguments.  */
4909 	    if (!types_compatible_p (TREE_TYPE (op), type))
4910 	      return;
4911 
4912 	    wide_int op_min_value, op_max_value;
4913 	    if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4914 	      return;
4915 
4916 	    min_value = wi::min (min_value, op_min_value, sign);
4917 	    max_value = wi::max (max_value, op_max_value, sign);
4918 	  }
4919 	else
4920 	  return;
4921       }
4922 
4923   /* Try to switch signed types for unsigned types if we can.
4924      This is better for two reasons.  First, unsigned ops tend
4925      to be cheaper than signed ops.  Second, it means that we can
4926      handle things like:
4927 
4928 	signed char c;
4929 	int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4930 
4931      as:
4932 
4933 	signed char c;
4934 	unsigned short res_1 = (unsigned short) c & 0xff00;
4935 	int res = (int) res_1;
4936 
4937      where the intermediate result res_1 has unsigned rather than
4938      signed type.  */
4939   if (sign == SIGNED && !wi::neg_p (min_value))
4940     sign = UNSIGNED;
4941 
4942   /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
4943   unsigned int precision1 = wi::min_precision (min_value, sign);
4944   unsigned int precision2 = wi::min_precision (max_value, sign);
4945   unsigned int value_precision = MAX (precision1, precision2);
4946   if (value_precision >= precision)
4947     return;
4948 
4949   if (dump_enabled_p ())
4950     dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4951 		     " without loss of precision: %G",
4952 		     sign == SIGNED ? "signed" : "unsigned",
4953 		     value_precision, stmt);
4954 
4955   vect_set_operation_type (stmt_info, type, value_precision, sign);
4956   vect_set_min_input_precision (stmt_info, type, value_precision);
4957 }
4958 
4959 /* Use information about the users of STMT's result to decide whether
4960    STMT (described by STMT_INFO) could be done in a narrower type.
4961    This is effectively a backward propagation.  */
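
/* For example (a sketch of the RSHIFT_EXPR case below): if only the low
   8 bits of the result of "t = x >> 2" are needed by t's users, the
   shift itself must be done in at least 8 + 2 = 10 bits, so both the
   operation precision and the required input precision become 10 (the
   operation precision is later rounded up to a whole number of bytes by
   vect_set_operation_type).  */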
4962 
4963 static void
4964 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4965 {
4966   tree_code code = gimple_assign_rhs_code (stmt);
4967   unsigned int opno = (code == COND_EXPR ? 2 : 1);
4968   tree type = TREE_TYPE (gimple_op (stmt, opno));
4969   if (!vect_narrowable_type_p (type))
4970     return;
4971 
4972   unsigned int precision = TYPE_PRECISION (type);
4973   unsigned int operation_precision, min_input_precision;
4974   switch (code)
4975     {
4976     CASE_CONVERT:
4977       /* Only the bits that contribute to the output matter.  Don't change
4978 	 the precision of the operation itself.  */
4979       operation_precision = precision;
4980       min_input_precision = stmt_info->min_output_precision;
4981       break;
4982 
4983     case LSHIFT_EXPR:
4984     case RSHIFT_EXPR:
4985       {
4986 	tree shift = gimple_assign_rhs2 (stmt);
4987 	if (TREE_CODE (shift) != INTEGER_CST
4988 	    || !wi::ltu_p (wi::to_widest (shift), precision))
4989 	  return;
4990 	unsigned int const_shift = TREE_INT_CST_LOW (shift);
4991 	if (code == LSHIFT_EXPR)
4992 	  {
4993 	    /* Avoid creating an undefined shift.
4994 
4995 	       ??? We could instead use min_output_precision as-is and
4996 	       optimize out-of-range shifts to zero.  However, only
4997 	       degenerate testcases shift away all their useful input data,
4998 	       and it isn't natural to drop input operations in the middle
4999 	       of vectorization.  This sort of thing should really be
5000 	       handled before vectorization.  */
5001 	    operation_precision = MAX (stmt_info->min_output_precision,
5002 				       const_shift + 1);
5003 	    /* We need CONST_SHIFT fewer bits of the input.  */
5004 	    min_input_precision = (MAX (operation_precision, const_shift)
5005 				   - const_shift);
5006 	  }
5007 	else
5008 	  {
5009 	    /* We need CONST_SHIFT extra bits to do the operation.  */
5010 	    operation_precision = (stmt_info->min_output_precision
5011 				   + const_shift);
5012 	    min_input_precision = operation_precision;
5013 	  }
5014 	break;
5015       }
5016 
5017     default:
5018       if (vect_truncatable_operation_p (code))
5019 	{
5020 	  /* Input bit N has no effect on output bits N-1 and lower.  */
5021 	  operation_precision = stmt_info->min_output_precision;
5022 	  min_input_precision = operation_precision;
5023 	  break;
5024 	}
5025       return;
5026     }
5027 
5028   if (operation_precision < precision)
5029     {
5030       if (dump_enabled_p ())
5031 	dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5032 			 " without affecting users: %G",
5033 			 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
5034 			 operation_precision, stmt);
5035       vect_set_operation_type (stmt_info, type, operation_precision,
5036 			       TYPE_SIGN (type));
5037     }
5038   vect_set_min_input_precision (stmt_info, type, min_input_precision);
5039 }
5040 
5041 /* Return true if the statement described by STMT_INFO sets a boolean
5042    SSA_NAME and if we know how to vectorize this kind of statement using
5043    vector mask types.  */
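
/* For instance, both of

     bool b = (x < y);
     bool d = b & c;

   set boolean SSA names using operations that we know how to handle
   with vector masks, whereas a boolean load or the boolean result of a
   call would not qualify here.  */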
5044 
5045 static bool
5046 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
5047 {
5048   tree lhs = gimple_get_lhs (stmt_info->stmt);
5049   if (!lhs
5050       || TREE_CODE (lhs) != SSA_NAME
5051       || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5052     return false;
5053 
5054   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5055     {
5056       tree_code rhs_code = gimple_assign_rhs_code (assign);
5057       switch (rhs_code)
5058 	{
5059 	CASE_CONVERT:
5060 	case SSA_NAME:
5061 	case BIT_NOT_EXPR:
5062 	case BIT_IOR_EXPR:
5063 	case BIT_XOR_EXPR:
5064 	case BIT_AND_EXPR:
5065 	  return true;
5066 
5067 	default:
5068 	  return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
5069 	}
5070     }
5071   else if (is_a <gphi *> (stmt_info->stmt))
5072     return true;
5073   return false;
5074 }
5075 
5076 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
5077    a vector mask type instead of a normal vector type.  Record the
5078    result in STMT_INFO->mask_precision.  */
5079 
5080 static void
5081 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5082 {
5083   if (!possible_vector_mask_operation_p (stmt_info))
5084     return;
5085 
5086   /* If at least one boolean input uses a vector mask type,
5087      pick the mask type with the narrowest elements.
5088 
5089      ??? This is the traditional behavior.  It should always produce
5090      the smallest number of operations, but isn't necessarily the
5091      optimal choice.  For example, if we have:
5092 
5093        a = b & c
5094 
5095      where:
5096 
5097        - the user of a wants it to have a mask type for 16-bit elements (M16)
5098        - b also uses M16
5099        - c uses a mask type for 8-bit elements (M8)
5100 
5101      then picking M8 gives:
5102 
5103        - 1 M16->M8 pack for b
5104        - 1 M8 AND for a
5105        - 2 M8->M16 unpacks for the user of a
5106 
5107      whereas picking M16 would have given:
5108 
5109        - 2 M8->M16 unpacks for c
5110        - 2 M16 ANDs for a
5111 
5112      The number of operations is the same, but M16 would have given
5113      a shorter dependency chain and allowed more ILP.  */
5114   unsigned int precision = ~0U;
5115   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5116     {
5117       unsigned int nops = gimple_num_ops (assign);
5118       for (unsigned int i = 1; i < nops; ++i)
5119 	{
5120 	  tree rhs = gimple_op (assign, i);
5121 	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
5122 	    continue;
5123 
5124 	  stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5125 	  if (!def_stmt_info)
5126 	    /* Don't let external or constant operands influence the choice.
5127 	       We can convert them to whichever vector type we pick.  */
5128 	    continue;
5129 
5130 	  if (def_stmt_info->mask_precision)
5131 	    {
5132 	      if (precision > def_stmt_info->mask_precision)
5133 		precision = def_stmt_info->mask_precision;
5134 	    }
5135 	}
5136 
5137       /* If the statement compares two values that shouldn't use vector masks,
5138 	 try comparing the values as normal scalars instead.  */
5139       tree_code rhs_code = gimple_assign_rhs_code (assign);
5140       if (precision == ~0U
5141 	  && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5142 	{
5143 	  tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
5144 	  scalar_mode mode;
5145 	  tree vectype, mask_type;
5146 	  if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
5147 	      && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
5148 	      && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
5149 	      && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
5150 	    precision = GET_MODE_BITSIZE (mode);
5151 	}
5152     }
5153   else
5154     {
5155       gphi *phi = as_a <gphi *> (stmt_info->stmt);
5156       for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
5157 	{
5158 	  tree rhs = gimple_phi_arg_def (phi, i);
5159 
5160 	  stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5161 	  if (!def_stmt_info)
5162 	    /* Don't let external or constant operands influence the choice.
5163 	       We can convert them to whichever vector type we pick.  */
5164 	    continue;
5165 
5166 	  if (def_stmt_info->mask_precision)
5167 	    {
5168 	      if (precision > def_stmt_info->mask_precision)
5169 		precision = def_stmt_info->mask_precision;
5170 	    }
5171 	}
5172     }
5173 
5174   if (dump_enabled_p ())
5175     {
5176       if (precision == ~0U)
5177 	dump_printf_loc (MSG_NOTE, vect_location,
5178 			 "using normal nonmask vectors for %G",
5179 			 stmt_info->stmt);
5180       else
5181 	dump_printf_loc (MSG_NOTE, vect_location,
5182 			 "using boolean precision %d for %G",
5183 			 precision, stmt_info->stmt);
5184     }
5185 
5186   stmt_info->mask_precision = precision;
5187 }
5188 
5189 /* Handle vect_determine_precisions for STMT_INFO, given that we
5190    have already done so for the users of its result.  */
5191 
5192 void
5193 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
5194 {
5195   vect_determine_min_output_precision (vinfo, stmt_info);
5196   if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
5197     {
5198       vect_determine_precisions_from_range (stmt_info, stmt);
5199       vect_determine_precisions_from_users (stmt_info, stmt);
5200     }
5201 }
5202 
5203 /* Walk backwards through the vectorizable region to determine the
5204    values of these fields:
5205 
5206    - min_output_precision
5207    - min_input_precision
5208    - operation_precision
5209    - operation_sign.  */
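
/* A rough sketch of the net effect: for something like

     unsigned char a, b, res;
     res = (a + b + 1) >> 1;

   computed in int because of C promotion rules, the backward walk
   records that the additions and the shift only need about 9 bits of
   precision, which is what later lets the over-widening and average
   patterns use narrower vector elements.  */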
5210 
5211 void
5212 vect_determine_precisions (vec_info *vinfo)
5213 {
5214   DUMP_VECT_SCOPE ("vect_determine_precisions");
5215 
5216   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5217     {
5218       class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5219       basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5220       unsigned int nbbs = loop->num_nodes;
5221 
5222       for (unsigned int i = 0; i < nbbs; i++)
5223 	{
5224 	  basic_block bb = bbs[i];
5225 	  for (auto gsi = gsi_start_phis (bb);
5226 	       !gsi_end_p (gsi); gsi_next (&gsi))
5227 	    {
5228 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5229 	      if (stmt_info)
5230 		vect_determine_mask_precision (vinfo, stmt_info);
5231 	    }
5232 	  for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5233 	    if (!is_gimple_debug (gsi_stmt (si)))
5234 	      vect_determine_mask_precision
5235 		(vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5236 	}
5237       for (unsigned int i = 0; i < nbbs; i++)
5238 	{
5239 	  basic_block bb = bbs[nbbs - i - 1];
5240 	  for (gimple_stmt_iterator si = gsi_last_bb (bb);
5241 	       !gsi_end_p (si); gsi_prev (&si))
5242 	    if (!is_gimple_debug (gsi_stmt (si)))
5243 	      vect_determine_stmt_precisions
5244 		(vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5245 	  for (auto gsi = gsi_start_phis (bb);
5246 	       !gsi_end_p (gsi); gsi_next (&gsi))
5247 	    {
5248 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5249 	      if (stmt_info)
5250 		vect_determine_stmt_precisions (vinfo, stmt_info);
5251 	    }
5252 	}
5253     }
5254   else
5255     {
5256       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5257       for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5258 	{
5259 	  basic_block bb = bb_vinfo->bbs[i];
5260 	  for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5261 	    {
5262 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5263 	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5264 		vect_determine_mask_precision (vinfo, stmt_info);
5265 	    }
5266 	  for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5267 	    {
5268 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5269 	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5270 		vect_determine_mask_precision (vinfo, stmt_info);
5271 	    }
5272 	}
5273       for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
5274 	{
5275 	  for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
5276 	       !gsi_end_p (gsi); gsi_prev (&gsi))
5277 	    {
5278 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5279 	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5280 		vect_determine_stmt_precisions (vinfo, stmt_info);
5281 	    }
5282 	  for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
5283 	       !gsi_end_p (gsi); gsi_next (&gsi))
5284 	    {
5285 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5286 	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5287 		vect_determine_stmt_precisions (vinfo, stmt_info);
5288 	    }
5289 	}
5290     }
5291 }
5292 
5293 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
5294 
5295 struct vect_recog_func
5296 {
5297   vect_recog_func_ptr fn;
5298   const char *name;
5299 };
5300 
5301 /* Note that ordering matters - the first pattern matching on a stmt is
5302    taken, which means that the more complex patterns usually need to precede
5303    the less complex ones (widen_sum only after dot_prod or sad, for example).  */
5304 static vect_recog_func vect_vect_recog_func_ptrs[] = {
5305   { vect_recog_over_widening_pattern, "over_widening" },
5306   /* Must come after over_widening, which narrows the shift as much as
5307      possible beforehand.  */
5308   { vect_recog_average_pattern, "average" },
5309   { vect_recog_mulhs_pattern, "mult_high" },
5310   { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
5311   { vect_recog_widen_mult_pattern, "widen_mult" },
5312   { vect_recog_dot_prod_pattern, "dot_prod" },
5313   { vect_recog_sad_pattern, "sad" },
5314   { vect_recog_widen_sum_pattern, "widen_sum" },
5315   { vect_recog_pow_pattern, "pow" },
5316   { vect_recog_widen_shift_pattern, "widen_shift" },
5317   { vect_recog_rotate_pattern, "rotate" },
5318   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
5319   { vect_recog_divmod_pattern, "divmod" },
5320   { vect_recog_mult_pattern, "mult" },
5321   { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
5322   { vect_recog_bool_pattern, "bool" },
5323   /* This must come before mask conversion, and includes the parts
5324      of mask conversion that are needed for gather and scatter
5325      internal functions.  */
5326   { vect_recog_gather_scatter_pattern, "gather_scatter" },
5327   { vect_recog_mask_conversion_pattern, "mask_conversion" },
5328   { vect_recog_widen_plus_pattern, "widen_plus" },
5329   { vect_recog_widen_minus_pattern, "widen_minus" },
5330 };
5331 
5332 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
5333 
5334 /* Mark statements that are involved in a pattern.  */
5335 
5336 void
5337 vect_mark_pattern_stmts (vec_info *vinfo,
5338 			 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
5339                          tree pattern_vectype)
5340 {
5341   stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
5342   gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5343 
5344   gimple *orig_pattern_stmt = NULL;
5345   if (is_pattern_stmt_p (orig_stmt_info))
5346     {
5347       /* We're replacing a statement in an existing pattern definition
5348 	 sequence.  */
5349       orig_pattern_stmt = orig_stmt_info->stmt;
5350       if (dump_enabled_p ())
5351 	dump_printf_loc (MSG_NOTE, vect_location,
5352 			 "replacing earlier pattern %G", orig_pattern_stmt);
5353 
5354       /* To keep the book-keeping simple, just swap the lhs of the
5355 	 old and new statements, so that the old one has a valid but
5356 	 unused lhs.  */
5357       tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
5358       gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
5359       gimple_set_lhs (pattern_stmt, old_lhs);
5360 
5361       if (dump_enabled_p ())
5362 	dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
5363 
5364       /* Switch to the statement that ORIG replaces.  */
5365       orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
5366 
5367       /* We shouldn't be replacing the main pattern statement.  */
5368       gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
5369 		  != orig_pattern_stmt);
5370     }
5371 
5372   if (def_seq)
5373     for (gimple_stmt_iterator si = gsi_start (def_seq);
5374 	 !gsi_end_p (si); gsi_next (&si))
5375       {
5376 	if (dump_enabled_p ())
5377 	  dump_printf_loc (MSG_NOTE, vect_location,
5378 			   "extra pattern stmt: %G", gsi_stmt (si));
5379 	stmt_vec_info pattern_stmt_info
5380 	  = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
5381 				    orig_stmt_info, pattern_vectype);
5382 	/* Stmts in the def sequence are not vectorizable cycle or
5383 	   induction defs, instead they should all be vect_internal_def
5384 	   feeding the main pattern stmt which retains this def type.  */
5385 	STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
5386       }
5387 
5388   if (orig_pattern_stmt)
5389     {
5390       vect_init_pattern_stmt (vinfo, pattern_stmt,
5391 			      orig_stmt_info, pattern_vectype);
5392 
5393       /* Insert all the new pattern statements before the original one.  */
5394       gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5395       gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
5396 					       orig_def_seq);
5397       gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
5398       gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
5399 
5400       /* Remove the pattern statement that this new pattern replaces.  */
5401       gsi_remove (&gsi, false);
5402     }
5403   else
5404     vect_set_pattern_stmt (vinfo,
5405 			   pattern_stmt, orig_stmt_info, pattern_vectype);
5406 
5407   /* Transfer reduction path info to the pattern.  */
5408   if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
5409     {
5410       tree lookfor = gimple_op (orig_stmt_info_saved->stmt,
5411 				1 + STMT_VINFO_REDUC_IDX (orig_stmt_info));
5412       /* Search the pattern def sequence and the main pattern stmt.  Note
5413 	 that we may have inserted everything into a containing pattern def
5414 	 sequence, so the following is a bit awkward.  */
5415       gimple_stmt_iterator si;
5416       gimple *s;
5417       if (def_seq)
5418 	{
5419 	  si = gsi_start (def_seq);
5420 	  s = gsi_stmt (si);
5421 	  gsi_next (&si);
5422 	}
5423       else
5424 	{
5425 	  si = gsi_none ();
5426 	  s = pattern_stmt;
5427 	}
5428       do
5429 	{
5430 	  bool found = false;
5431 	  for (unsigned i = 1; i < gimple_num_ops (s); ++i)
5432 	    if (gimple_op (s, i) == lookfor)
5433 	      {
5434 		STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i - 1;
5435 		lookfor = gimple_get_lhs (s);
5436 		found = true;
5437 		break;
5438 	      }
5439 	  if (s == pattern_stmt)
5440 	    {
5441 	      if (!found && dump_enabled_p ())
5442 		dump_printf_loc (MSG_NOTE, vect_location,
5443 				 "failed to update reduction index.\n");
5444 	      break;
5445 	    }
5446 	  if (gsi_end_p (si))
5447 	    s = pattern_stmt;
5448 	  else
5449 	    {
5450 	      s = gsi_stmt (si);
5451 	      if (s == pattern_stmt)
5452 		/* Found the end inside a bigger pattern def seq.  */
5453 		si = gsi_none ();
5454 	      else
5455 		gsi_next (&si);
5456 	    }
5457 	} while (1);
5458     }
5459 }
5460 
5461 /* Function vect_pattern_recog_1
5462 
5463    Input:
5464    PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
5465         computation pattern.
5466    STMT_INFO: A stmt from which the pattern search should start.
5467 
5468    If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
5469    a sequence of statements that has the same functionality and can be
5470    used to replace STMT_INFO.  It returns the last statement in the sequence
5471    and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
5472    PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
5473    statement, having first checked that the target supports the new operation
5474    in that type.
5475 
5476    This function also does some bookkeeping, as explained in the documentation
5477    for vect_pattern_recog.  */
5478 
5479 static void
5480 vect_pattern_recog_1 (vec_info *vinfo,
5481 		      vect_recog_func *recog_func, stmt_vec_info stmt_info)
5482 {
5483   gimple *pattern_stmt;
5484   loop_vec_info loop_vinfo;
5485   tree pattern_vectype;
5486 
5487   /* If this statement has already been replaced with pattern statements,
5488      leave the original statement alone, since the first match wins.
5489      Instead try to match against the definition statements that feed
5490      the main pattern statement.  */
5491   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5492     {
5493       gimple_stmt_iterator gsi;
5494       for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5495 	   !gsi_end_p (gsi); gsi_next (&gsi))
5496 	vect_pattern_recog_1 (vinfo, recog_func,
5497 			      vinfo->lookup_stmt (gsi_stmt (gsi)));
5498       return;
5499     }
5500 
5501   gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5502   pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
5503   if (!pattern_stmt)
5504     {
5505       /* Clear any half-formed pattern definition sequence.  */
5506       STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
5507       return;
5508     }
5509 
5510   loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5511   gcc_assert (pattern_vectype);
5512 
5513   /* Found a vectorizable pattern.  */
5514   if (dump_enabled_p ())
5515     dump_printf_loc (MSG_NOTE, vect_location,
5516 		     "%s pattern recognized: %G",
5517 		     recog_func->name, pattern_stmt);
5518 
5519   /* Mark the stmts that are involved in the pattern. */
5520   vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
5521 
5522   /* Patterns cannot be vectorized using SLP, because they change the order of
5523      computation.  */
5524   if (loop_vinfo)
5525     {
5526       unsigned ix, ix2;
5527       stmt_vec_info *elem_ptr;
5528       VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
5529 			     elem_ptr, *elem_ptr == stmt_info);
5530     }
5531 }
5532 
5533 
5534 /* Function vect_pattern_recog
5535 
5536    Input:
5537    LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
5538         computation idioms.
5539 
5540    Output - for each computation idiom that is detected we create a new stmt
5541         that provides the same functionality and that can be vectorized.  We
5542         also record some information in the struct_stmt_info of the relevant
5543         stmts, as explained below:
5544 
5545    At the entry to this function we have the following stmts, with the
5546    following initial value in the STMT_VINFO fields:
5547 
5548          stmt                     in_pattern_p  related_stmt    vec_stmt
5549          S1: a_i = ....                 -       -               -
5550          S2: a_2 = ..use(a_i)..         -       -               -
5551          S3: a_1 = ..use(a_2)..         -       -               -
5552          S4: a_0 = ..use(a_1)..         -       -               -
5553          S5: ... = ..use(a_0)..         -       -               -
5554 
5555    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
5556    represented by a single stmt.  We then:
5557    - create a new stmt S6 equivalent to the pattern (the stmt is not
5558      inserted into the code)
5559    - fill in the STMT_VINFO fields as follows:
5560 
5561                                   in_pattern_p  related_stmt    vec_stmt
5562          S1: a_i = ....                 -       -               -
5563          S2: a_2 = ..use(a_i)..         -       -               -
5564          S3: a_1 = ..use(a_2)..         -       -               -
5565          S4: a_0 = ..use(a_1)..         true    S6              -
5566           '---> S6: a_new = ....        -       S4              -
5567          S5: ... = ..use(a_0)..         -       -               -
5568 
5569    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
5570    to each other through the RELATED_STMT field).
5571 
5572    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
5573    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
5574    remain irrelevant unless used by stmts other than S4.
5575 
5576    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
5577    (because they are marked as irrelevant).  It will vectorize S6, and record
5578    a pointer to the new vector stmt VS6 from S6 (as usual).
5579    S4 will be skipped, and S5 will be vectorized as usual:
5580 
5581                                   in_pattern_p  related_stmt    vec_stmt
5582          S1: a_i = ....                 -       -               -
5583          S2: a_2 = ..use(a_i)..         -       -               -
5584          S3: a_1 = ..use(a_2)..         -       -               -
5585        > VS6: va_new = ....             -       -               -
5586          S4: a_0 = ..use(a_1)..         true    S6              VS6
5587           '---> S6: a_new = ....        -       S4              VS6
5588        > VS5: ... = ..vuse(va_new)..    -       -               -
5589          S5: ... = ..use(a_0)..         -       -               -
5590 
5591    DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
5592    elsewhere), and we'll end up with:
5593 
5594         VS6: va_new = ....
5595         VS5: ... = ..vuse(va_new)..
5596 
5597    In case of more than one pattern statements, e.g., widen-mult with
5598    intermediate type:
5599 
5600      S1  a_t = ;
5601      S2  a_T = (TYPE) a_t;
5602            '--> S3: a_it = (interm_type) a_t;
5603      S4  prod_T = a_T * CONST;
5604            '--> S5: prod_T' = a_it w* CONST;
5605 
5606    there may be other users of a_T outside the pattern.  In that case S2 will
5607    be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5608    and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
5609    be recorded in S3.  */
5610 
5611 void
5612 vect_pattern_recog (vec_info *vinfo)
5613 {
5614   class loop *loop;
5615   basic_block *bbs;
5616   unsigned int nbbs;
5617   gimple_stmt_iterator si;
5618   unsigned int i, j;
5619 
5620   vect_determine_precisions (vinfo);
5621 
5622   DUMP_VECT_SCOPE ("vect_pattern_recog");
5623 
5624   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5625     {
5626       loop = LOOP_VINFO_LOOP (loop_vinfo);
5627       bbs = LOOP_VINFO_BBS (loop_vinfo);
5628       nbbs = loop->num_nodes;
5629 
5630       /* Scan through the loop stmts, applying the pattern recognition
5631 	 functions starting at each stmt visited:  */
5632       for (i = 0; i < nbbs; i++)
5633 	{
5634 	  basic_block bb = bbs[i];
5635 	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5636 	    {
5637 	      if (is_gimple_debug (gsi_stmt (si)))
5638 		continue;
5639 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
5640 	      /* Scan over all generic vect_recog_xxx_pattern functions.  */
5641 	      for (j = 0; j < NUM_PATTERNS; j++)
5642 		vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
5643 				      stmt_info);
5644 	    }
5645 	}
5646     }
5647   else
5648     {
5649       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5650       for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5651 	for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
5652 	     !gsi_end_p (gsi); gsi_next (&gsi))
5653 	  {
5654 	    stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
5655 	    if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
5656 	      continue;
5657 
5658 	    /* Scan over all generic vect_recog_xxx_pattern functions.  */
5659 	    for (j = 0; j < NUM_PATTERNS; j++)
5660 	      vect_pattern_recog_1 (vinfo,
5661 				    &vect_vect_recog_func_ptrs[j], stmt_info);
5662 	  }
5663     }
5664 
5665   /* After this no more add_stmt calls are allowed.  */
5666   vinfo->stmt_vec_info_ro = true;
5667 }
5668