1 /* Analysis Utilities for Loop Vectorization.
2    Copyright (C) 2006-2021 Free Software Foundation, Inc.
3    Contributed by Dorit Nuzman <dorit@il.ibm.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h"		/* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
49 #include "tree-vector-builder.h"
50 #include "vec-perm-indices.h"
51 
52 /* Return true if we have a useful VR_RANGE range for VAR, storing it
53    in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */
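/* For example (an illustrative sketch): if VAR is known to lie in [0, 100]
   and its known-nonzero bits are 0xf, the intersection below tightens the
   returned range to [0, 15].  */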
54 
55 static bool
56 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
57 {
58   value_range_kind vr_type = get_range_info (var, min_value, max_value);
59   wide_int nonzero = get_nonzero_bits (var);
60   signop sgn = TYPE_SIGN (TREE_TYPE (var));
61   if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
62 					 nonzero, sgn) == VR_RANGE)
63     {
64       if (dump_enabled_p ())
65 	{
66 	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
67 	  dump_printf (MSG_NOTE, " has range [");
68 	  dump_hex (MSG_NOTE, *min_value);
69 	  dump_printf (MSG_NOTE, ", ");
70 	  dump_hex (MSG_NOTE, *max_value);
71 	  dump_printf (MSG_NOTE, "]\n");
72 	}
73       return true;
74     }
75   else
76     {
77       if (dump_enabled_p ())
78 	{
79 	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
80 	  dump_printf (MSG_NOTE, " has no range info\n");
81 	}
82       return false;
83     }
84 }
85 
86 /* Report that we've found an instance of pattern NAME in
87    statement STMT.  */
88 
89 static void
90 vect_pattern_detected (const char *name, gimple *stmt)
91 {
92   if (dump_enabled_p ())
93     dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
94 }
95 
96 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
97    return the pattern statement's stmt_vec_info.  Set its vector type to
98    VECTYPE if it doesn't have one already.  */
99 
100 static stmt_vec_info
101 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
102 			stmt_vec_info orig_stmt_info, tree vectype)
103 {
104   stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
105   if (pattern_stmt_info == NULL)
106     pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
107   gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
108 
109   pattern_stmt_info->pattern_stmt_p = true;
110   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
111   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
112     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
113   if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
114     {
115       gcc_assert (VECTOR_BOOLEAN_TYPE_P (vectype)
116 		  == vect_use_mask_type_p (orig_stmt_info));
117       STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
118       pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
119     }
120   return pattern_stmt_info;
121 }
122 
123 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
124    Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
125    have one already.  */
126 
127 static void
128 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
129 		       stmt_vec_info orig_stmt_info, tree vectype)
130 {
131   STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
132   STMT_VINFO_RELATED_STMT (orig_stmt_info)
133     = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
134 }
135 
136 /* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
137    is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
138    be different from the vector type of the final pattern statement.
139    If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
140    from which it was derived.  */
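/* A typical use, as in vect_convert_input further below:

     append_pattern_def_seq (vinfo, stmt_info, new_stmt, vec_midtype);  */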
141 
142 static inline void
143 append_pattern_def_seq (vec_info *vinfo,
144 			stmt_vec_info stmt_info, gimple *new_stmt,
145 			tree vectype = NULL_TREE,
146 			tree scalar_type_for_mask = NULL_TREE)
147 {
148   gcc_assert (!scalar_type_for_mask
149 	      == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
150   if (vectype)
151     {
152       stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
153       STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
154       if (scalar_type_for_mask)
155 	new_stmt_info->mask_precision
156 	  = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
157     }
158   gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
159 				      new_stmt);
160 }
161 
162 /* The caller wants to perform new operations on vect_external variable
163    VAR, so that the result of the operations would also be vect_external.
164    Return the edge on which the operations can be performed, if one exists.
165    Return null if the operations should instead be treated as part of
166    the pattern that needs them.  */
167 
168 static edge
169 vect_get_external_def_edge (vec_info *vinfo, tree var)
170 {
171   edge e = NULL;
172   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
173     {
174       e = loop_preheader_edge (loop_vinfo->loop);
175       if (!SSA_NAME_IS_DEFAULT_DEF (var))
176 	{
177 	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
178 	  if (bb == NULL
179 	      || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
180 	    e = NULL;
181 	}
182     }
183   return e;
184 }
185 
186 /* Return true if the target supports a vector version of CODE,
187    where CODE is known to map to a direct optab.  ITYPE specifies
188    the type of (some of) the scalar inputs and OTYPE specifies the
189    type of the scalar result.
190 
191    If CODE allows the inputs and outputs to have different types
192    (such as for WIDEN_SUM_EXPR), it is the input mode rather
193    than the output mode that determines the appropriate target pattern.
194    Operand 0 of the target pattern then specifies the mode that the output
195    must have.
196 
197    When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
198    Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
199    is nonnull.  */
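/* For example, the dot-product recognizer below uses this to check for a
   target DOT_PROD_EXPR pattern that takes vectors of HALF_TYPE and produces
   vectors of TYPE:

     tree half_vectype;
     if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR,
					   half_type, type_out, &half_vectype))
       return NULL;  */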
200 
201 static bool
202 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
203 				 tree itype, tree *vecotype_out,
204 				 tree *vecitype_out = NULL)
205 {
206   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
207   if (!vecitype)
208     return false;
209 
210   tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
211   if (!vecotype)
212     return false;
213 
214   optab optab = optab_for_tree_code (code, vecitype, optab_default);
215   if (!optab)
216     return false;
217 
218   insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
219   if (icode == CODE_FOR_nothing
220       || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
221     return false;
222 
223   *vecotype_out = vecotype;
224   if (vecitype_out)
225     *vecitype_out = vecitype;
226   return true;
227 }
228 
229 /* Round bit precision PRECISION up to a full element.  */
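/* For example, a precision of 17 bits rounds up to 32, while a precision of
   3 bits rounds up to BITS_PER_UNIT (8 on most targets).  */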
230 
231 static unsigned int
232 vect_element_precision (unsigned int precision)
233 {
234   precision = 1 << ceil_log2 (precision);
235   return MAX (precision, BITS_PER_UNIT);
236 }
237 
238 /* If OP is defined by a statement that's being considered for vectorization,
239    return information about that statement, otherwise return NULL.  */
240 
241 static stmt_vec_info
242 vect_get_internal_def (vec_info *vinfo, tree op)
243 {
244   stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
245   if (def_stmt_info
246       && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
247     return def_stmt_info;
248   return NULL;
249 }
250 
251 /* Check whether NAME, an ssa-name used in STMT_VINFO,
252    is a result of a type promotion, such that:
253      DEF_STMT: NAME = NOP (name0)
254    If CHECK_SIGN is TRUE, check that either both types are signed or both are
255    unsigned.  */
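/* For example (illustrative): given

     short s = ...;
     int i = (int) s;

   querying NAME == i sets *ORIG_TYPE to short, *DEF_STMT to the conversion
   statement and *PROMOTION to true, since int is at least twice as wide as
   short on typical targets.  */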
256 
257 static bool
258 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
259 		   tree *orig_type, gimple **def_stmt, bool *promotion)
260 {
261   tree type = TREE_TYPE (name);
262   tree oprnd0;
263   enum vect_def_type dt;
264 
265   stmt_vec_info def_stmt_info;
266   if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
267     return false;
268 
269   if (dt != vect_internal_def
270       && dt != vect_external_def && dt != vect_constant_def)
271     return false;
272 
273   if (!*def_stmt)
274     return false;
275 
276   if (!is_gimple_assign (*def_stmt))
277     return false;
278 
279   if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
280     return false;
281 
282   oprnd0 = gimple_assign_rhs1 (*def_stmt);
283 
284   *orig_type = TREE_TYPE (oprnd0);
285   if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
286       || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
287     return false;
288 
289   if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
290     *promotion = true;
291   else
292     *promotion = false;
293 
294   if (!vect_is_simple_use (oprnd0, vinfo, &dt))
295     return false;
296 
297   return true;
298 }
299 
300 /* Holds information about an input operand after some sign changes
301    and type promotions have been peeled away.  */
302 class vect_unpromoted_value {
303 public:
304   vect_unpromoted_value ();
305 
306   void set_op (tree, vect_def_type, stmt_vec_info = NULL);
307 
308   /* The value obtained after peeling away zero or more casts.  */
309   tree op;
310 
311   /* The type of OP.  */
312   tree type;
313 
314   /* The definition type of OP.  */
315   vect_def_type dt;
316 
317   /* If OP is the result of peeling at least one cast, and if the cast
318      of OP itself is a vectorizable statement, CASTER identifies that
319      statement, otherwise it is null.  */
320   stmt_vec_info caster;
321 };
322 
323 inline vect_unpromoted_value::vect_unpromoted_value ()
324   : op (NULL_TREE),
325     type (NULL_TREE),
326     dt (vect_uninitialized_def),
327     caster (NULL)
328 {
329 }
330 
331 /* Set the operand to OP_IN, its definition type to DT_IN, and the
332    statement that casts it to CASTER_IN.  */
333 
334 inline void
335 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
336 			       stmt_vec_info caster_in)
337 {
338   op = op_in;
339   type = TREE_TYPE (op);
340   dt = dt_in;
341   caster = caster_in;
342 }
343 
344 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
345    to reach some vectorizable inner operand OP', continuing as long as it
346    is possible to convert OP' back to OP using a possible sign change
347    followed by a possible promotion P.  Return this OP', or null if OP is
348    not a vectorizable SSA name.  If there is a promotion P, describe its
349    input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
350    is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
351    have more than one user.
352 
353    A successful return means that it is possible to go from OP' to OP
354    via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
355    whereas the cast from UNPROM to OP might be a promotion, a sign
356    change, or a nop.
357 
358    E.g. say we have:
359 
360        signed short *ptr = ...;
361        signed short C = *ptr;
362        unsigned short B = (unsigned short) C;    // sign change
363        signed int A = (signed int) B;            // unsigned promotion
364        ...possible other uses of A...
365        unsigned int OP = (unsigned int) A;       // sign change
366 
367    In this case it's possible to go directly from C to OP using:
368 
369        OP = (unsigned int) (unsigned short) C;
370 	    +------------+ +--------------+
371 	       promotion      sign change
372 
373    so OP' would be C.  The input to the promotion is B, so UNPROM
374    would describe B.  */
375 
376 static tree
377 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
378 				      vect_unpromoted_value *unprom,
379 				      bool *single_use_p = NULL)
380 {
381   tree res = NULL_TREE;
382   tree op_type = TREE_TYPE (op);
383   unsigned int orig_precision = TYPE_PRECISION (op_type);
384   unsigned int min_precision = orig_precision;
385   stmt_vec_info caster = NULL;
386   while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
387     {
388       /* See whether OP is simple enough to vectorize.  */
389       stmt_vec_info def_stmt_info;
390       gimple *def_stmt;
391       vect_def_type dt;
392       if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
393 	break;
394 
395       /* If OP is the input of a demotion, skip over it to see whether
396 	 OP is itself the result of a promotion.  If so, the combined
397 	 effect of the promotion and the demotion might fit the required
398 	 pattern, otherwise neither operation fits.
399 
400 	 This copes with cases such as the result of an arithmetic
401 	 operation being truncated before being stored, and where that
402 	 arithmetic operation has been recognized as an over-widened one.  */
403       if (TYPE_PRECISION (op_type) <= min_precision)
404 	{
405 	  /* Use OP as the UNPROM described above if we haven't yet
406 	     found a promotion, or if using the new input preserves the
407 	     sign of the previous promotion.  */
408 	  if (!res
409 	      || TYPE_PRECISION (unprom->type) == orig_precision
410 	      || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
411 	    {
412 	      unprom->set_op (op, dt, caster);
413 	      min_precision = TYPE_PRECISION (op_type);
414 	    }
415 	  /* Stop if we've already seen a promotion and if this
416 	     conversion does more than change the sign.  */
417 	  else if (TYPE_PRECISION (op_type)
418 		   != TYPE_PRECISION (unprom->type))
419 	    break;
420 
421 	  /* The sequence now extends to OP.  */
422 	  res = op;
423 	}
424 
425       /* See whether OP is defined by a cast.  Record it as CASTER if
426 	 the cast is potentially vectorizable.  */
427       if (!def_stmt)
428 	break;
429       caster = def_stmt_info;
430 
431       /* Ignore pattern statements, since we don't link uses for them.  */
432       if (caster
433 	  && single_use_p
434 	  && !STMT_VINFO_RELATED_STMT (caster)
435 	  && !has_single_use (res))
436 	*single_use_p = false;
437 
438       gassign *assign = dyn_cast <gassign *> (def_stmt);
439       if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
440 	break;
441 
442       /* Continue with the input to the cast.  */
443       op = gimple_assign_rhs1 (def_stmt);
444       op_type = TREE_TYPE (op);
445     }
446   return res;
447 }
448 
449 /* OP is an integer operand to an operation that returns TYPE, and we
450    want to treat the operation as a widening one.  So far we can treat
451    it as widening from *COMMON_TYPE.
452 
453    Return true if OP is suitable for such a widening operation,
454    either widening from *COMMON_TYPE or from some supertype of it.
455    Update *COMMON_TYPE to the supertype in the latter case.
456 
457    SHIFT_P is true if OP is a shift amount.  */
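/* For example (illustrative): if TYPE is a 32-bit integer, SHIFT_P is false,
   OP is the constant 300 and *COMMON_TYPE is currently an 8-bit unsigned
   type, then OP needs 9 bits, which rounds up to a 16-bit element, so
   *COMMON_TYPE is widened to a 16-bit unsigned type.  */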
458 
459 static bool
460 vect_joust_widened_integer (tree type, bool shift_p, tree op,
461 			    tree *common_type)
462 {
463   /* Calculate the minimum precision required by OP, without changing
464      the sign of either operand.  */
465   unsigned int precision;
466   if (shift_p)
467     {
468       if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
469 	return false;
470       precision = TREE_INT_CST_LOW (op);
471     }
472   else
473     {
474       precision = wi::min_precision (wi::to_widest (op),
475 				     TYPE_SIGN (*common_type));
476       if (precision * 2 > TYPE_PRECISION (type))
477 	return false;
478     }
479 
480   /* If OP requires a wider type, switch to that type.  The checks
481      above ensure that this is still narrower than the result.  */
482   precision = vect_element_precision (precision);
483   if (TYPE_PRECISION (*common_type) < precision)
484     *common_type = build_nonstandard_integer_type
485       (precision, TYPE_UNSIGNED (*common_type));
486   return true;
487 }
488 
489 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
490    is narrower than TYPE, storing the supertype in *COMMON_TYPE if so.  */
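/* For example (illustrative): if TYPE is a 64-bit integer, *COMMON_TYPE is
   unsigned short and NEW_TYPE is signed short, neither operand type can hold
   all values of the other, so *COMMON_TYPE becomes a signed 32-bit type.  */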
491 
492 static bool
493 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
494 {
495   if (types_compatible_p (*common_type, new_type))
496     return true;
497 
498   /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
499   if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
500       && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
501     return true;
502 
503   /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
504   if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
505       && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
506     {
507       *common_type = new_type;
508       return true;
509     }
510 
511   /* We have mismatched signs, with the signed type being
512      no wider than the unsigned type.  In this case we need
513      a wider signed type.  */
514   unsigned int precision = MAX (TYPE_PRECISION (*common_type),
515 				TYPE_PRECISION (new_type));
516   precision *= 2;
517   if (precision * 2 > TYPE_PRECISION (type))
518     return false;
519 
520   *common_type = build_nonstandard_integer_type (precision, false);
521   return true;
522 }
523 
524 /* Check whether STMT_INFO can be viewed as a tree of integer operations
525    in which each node either performs CODE or WIDENED_CODE, and where
526    each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
527    specifies the maximum number of leaf operands.  SHIFT_P says whether
528    CODE and WIDENED_CODE are some sort of shift.
529 
530    If STMT_INFO is such a tree, return the number of leaf operands
531    and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
532    to a type that (a) is narrower than the result of STMT_INFO and
533    (b) can hold all leaf operand values.
534 
535    Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
536    exists.  */
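/* For example (illustrative), with CODE == PLUS_EXPR, WIDENED_CODE ==
   WIDEN_PLUS_EXPR and MAX_NOPS >= 3:

     int t1 = (int) a + (int) b;   // a, b: unsigned char
     int t2 = t1 + (int) c;        // c: unsigned char

   the tree rooted at t2 has three leaf operands a, b and c, and
   unsigned char is a suitable *COMMON_TYPE.  */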
537 
538 static unsigned int
539 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
540 		      tree_code widened_code, bool shift_p,
541 		      unsigned int max_nops,
542 		      vect_unpromoted_value *unprom, tree *common_type)
543 {
544   /* Check for an integer operation with the right code.  */
545   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
546   if (!assign)
547     return 0;
548 
549   tree_code rhs_code = gimple_assign_rhs_code (assign);
550   if (rhs_code != code && rhs_code != widened_code)
551     return 0;
552 
553   tree type = gimple_expr_type (assign);
554   if (!INTEGRAL_TYPE_P (type))
555     return 0;
556 
557   /* Assume that both operands will be leaf operands.  */
558   max_nops -= 2;
559 
560   /* Check the operands.  */
561   unsigned int next_op = 0;
562   for (unsigned int i = 0; i < 2; ++i)
563     {
564       vect_unpromoted_value *this_unprom = &unprom[next_op];
565       unsigned int nops = 1;
566       tree op = gimple_op (assign, i + 1);
567       if (i == 1 && TREE_CODE (op) == INTEGER_CST)
568 	{
569 	  /* We already have a common type from earlier operands.
570 	     Update it to account for OP.  */
571 	  this_unprom->set_op (op, vect_constant_def);
572 	  if (!vect_joust_widened_integer (type, shift_p, op, common_type))
573 	    return 0;
574 	}
575       else
576 	{
577 	  /* Only allow shifts by constants.  */
578 	  if (shift_p && i == 1)
579 	    return 0;
580 
581 	  if (!vect_look_through_possible_promotion (vinfo, op, this_unprom))
582 	    return 0;
583 
584 	  if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
585 	    {
586 	      /* The operand isn't widened.  If STMT_INFO has the code
587 		 for an unwidened operation, recursively check whether
588 		 this operand is a node of the tree.  */
589 	      if (rhs_code != code
590 		  || max_nops == 0
591 		  || this_unprom->dt != vect_internal_def)
592 		return 0;
593 
594 	      /* Give back the leaf slot allocated above now that we're
595 		 not treating this as a leaf operand.  */
596 	      max_nops += 1;
597 
598 	      /* Recursively process the definition of the operand.  */
599 	      stmt_vec_info def_stmt_info
600 		= vinfo->lookup_def (this_unprom->op);
601 	      nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
602 					   widened_code, shift_p, max_nops,
603 					   this_unprom, common_type);
604 	      if (nops == 0)
605 		return 0;
606 
607 	      max_nops -= nops;
608 	    }
609 	  else
610 	    {
611 	      /* Make sure that the operand is narrower than the result.  */
612 	      if (TYPE_PRECISION (this_unprom->type) * 2
613 		  > TYPE_PRECISION (type))
614 		return 0;
615 
616 	      /* Update COMMON_TYPE for the new operand.  */
617 	      if (i == 0)
618 		*common_type = this_unprom->type;
619 	      else if (!vect_joust_widened_type (type, this_unprom->type,
620 						 common_type))
621 		return 0;
622 	    }
623 	}
624       next_op += nops;
625     }
626   return next_op;
627 }
628 
629 /* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
630    is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
631 
632 static tree
633 vect_recog_temp_ssa_var (tree type, gimple *stmt)
634 {
635   return make_temp_ssa_name (type, stmt, "patt");
636 }
637 
638 /* STMT2_INFO describes a type conversion that could be split into STMT1
639    followed by a version of STMT2_INFO that takes NEW_RHS as its first
640    input.  Try to do this using pattern statements, returning true on
641    success.  */
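/* For example (illustrative): if STMT2_INFO is the conversion

     int x = (int) c;                  // c: unsigned char

   and STMT1 computes NEW_RHS as  tmp = (unsigned short) c,  a successful
   split leaves the pattern statements

     tmp = (unsigned short) c;
     int x = (int) tmp;  */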
642 
643 static bool
644 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
645 		      gimple *stmt1, tree vectype)
646 {
647   if (is_pattern_stmt_p (stmt2_info))
648     {
649       /* STMT2_INFO is part of a pattern.  Get the statement to which
650 	 the pattern is attached.  */
651       stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
652       vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
653 
654       if (dump_enabled_p ())
655 	dump_printf_loc (MSG_NOTE, vect_location,
656 			 "Splitting pattern statement: %G", stmt2_info->stmt);
657 
658       /* Since STMT2_INFO is a pattern statement, we can change it
659 	 in-situ without worrying about changing the code for the
660 	 containing block.  */
661       gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
662 
663       if (dump_enabled_p ())
664 	{
665 	  dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
666 	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
667 			   stmt2_info->stmt);
668 	}
669 
670       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
671       if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
672 	/* STMT2_INFO is the actual pattern statement.  Add STMT1
673 	   to the end of the definition sequence.  */
674 	gimple_seq_add_stmt_without_update (def_seq, stmt1);
675       else
676 	{
677 	  /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
678 	     before it.  */
679 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
680 	  gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
681 	}
682       return true;
683     }
684   else
685     {
686       /* STMT2_INFO doesn't yet have a pattern.  Try to create a
687 	 two-statement pattern now.  */
688       gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
689       tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
690       tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
691       if (!lhs_vectype)
692 	return false;
693 
694       if (dump_enabled_p ())
695 	dump_printf_loc (MSG_NOTE, vect_location,
696 			 "Splitting statement: %G", stmt2_info->stmt);
697 
698       /* Add STMT1 as a singleton pattern definition sequence.  */
699       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
700       vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
701       gimple_seq_add_stmt_without_update (def_seq, stmt1);
702 
703       /* Build the second of the two pattern statements.  */
704       tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
705       gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
706       vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
707 
708       if (dump_enabled_p ())
709 	{
710 	  dump_printf_loc (MSG_NOTE, vect_location,
711 			   "into pattern statements: %G", stmt1);
712 	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
713 	}
714 
715       return true;
716     }
717 }
718 
719 /* Convert UNPROM to TYPE and return the result, adding new statements
720    to STMT_INFO's pattern definition statements if no better way is
721    available.  VECTYPE is the vector form of TYPE.  */
722 
723 static tree
724 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
725 		    vect_unpromoted_value *unprom, tree vectype)
726 {
727   /* Check for a no-op conversion.  */
728   if (types_compatible_p (type, TREE_TYPE (unprom->op)))
729     return unprom->op;
730 
731   /* Allow the caller to create constant vect_unpromoted_values.  */
732   if (TREE_CODE (unprom->op) == INTEGER_CST)
733     return wide_int_to_tree (type, wi::to_widest (unprom->op));
734 
735   tree input = unprom->op;
736   if (unprom->caster)
737     {
738       tree lhs = gimple_get_lhs (unprom->caster->stmt);
739       tree lhs_type = TREE_TYPE (lhs);
740 
741       /* If the result of the existing cast is the right width, use it
742 	 instead of the source of the cast.  */
743       if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
744 	input = lhs;
745       /* If the precision we want is between the source and result
746 	 precisions of the existing cast, try splitting the cast into
747 	 two and tapping into a mid-way point.  */
748       else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
749 	       && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
750 	{
751 	  /* In order to preserve the semantics of the original cast,
752 	     give the mid-way point the same signedness as the input value.
753 
754 	     It would be possible to use a signed type here instead if
755 	     TYPE is signed and UNPROM->TYPE is unsigned, but that would
756 	     make the sign of the midtype sensitive to the order in
757 	     which we process the statements, since the signedness of
758 	     TYPE is the signedness required by just one of possibly
759 	     many users.  Also, unsigned promotions are usually as cheap
760 	     as or cheaper than signed ones, so it's better to keep an
761 	     unsigned promotion.  */
762 	  tree midtype = build_nonstandard_integer_type
763 	    (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
764 	  tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
765 	  if (vec_midtype)
766 	    {
767 	      input = vect_recog_temp_ssa_var (midtype, NULL);
768 	      gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
769 						       unprom->op);
770 	      if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
771 					 vec_midtype))
772 		append_pattern_def_seq (vinfo, stmt_info,
773 					new_stmt, vec_midtype);
774 	    }
775 	}
776 
777       /* See if we can reuse an existing result.  */
778       if (types_compatible_p (type, TREE_TYPE (input)))
779 	return input;
780     }
781 
782   /* We need a new conversion statement.  */
783   tree new_op = vect_recog_temp_ssa_var (type, NULL);
784   gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
785 
786   /* If OP is an external value, see if we can insert the new statement
787      on an incoming edge.  */
788   if (input == unprom->op && unprom->dt == vect_external_def)
789     if (edge e = vect_get_external_def_edge (vinfo, input))
790       {
791 	basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
792 	gcc_assert (!new_bb);
793 	return new_op;
794       }
795 
796   /* As a (common) last resort, add the statement to the pattern itself.  */
797   append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
798   return new_op;
799 }
800 
801 /* Invoke vect_convert_input for N elements of UNPROM and store the
802    result in the corresponding elements of RESULT.  */
803 
804 static void
805 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
806 		     tree *result, tree type, vect_unpromoted_value *unprom,
807 		     tree vectype)
808 {
809   for (unsigned int i = 0; i < n; ++i)
810     {
811       unsigned int j;
812       for (j = 0; j < i; ++j)
813 	if (unprom[j].op == unprom[i].op)
814 	  break;
815       if (j < i)
816 	result[i] = result[j];
817       else
818 	result[i] = vect_convert_input (vinfo, stmt_info,
819 					type, &unprom[i], vectype);
820     }
821 }
822 
823 /* The caller has created a (possibly empty) sequence of pattern definition
824    statements followed by a single statement PATTERN_STMT.  Cast the result
825    of this final statement to TYPE.  If a new statement is needed, add
826    PATTERN_STMT to the end of STMT_INFO's pattern definition statements
827    and return the new statement, otherwise return PATTERN_STMT as-is.
828    VECITYPE is the vector form of PATTERN_STMT's result type.  */
829 
830 static gimple *
831 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
832 		     gimple *pattern_stmt, tree vecitype)
833 {
834   tree lhs = gimple_get_lhs (pattern_stmt);
835   if (!types_compatible_p (type, TREE_TYPE (lhs)))
836     {
837       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
838       tree cast_var = vect_recog_temp_ssa_var (type, NULL);
839       pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
840     }
841   return pattern_stmt;
842 }
843 
844 /* Return true if STMT_INFO describes a reduction for which reassociation
845    is allowed.  If STMT_INFO is part of a group, assume that it's part of
846    a reduction chain and optimistically assume that all statements
847    except the last allow reassociation.
848    Also require it to have code CODE and to be a reduction
849    in the outermost loop.  When returning true, store the operands in
850    *OP0_OUT and *OP1_OUT.  */
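/* A typical use, taken from the dot-product recognizer below:

     if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
					  &oprnd0, &oprnd1))
       return NULL;  */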
851 
852 static bool
853 vect_reassociating_reduction_p (vec_info *vinfo,
854 				stmt_vec_info stmt_info, tree_code code,
855 				tree *op0_out, tree *op1_out)
856 {
857   loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
858   if (!loop_info)
859     return false;
860 
861   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
862   if (!assign || gimple_assign_rhs_code (assign) != code)
863     return false;
864 
865   /* We don't allow changing the order of the computation in the inner-loop
866      when doing outer-loop vectorization.  */
867   class loop *loop = LOOP_VINFO_LOOP (loop_info);
868   if (loop && nested_in_vect_loop_p (loop, stmt_info))
869     return false;
870 
871   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
872     {
873       if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
874 				       code))
875 	return false;
876     }
877   else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
878     return false;
879 
880   *op0_out = gimple_assign_rhs1 (assign);
881   *op1_out = gimple_assign_rhs2 (assign);
882   if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
883     std::swap (*op0_out, *op1_out);
884   return true;
885 }
886 
887 /* Function vect_recog_dot_prod_pattern
888 
889    Try to find the following pattern:
890 
891      type x_t, y_t;
892      TYPE1 prod;
893      TYPE2 sum = init;
894    loop:
895      sum_0 = phi <init, sum_1>
896      S1  x_t = ...
897      S2  y_t = ...
898      S3  x_T = (TYPE1) x_t;
899      S4  y_T = (TYPE1) y_t;
900      S5  prod = x_T * y_T;
901      [S6  prod = (TYPE2) prod;  #optional]
902      S7  sum_1 = prod + sum_0;
903 
904    where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
905    same size as 'TYPE1' or bigger. This is a special case of a reduction
906    computation.
907 
908    Input:
909 
910    * STMT_VINFO: The stmt from which the pattern search begins.  In the
911    example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
912    will be detected.
913 
914    Output:
915 
916    * TYPE_OUT: The type of the output of this pattern.
917 
918    * Return value: A new stmt that will be used to replace the sequence of
919    stmts that constitute the pattern. In this case it will be:
920         WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
921 
922    Note: The dot-prod idiom is a widening reduction pattern that is
923          vectorized without preserving all the intermediate results. It
924          produces only N/2 (widened) results (by summing up pairs of
925          intermediate results) rather than all N results.  Therefore, we
926          cannot allow this pattern when we want to get all the results and in
927          the correct order (as is the case when this computation is in an
928          inner-loop nested in an outer-loop that is being vectorized).  */
929 
930 static gimple *
931 vect_recog_dot_prod_pattern (vec_info *vinfo,
932 			     stmt_vec_info stmt_vinfo, tree *type_out)
933 {
934   tree oprnd0, oprnd1;
935   gimple *last_stmt = stmt_vinfo->stmt;
936   tree type, half_type;
937   gimple *pattern_stmt;
938   tree var;
939 
940   /* Look for the following pattern
941           DX = (TYPE1) X;
942           DY = (TYPE1) Y;
943           DPROD = DX * DY;
944           DDPROD = (TYPE2) DPROD;
945           sum_1 = DDPROD + sum_0;
946      In which
947      - DX is double the size of X
948      - DY is double the size of Y
949      - DX, DY, DPROD all have the same type
950      - sum is the same size as DPROD or bigger
951      - sum has been recognized as a reduction variable.
952 
953      This is equivalent to:
954        DPROD = X w* Y;          #widen mult
955        sum_1 = DPROD w+ sum_0;  #widen summation
956      or
957        DPROD = X w* Y;          #widen mult
958        sum_1 = DPROD + sum_0;   #summation
959    */
960 
961   /* Starting from LAST_STMT, follow the defs of its uses in search
962      of the above pattern.  */
963 
964   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
965 				       &oprnd0, &oprnd1))
966     return NULL;
967 
968   type = gimple_expr_type (last_stmt);
969 
970   vect_unpromoted_value unprom_mult;
971   oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
972 
973   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
974      we know that oprnd1 is the reduction variable (defined by a loop-header
975      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
976      Left to check that oprnd0 is defined by a (widen_)mult_expr  */
977   if (!oprnd0)
978     return NULL;
979 
980   stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
981   if (!mult_vinfo)
982     return NULL;
983 
984   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
985      inside the loop (in case we are analyzing an outer-loop).  */
986   vect_unpromoted_value unprom0[2];
987   if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
988 			     false, 2, unprom0, &half_type))
989     return NULL;
990 
991   /* If there are two widening operations, make sure they agree on
992      the sign of the extension.  */
993   if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
994       && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
995     return NULL;
996 
997   vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
998 
999   tree half_vectype;
1000   if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1001 					type_out, &half_vectype))
1002     return NULL;
1003 
1004   /* Get the inputs in the appropriate types.  */
1005   tree mult_oprnd[2];
1006   vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1007 		       unprom0, half_vectype);
1008 
1009   var = vect_recog_temp_ssa_var (type, NULL);
1010   pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1011 				      mult_oprnd[0], mult_oprnd[1], oprnd1);
1012 
1013   return pattern_stmt;
1014 }
1015 
1016 
1017 /* Function vect_recog_sad_pattern
1018 
1019    Try to find the following Sum of Absolute Difference (SAD) pattern:
1020 
1021      type x_t, y_t;
1022      signed TYPE1 diff, abs_diff;
1023      TYPE2 sum = init;
1024    loop:
1025      sum_0 = phi <init, sum_1>
1026      S1  x_t = ...
1027      S2  y_t = ...
1028      S3  x_T = (TYPE1) x_t;
1029      S4  y_T = (TYPE1) y_t;
1030      S5  diff = x_T - y_T;
1031      S6  abs_diff = ABS_EXPR <diff>;
1032      [S7  abs_diff = (TYPE2) abs_diff;  #optional]
1033      S8  sum_1 = abs_diff + sum_0;
1034 
1035    where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1036    same size as 'TYPE1' or bigger. This is a special case of a reduction
1037    computation.
1038 
1039    Input:
1040 
1041    * STMT_VINFO: The stmt from which the pattern search begins.  In the
1042    example, when this function is called with S8, the pattern
1043    {S3,S4,S5,S6,S7,S8} will be detected.
1044 
1045    Output:
1046 
1047    * TYPE_OUT: The type of the output of this pattern.
1048 
1049    * Return value: A new stmt that will be used to replace the sequence of
1050    stmts that constitute the pattern. In this case it will be:
1051         SAD_EXPR <x_t, y_t, sum_0>
1052   */
1053 
1054 static gimple *
1055 vect_recog_sad_pattern (vec_info *vinfo,
1056 			stmt_vec_info stmt_vinfo, tree *type_out)
1057 {
1058   gimple *last_stmt = stmt_vinfo->stmt;
1059   tree half_type;
1060 
1061   /* Look for the following pattern
1062           DX = (TYPE1) X;
1063           DY = (TYPE1) Y;
1064           DDIFF = DX - DY;
1065           DAD = ABS_EXPR <DDIFF>;
1066           DAD = (TYPE2) DAD;         #optional
1067           sum_1 = DAD + sum_0;
1068      In which
1069      - DX is at least double the size of X
1070      - DY is at least double the size of Y
1071      - DX, DY, DDIFF, DAD all have the same type
1072      - sum is the same size as DAD or bigger
1073      - sum has been recognized as a reduction variable.
1074 
1075      This is equivalent to:
1076        DDIFF = X w- Y;          #widen sub
1077        DAD = ABS_EXPR <DDIFF>;
1078        sum_1 = DAD w+ sum_0;    #widen summation
1079      or
1080        DDIFF = X w- Y;          #widen sub
1081        DAD = ABS_EXPR <DDIFF>;
1082        sum_1 = DAD + sum_0;     #summation
1083    */
1084 
1085   /* Starting from LAST_STMT, follow the defs of its uses in search
1086      of the above pattern.  */
1087 
1088   tree plus_oprnd0, plus_oprnd1;
1089   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1090 				       &plus_oprnd0, &plus_oprnd1))
1091     return NULL;
1092 
1093   tree sum_type = gimple_expr_type (last_stmt);
1094 
1095   /* Any non-truncating sequence of conversions is OK here, since
1096      with a successful match, the result of the ABS(U) is known to fit
1097      within the nonnegative range of the result type.  (It cannot be the
1098      negative of the minimum signed value due to the range of the widening
1099      MINUS_EXPR.)  */
1100   vect_unpromoted_value unprom_abs;
1101   plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1102 						      &unprom_abs);
1103 
1104   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
1105      we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1106      phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1107      Then check that plus_oprnd0 is defined by an abs_expr.  */
1108 
1109   if (!plus_oprnd0)
1110     return NULL;
1111 
1112   stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1113   if (!abs_stmt_vinfo)
1114     return NULL;
1115 
1116   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
1117      inside the loop (in case we are analyzing an outer-loop).  */
1118   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1119   if (!abs_stmt
1120       || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1121 	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1122     return NULL;
1123 
1124   tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1125   tree abs_type = TREE_TYPE (abs_oprnd);
1126   if (TYPE_UNSIGNED (abs_type))
1127     return NULL;
1128 
1129   /* Peel off conversions from the ABS input.  This can involve sign
1130      changes (e.g. from an unsigned subtraction to a signed ABS input)
1131      or signed promotion, but it can't include unsigned promotion.
1132      (Note that ABS of an unsigned promotion should have been folded
1133      away before now anyway.)  */
1134   vect_unpromoted_value unprom_diff;
1135   abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1136 						    &unprom_diff);
1137   if (!abs_oprnd)
1138     return NULL;
1139   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1140       && TYPE_UNSIGNED (unprom_diff.type))
1141     return NULL;
1142 
1143   /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
1144   stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1145   if (!diff_stmt_vinfo)
1146     return NULL;
1147 
1148   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
1149      inside the loop (in case we are analyzing an outer-loop).  */
1150   vect_unpromoted_value unprom[2];
1151   if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
1152 			     false, 2, unprom, &half_type))
1153     return NULL;
1154 
1155   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1156 
1157   tree half_vectype;
1158   if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1159 					type_out, &half_vectype))
1160     return NULL;
1161 
1162   /* Get the inputs to the SAD_EXPR in the appropriate types.  */
1163   tree sad_oprnd[2];
1164   vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1165 		       unprom, half_vectype);
1166 
1167   tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1168   gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1169 					      sad_oprnd[1], plus_oprnd1);
1170 
1171   return pattern_stmt;
1172 }
1173 
1174 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1175    so that it can be treated as though it had the form:
1176 
1177       A_TYPE a;
1178       B_TYPE b;
1179       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
1180       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
1181     | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
1182     | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
1183     | RES_TYPE res = a_extend ORIG_CODE b_extend;
1184 
1185    Try to replace the pattern with:
1186 
1187       A_TYPE a;
1188       B_TYPE b;
1189       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
1190       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
1191     | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1192     | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op
1193 
1194    where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1195 
1196    SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
1197    name of the pattern being matched, for dump purposes.  */
1198 
1199 static gimple *
1200 vect_recog_widen_op_pattern (vec_info *vinfo,
1201 			     stmt_vec_info last_stmt_info, tree *type_out,
1202 			     tree_code orig_code, tree_code wide_code,
1203 			     bool shift_p, const char *name)
1204 {
1205   gimple *last_stmt = last_stmt_info->stmt;
1206 
1207   vect_unpromoted_value unprom[2];
1208   tree half_type;
1209   if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1210 			     shift_p, 2, unprom, &half_type))
1211     return NULL;
1212 
1213   /* Pattern detected.  */
1214   vect_pattern_detected (name, last_stmt);
1215 
1216   tree type = gimple_expr_type (last_stmt);
1217   tree itype = type;
1218   if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1219       || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1220     itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1221 					    TYPE_UNSIGNED (half_type));
1222 
1223   /* Check target support  */
1224   tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1225   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1226   enum tree_code dummy_code;
1227   int dummy_int;
1228   auto_vec<tree> dummy_vec;
1229   if (!vectype
1230       || !vecitype
1231       || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1232 					  vecitype, vectype,
1233 					  &dummy_code, &dummy_code,
1234 					  &dummy_int, &dummy_vec))
1235     return NULL;
1236 
1237   *type_out = get_vectype_for_scalar_type (vinfo, type);
1238   if (!*type_out)
1239     return NULL;
1240 
1241   tree oprnd[2];
1242   vect_convert_inputs (vinfo, last_stmt_info,
1243 		       2, oprnd, half_type, unprom, vectype);
1244 
1245   tree var = vect_recog_temp_ssa_var (itype, NULL);
1246   gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1247 					      oprnd[0], oprnd[1]);
1248 
1249   return vect_convert_output (vinfo, last_stmt_info,
1250 			      type, pattern_stmt, vecitype);
1251 }
1252 
1253 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1254    to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */
1255 
1256 static gimple *
1257 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1258 			       tree *type_out)
1259 {
1260   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1261 				      MULT_EXPR, WIDEN_MULT_EXPR, false,
1262 				      "vect_recog_widen_mult_pattern");
1263 }
1264 
1265 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1266    to WIDEN_PLUS_EXPR.  See vect_recog_widen_op_pattern for details.  */
1267 
1268 static gimple *
1269 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1270 			       tree *type_out)
1271 {
1272   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1273 				      PLUS_EXPR, WIDEN_PLUS_EXPR, false,
1274 				      "vect_recog_widen_plus_pattern");
1275 }
1276 
1277 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1278    to WIDEN_MINUS_EXPR.  See vect_recog_widen_op_pattern for details.  */
1279 static gimple *
1280 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1281 			       tree *type_out)
1282 {
1283   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1284 				      MINUS_EXPR, WIDEN_MINUS_EXPR, false,
1285 				      "vect_recog_widen_minus_pattern");
1286 }
1287 
1288 /* Function vect_recog_pow_pattern
1289 
1290    Try to find the following pattern:
1291 
1292      x = POW (y, N);
1293 
1294    with POW being one of pow, powf, powi, powif and N being
1295    either 2 or 0.5.
1296 
1297    Input:
1298 
1299    * STMT_VINFO: The stmt from which the pattern search begins.
1300 
1301    Output:
1302 
1303    * TYPE_OUT: The type of the output of this pattern.
1304 
1305    * Return value: A new stmt that will be used to replace the sequence of
1306    stmts that constitute the pattern. In this case it will be:
1307         x = x * x
1308    or
1309 	x = sqrt (x)
1310 */
1311 
1312 static gimple *
1313 vect_recog_pow_pattern (vec_info *vinfo,
1314 			stmt_vec_info stmt_vinfo, tree *type_out)
1315 {
1316   gimple *last_stmt = stmt_vinfo->stmt;
1317   tree base, exp;
1318   gimple *stmt;
1319   tree var;
1320 
1321   if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1322     return NULL;
1323 
1324   switch (gimple_call_combined_fn (last_stmt))
1325     {
1326     CASE_CFN_POW:
1327     CASE_CFN_POWI:
1328       break;
1329 
1330     default:
1331       return NULL;
1332     }
1333 
1334   base = gimple_call_arg (last_stmt, 0);
1335   exp = gimple_call_arg (last_stmt, 1);
1336   if (TREE_CODE (exp) != REAL_CST
1337       && TREE_CODE (exp) != INTEGER_CST)
1338     {
1339       if (flag_unsafe_math_optimizations
1340 	  && TREE_CODE (base) == REAL_CST
1341 	  && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
1342 	{
1343 	  combined_fn log_cfn;
1344 	  built_in_function exp_bfn;
1345 	  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1346 	    {
1347 	    case BUILT_IN_POW:
1348 	      log_cfn = CFN_BUILT_IN_LOG;
1349 	      exp_bfn = BUILT_IN_EXP;
1350 	      break;
1351 	    case BUILT_IN_POWF:
1352 	      log_cfn = CFN_BUILT_IN_LOGF;
1353 	      exp_bfn = BUILT_IN_EXPF;
1354 	      break;
1355 	    case BUILT_IN_POWL:
1356 	      log_cfn = CFN_BUILT_IN_LOGL;
1357 	      exp_bfn = BUILT_IN_EXPL;
1358 	      break;
1359 	    default:
1360 	      return NULL;
1361 	    }
1362 	  tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1363 	  tree exp_decl = builtin_decl_implicit (exp_bfn);
1364 	  /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
1365 	     does that, but if C is a power of 2, we want to use
1366 	     exp2 (log2 (C) * x) in the non-vectorized version, but for
1367 	     vectorization we don't have vectorized exp2.  */
1368 	  if (logc
1369 	      && TREE_CODE (logc) == REAL_CST
1370 	      && exp_decl
1371 	      && lookup_attribute ("omp declare simd",
1372 				   DECL_ATTRIBUTES (exp_decl)))
1373 	    {
1374 	      cgraph_node *node = cgraph_node::get_create (exp_decl);
1375 	      if (node->simd_clones == NULL)
1376 		{
1377 		  if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1378 		      || node->definition)
1379 		    return NULL;
1380 		  expand_simd_clones (node);
1381 		  if (node->simd_clones == NULL)
1382 		    return NULL;
1383 		}
1384 	      *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1385 	      if (!*type_out)
1386 		return NULL;
1387 	      tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1388 	      gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1389 	      append_pattern_def_seq (vinfo, stmt_vinfo, g);
1390 	      tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1391 	      g = gimple_build_call (exp_decl, 1, def);
1392 	      gimple_call_set_lhs (g, res);
1393 	      return g;
1394 	    }
1395 	}
1396 
1397       return NULL;
1398     }
1399 
1400   /* We now have a pow or powi builtin function call with a constant
1401      exponent.  */
1402 
1403   /* Catch squaring.  */
1404   if ((tree_fits_shwi_p (exp)
1405        && tree_to_shwi (exp) == 2)
1406       || (TREE_CODE (exp) == REAL_CST
1407           && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1408     {
1409       if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
1410 					    TREE_TYPE (base), type_out))
1411 	return NULL;
1412 
1413       var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1414       stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1415       return stmt;
1416     }
1417 
1418   /* Catch square root.  */
1419   if (TREE_CODE (exp) == REAL_CST
1420       && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1421     {
1422       *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1423       if (*type_out
1424 	  && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1425 					     OPTIMIZE_FOR_SPEED))
1426 	{
1427 	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1428 	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1429 	  gimple_call_set_lhs (stmt, var);
1430 	  gimple_call_set_nothrow (stmt, true);
1431 	  return stmt;
1432 	}
1433     }
1434 
1435   return NULL;
1436 }
1437 
1438 
1439 /* Function vect_recog_widen_sum_pattern
1440 
1441    Try to find the following pattern:
1442 
1443      type x_t;
1444      TYPE x_T, sum = init;
1445    loop:
1446      sum_0 = phi <init, sum_1>
1447      S1  x_t = *p;
1448      S2  x_T = (TYPE) x_t;
1449      S3  sum_1 = x_T + sum_0;
1450 
1451    where type 'TYPE' is at least double the size of type 'type', i.e. we're
1452    summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1453    a special case of a reduction computation.
1454 
1455    Input:
1456 
1457    * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1458    when this function is called with S3, the pattern {S2,S3} will be detected.
1459 
1460    Output:
1461 
1462    * TYPE_OUT: The type of the output of this pattern.
1463 
1464    * Return value: A new stmt that will be used to replace the sequence of
1465    stmts that constitute the pattern. In this case it will be:
1466         WIDEN_SUM <x_t, sum_0>
1467 
1468    Note: The widening-sum idiom is a widening reduction pattern that is
1469 	 vectorized without preserving all the intermediate results. It
1470          produces only N/2 (widened) results (by summing up pairs of
1471 	 intermediate results) rather than all N results.  Therefore, we
1472 	 cannot allow this pattern when we want to get all the results and in
1473 	 the correct order (as is the case when this computation is in an
1474 	 inner-loop nested in an outer-loop that is being vectorized).  */
1475 
1476 static gimple *
1477 vect_recog_widen_sum_pattern (vec_info *vinfo,
1478 			      stmt_vec_info stmt_vinfo, tree *type_out)
1479 {
1480   gimple *last_stmt = stmt_vinfo->stmt;
1481   tree oprnd0, oprnd1;
1482   tree type;
1483   gimple *pattern_stmt;
1484   tree var;
1485 
1486   /* Look for the following pattern
1487           DX = (TYPE) X;
1488           sum_1 = DX + sum_0;
1489      In which DX is at least double the size of X, and sum_1 has been
1490      recognized as a reduction variable.
1491    */
1492 
1493   /* Starting from LAST_STMT, follow the defs of its uses in search
1494      of the above pattern.  */
1495 
1496   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1497 				       &oprnd0, &oprnd1))
1498     return NULL;
1499 
1500   type = gimple_expr_type (last_stmt);
1501 
1502   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
1503      we know that oprnd1 is the reduction variable (defined by a loop-header
1504      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1505      Left to check that oprnd0 is defined by a cast from type 'type' to type
1506      'TYPE'.  */
1507 
1508   vect_unpromoted_value unprom0;
1509   if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1510       || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1511     return NULL;
1512 
1513   vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1514 
1515   if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
1516 					unprom0.type, type_out))
1517     return NULL;
1518 
1519   var = vect_recog_temp_ssa_var (type, NULL);
1520   pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1521 
1522   return pattern_stmt;
1523 }
1524 
1525 /* Recognize cases in which an operation is performed in one type WTYPE
1526    but could be done more efficiently in a narrower type NTYPE.  For example,
1527    if we have:
1528 
1529      ATYPE a;  // narrower than NTYPE
1530      BTYPE b;  // narrower than NTYPE
1531      WTYPE aw = (WTYPE) a;
1532      WTYPE bw = (WTYPE) b;
1533      WTYPE res = aw + bw;  // only uses of aw and bw
1534 
1535    then it would be more efficient to do:
1536 
1537      NTYPE an = (NTYPE) a;
1538      NTYPE bn = (NTYPE) b;
1539      NTYPE resn = an + bn;
1540      WTYPE res = (WTYPE) resn;
1541 
1542    Other situations include things like:
1543 
1544      ATYPE a;  // NTYPE or narrower
1545      WTYPE aw = (WTYPE) a;
1546      WTYPE res = aw + b;
1547 
1548    when only "(NTYPE) res" is significant.  In that case it's more efficient
1549    to truncate "b" and do the operation on NTYPE instead:
1550 
1551      NTYPE an = (NTYPE) a;
1552      NTYPE bn = (NTYPE) b;  // truncation
1553      NTYPE resn = an + bn;
1554      WTYPE res = (WTYPE) resn;
1555 
1556    All users of "res" should then use "resn" instead, making the final
1557    statement dead (not marked as relevant).  The final statement is still
1558    needed to maintain the type correctness of the IR.
1559 
1560    vect_determine_precisions has already determined the minimum
1561    precision of the operation and the minimum precision required
1562    by users of the result.  */
1563 
1564 static gimple *
1565 vect_recog_over_widening_pattern (vec_info *vinfo,
1566 				  stmt_vec_info last_stmt_info, tree *type_out)
1567 {
1568   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1569   if (!last_stmt)
1570     return NULL;
1571 
1572   /* See whether we have found that this operation can be done on a
1573      narrower type without changing its semantics.  */
1574   unsigned int new_precision = last_stmt_info->operation_precision;
1575   if (!new_precision)
1576     return NULL;
1577 
1578   tree lhs = gimple_assign_lhs (last_stmt);
1579   tree type = TREE_TYPE (lhs);
1580   tree_code code = gimple_assign_rhs_code (last_stmt);
1581 
1582   /* Punt for reductions where we don't handle the type conversions.  */
1583   if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
1584     return NULL;
1585 
1586   /* Keep the first operand of a COND_EXPR as-is: only the other two
1587      operands are interesting.  */
1588   unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1589 
1590   /* Check the operands.  */
1591   unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1592   auto_vec <vect_unpromoted_value, 3> unprom (nops);
1593   unprom.quick_grow (nops);
1594   unsigned int min_precision = 0;
1595   bool single_use_p = false;
1596   for (unsigned int i = 0; i < nops; ++i)
1597     {
1598       tree op = gimple_op (last_stmt, first_op + i);
1599       if (TREE_CODE (op) == INTEGER_CST)
1600 	unprom[i].set_op (op, vect_constant_def);
1601       else if (TREE_CODE (op) == SSA_NAME)
1602 	{
1603 	  bool op_single_use_p = true;
1604 	  if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1605 						     &op_single_use_p))
1606 	    return NULL;
1607 	  /* If:
1608 
1609 	     (1) N bits of the result are needed;
1610 	     (2) all inputs are widened from M<N bits; and
1611 	     (3) one operand OP is a single-use SSA name
1612 
1613 	     we can shift the M->N widening from OP to the output
1614 	     without changing the number or type of extensions involved.
1615 	     This then reduces the number of copies of STMT_INFO.
1616 
1617 	     If instead of (3) more than one operand is a single-use SSA name,
1618 	     shifting the extension to the output is even more of a win.
1619 
1620 	     If instead:
1621 
1622 	     (1) N bits of the result are needed;
1623 	     (2) one operand OP2 is widened from M2<N bits;
1624 	     (3) another operand OP1 is widened from M1<M2 bits; and
1625 	     (4) both OP1 and OP2 are single-use
1626 
1627 	     the choice is between:
1628 
1629 	     (a) truncating OP2 to M1, doing the operation on M1,
1630 		 and then widening the result to N
1631 
1632 	     (b) widening OP1 to M2, doing the operation on M2, and then
1633 		 widening the result to N
1634 
1635 	     Both shift the M2->N widening of the inputs to the output.
1636 	     (a) additionally shifts the M1->M2 widening to the output;
1637 	     it requires fewer copies of STMT_INFO but requires an extra
1638 	     M2->M1 truncation.
1639 
1640 	     Which is better will depend on the complexity and cost of
1641 	     STMT_INFO, which is hard to predict at this stage.  However,
1642 	     a clear tie-breaker in favor of (b) is the fact that the
1643 	     truncation in (a) increases the length of the operation chain.
1644 
1645 	     If instead of (4) only one of OP1 or OP2 is single-use,
1646 	     (b) is still a win over doing the operation in N bits:
1647 	     it still shifts the M2->N widening on the single-use operand
1648 	     to the output and reduces the number of STMT_INFO copies.
1649 
1650 	     If neither operand is single-use then operating on fewer than
1651 	     N bits might lead to more extensions overall.  Whether it does
1652 	     or not depends on global information about the vectorization
1653 	     region, and whether that's a good trade-off would again
1654 	     depend on the complexity and cost of the statements involved,
1655 	     as well as things like register pressure that are not normally
1656 	     modelled at this stage.  We therefore ignore these cases
1657 	     and just optimize the clear single-use wins above.
1658 
1659 	     Thus we take the maximum precision of the unpromoted operands
1660 	     and record whether any operand is single-use.  */
1661 	  if (unprom[i].dt == vect_internal_def)
1662 	    {
1663 	      min_precision = MAX (min_precision,
1664 				   TYPE_PRECISION (unprom[i].type));
1665 	      single_use_p |= op_single_use_p;
1666 	    }
1667 	}
1668       else
1669 	return NULL;
1670     }
1671 
1672   /* Although the operation could be done in operation_precision, we have
1673      to balance that against introducing extra truncations or extensions.
1674      Calculate the minimum precision that can be handled efficiently.
1675 
1676      The loop above determined that the operation could be handled
1677      efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1678      extension from the inputs to the output without introducing more
1679      instructions, and would reduce the number of instructions required
1680      for STMT_INFO itself.
1681 
1682      vect_determine_precisions has also determined that the result only
1683      needs min_output_precision bits.  Truncating by a factor of N
1684      requires a tree of N - 1 instructions, so if TYPE is N times wider
1685      than min_output_precision, doing the operation in TYPE and truncating
1686      the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1687      In contrast:
1688 
1689      - truncating the input to a unary operation and doing the operation
1690        in the new type requires at most N - 1 + 1 = N instructions per
1691        output vector
1692 
1693      - doing the same for a binary operation requires at most
1694        (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1695 
1696      Both unary and binary operations require fewer instructions than
1697      this if the operands were extended from a suitable truncated form.
1698      Thus there is usually nothing to lose by doing operations in
1699      min_output_precision bits, but there can be something to gain.  */
1700   if (!single_use_p)
1701     min_precision = last_stmt_info->min_output_precision;
1702   else
1703     min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1704 
1705   /* Apply the minimum efficient precision we just calculated.  */
1706   if (new_precision < min_precision)
1707     new_precision = min_precision;
1708   new_precision = vect_element_precision (new_precision);
1709   if (new_precision >= TYPE_PRECISION (type))
1710     return NULL;
1711 
1712   vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1713 
1714   *type_out = get_vectype_for_scalar_type (vinfo, type);
1715   if (!*type_out)
1716     return NULL;
1717 
1718   /* We've found a viable pattern.  Get the new type of the operation.  */
1719   bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1720   tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1721 
1722   /* If we're truncating an operation, we need to make sure that we
1723      don't introduce new undefined overflow.  The codes tested here are
1724      a subset of those accepted by vect_truncatable_operation_p.  */
1725   tree op_type = new_type;
1726   if (TYPE_OVERFLOW_UNDEFINED (new_type)
1727       && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
1728     op_type = build_nonstandard_integer_type (new_precision, true);
1729 
1730   /* We specifically don't check here whether the target supports the
1731      new operation, since it might be something that a later pattern
1732      wants to rewrite anyway.  If targets have a minimum element size
1733      for some optabs, we should pattern-match smaller ops to larger ops
1734      where beneficial.  */
1735   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1736   tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
1737   if (!new_vectype || !op_vectype)
1738     return NULL;
1739 
1740   if (dump_enabled_p ())
1741     dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1742 		     type, new_type);
1743 
1744   /* Calculate the rhs operands for an operation on OP_TYPE.  */
1745   tree ops[3] = {};
1746   for (unsigned int i = 1; i < first_op; ++i)
1747     ops[i - 1] = gimple_op (last_stmt, i);
1748   vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
1749 		       op_type, &unprom[0], op_vectype);
1750 
1751   /* Use the operation to produce a result of type OP_TYPE.  */
1752   tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
1753   gimple *pattern_stmt = gimple_build_assign (new_var, code,
1754 					      ops[0], ops[1], ops[2]);
1755   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1756 
1757   if (dump_enabled_p ())
1758     dump_printf_loc (MSG_NOTE, vect_location,
1759 		     "created pattern stmt: %G", pattern_stmt);
1760 
1761   /* Convert back to the original signedness, if OP_TYPE is different
1762      from NEW_TYPE.  */
1763   if (op_type != new_type)
1764     pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
1765 					pattern_stmt, op_vectype);
1766 
1767   /* Promote the result to the original type.  */
1768   pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
1769 				      pattern_stmt, new_vectype);
1770 
1771   return pattern_stmt;
1772 }
1773 
1774 /* Recognize the following patterns:
1775 
1776      ATYPE a;  // narrower than TYPE
1777      BTYPE b;  // narrower than TYPE
1778 
1779    1) Multiply high with scaling
1780      TYPE res = ((TYPE) a * (TYPE) b) >> c;
1781    2) ... or also with rounding
1782      TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
1783 
1784    where only the bottom half of res is used.  */
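
/* As a concrete sketch (not taken from any particular testcase), a Q15
   fixed-point multiply-high matches case 1, and its rounding form
   matches case 2:

     int16_t a, b;
     int32_t prod = (int32_t) a * (int32_t) b;
     int16_t res  = (int16_t) (prod >> 15);               // .MULHS
     int16_t resr = (int16_t) (((prod >> 14) + 1) >> 1);  // .MULHRS

   assuming the target provides the corresponding multiply-high
   (and round-and-scale) internal functions.  */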
1785 
1786 static gimple *
1787 vect_recog_mulhs_pattern (vec_info *vinfo,
1788 			  stmt_vec_info last_stmt_info, tree *type_out)
1789 {
1790   /* Check for a right shift.  */
1791   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1792   if (!last_stmt
1793       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
1794     return NULL;
1795 
1796   /* Check that the shift result is wider than the users of the
1797      result need (i.e. that narrowing would be a natural choice).  */
1798   tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1799   unsigned int target_precision
1800     = vect_element_precision (last_stmt_info->min_output_precision);
1801   if (!INTEGRAL_TYPE_P (lhs_type)
1802       || target_precision >= TYPE_PRECISION (lhs_type))
1803     return NULL;
1804 
1805   /* Look through any change in sign on the outer shift input.  */
1806   vect_unpromoted_value unprom_rshift_input;
1807   tree rshift_input = vect_look_through_possible_promotion
1808     (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
1809   if (!rshift_input
1810       || TYPE_PRECISION (TREE_TYPE (rshift_input))
1811 	   != TYPE_PRECISION (lhs_type))
1812     return NULL;
1813 
1814   /* Get the definition of the shift input.  */
1815   stmt_vec_info rshift_input_stmt_info
1816     = vect_get_internal_def (vinfo, rshift_input);
1817   if (!rshift_input_stmt_info)
1818     return NULL;
1819   gassign *rshift_input_stmt
1820     = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
1821   if (!rshift_input_stmt)
1822     return NULL;
1823 
1824   stmt_vec_info mulh_stmt_info;
1825   tree scale_term;
1826   internal_fn ifn;
1827   unsigned int expect_offset;
1828 
1829   /* Check for the presence of the rounding term.  */
1830   if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
1831     {
1832       /* Check that the outer shift was by 1.  */
1833       if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
1834 	return NULL;
1835 
1836       /* Check that the second operand of the PLUS_EXPR is 1.  */
1837       if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
1838 	return NULL;
1839 
1840       /* Look through any change in sign on the addition input.  */
1841       vect_unpromoted_value unprom_plus_input;
1842       tree plus_input = vect_look_through_possible_promotion
1843 	(vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
1844       if (!plus_input
1845 	   || TYPE_PRECISION (TREE_TYPE (plus_input))
1846 		!= TYPE_PRECISION (TREE_TYPE (rshift_input)))
1847 	return NULL;
1848 
1849       /* Get the definition of the multiply-high-scale part.  */
1850       stmt_vec_info plus_input_stmt_info
1851 	= vect_get_internal_def (vinfo, plus_input);
1852       if (!plus_input_stmt_info)
1853 	return NULL;
1854       gassign *plus_input_stmt
1855 	= dyn_cast <gassign *> (plus_input_stmt_info->stmt);
1856       if (!plus_input_stmt
1857 	  || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
1858 	return NULL;
1859 
1860       /* Look through any change in sign on the scaling input.  */
1861       vect_unpromoted_value unprom_scale_input;
1862       tree scale_input = vect_look_through_possible_promotion
1863 	(vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
1864       if (!scale_input
1865 	  || TYPE_PRECISION (TREE_TYPE (scale_input))
1866 	       != TYPE_PRECISION (TREE_TYPE (plus_input)))
1867 	return NULL;
1868 
1869       /* Get the definition of the multiply-high part.  */
1870       mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
1871       if (!mulh_stmt_info)
1872 	return NULL;
1873 
1874       /* Get the scaling term.  */
1875       scale_term = gimple_assign_rhs2 (plus_input_stmt);
1876 
1877       expect_offset = target_precision + 2;
1878       ifn = IFN_MULHRS;
1879     }
1880   else
1881     {
1882       mulh_stmt_info = rshift_input_stmt_info;
1883       scale_term = gimple_assign_rhs2 (last_stmt);
1884 
1885       expect_offset = target_precision + 1;
1886       ifn = IFN_MULHS;
1887     }
1888 
1889   /* Check that the scaling factor is correct.  */
1890   if (TREE_CODE (scale_term) != INTEGER_CST
1891       || wi::to_widest (scale_term) + expect_offset
1892 	   != TYPE_PRECISION (lhs_type))
1893     return NULL;
1894 
1895   /* Check whether the scaling input term can be seen as two widened
1896      inputs multiplied together.  */
1897   vect_unpromoted_value unprom_mult[2];
1898   tree new_type;
1899   unsigned int nops
1900     = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
1901 			    false, 2, unprom_mult, &new_type);
1902   if (nops != 2)
1903     return NULL;
1904 
1905   vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
1906 
1907   /* Adjust output precision.  */
1908   if (TYPE_PRECISION (new_type) < target_precision)
1909     new_type = build_nonstandard_integer_type
1910       (target_precision, TYPE_UNSIGNED (new_type));
1911 
1912   /* Check for target support.  */
1913   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1914   if (!new_vectype
1915       || !direct_internal_fn_supported_p
1916 	    (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
1917     return NULL;
1918 
1919   /* The IR requires a valid vector type for the cast result, even though
1920      it's likely to be discarded.  */
1921   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
1922   if (!*type_out)
1923     return NULL;
1924 
1925   /* Generate the IFN_MULHRS call.  */
1926   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1927   tree new_ops[2];
1928   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
1929 		       unprom_mult, new_vectype);
1930   gcall *mulhrs_stmt
1931     = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
1932   gimple_call_set_lhs (mulhrs_stmt, new_var);
1933   gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
1934 
1935   if (dump_enabled_p ())
1936     dump_printf_loc (MSG_NOTE, vect_location,
1937 		     "created pattern stmt: %G", mulhrs_stmt);
1938 
1939   return vect_convert_output (vinfo, last_stmt_info, lhs_type,
1940 			      mulhrs_stmt, new_vectype);
1941 }
1942 
1943 /* Recognize the patterns:
1944 
1945 	    ATYPE a;  // narrower than TYPE
1946 	    BTYPE b;  // narrower than TYPE
1947 	(1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1948      or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1949 
1950    where only the bottom half of avg is used.  Try to transform them into:
1951 
1952 	(1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1953      or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1954 
1955   followed by:
1956 
1957 	    TYPE avg = (TYPE) avg';
1958 
1959   where NTYPE is no wider than half of TYPE.  Since only the bottom half
1960   of avg is used, all or part of the cast of avg' should become redundant.
1961 
1962   If there is no target support available, generate code to distribute rshift
1963   over plus and add a carry.  */
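
/* As a concrete sketch of case (2), assuming 8-bit chars and a target
   with an averaging instruction:

     unsigned char a, b;
     int t = ((int) a + (int) b + 1) >> 1;
     unsigned char avg = (unsigned char) t;

   only the low 8 bits of T are used, so the computation can become

     unsigned char avg = .AVG_CEIL (a, b);  */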
1964 
1965 static gimple *
1966 vect_recog_average_pattern (vec_info *vinfo,
1967 			    stmt_vec_info last_stmt_info, tree *type_out)
1968 {
1969   /* Check for a shift right by one bit.  */
1970   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1971   if (!last_stmt
1972       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1973       || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1974     return NULL;
1975 
1976   /* Check that the shift result is wider than the users of the
1977      result need (i.e. that narrowing would be a natural choice).  */
1978   tree lhs = gimple_assign_lhs (last_stmt);
1979   tree type = TREE_TYPE (lhs);
1980   unsigned int target_precision
1981     = vect_element_precision (last_stmt_info->min_output_precision);
1982   if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1983     return NULL;
1984 
1985   /* Look through any change in sign on the shift input.  */
1986   tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1987   vect_unpromoted_value unprom_plus;
1988   rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
1989 						     &unprom_plus);
1990   if (!rshift_rhs
1991       || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
1992     return NULL;
1993 
1994   /* Get the definition of the shift input.  */
1995   stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1996   if (!plus_stmt_info)
1997     return NULL;
1998 
1999   /* Check whether the shift input can be seen as a tree of additions on
2000      2 or 3 widened inputs.
2001 
2002      Note that the pattern should be a win even if the result of one or
2003      more additions is reused elsewhere: if the pattern matches, we'd be
2004      replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s.  */
2005   internal_fn ifn = IFN_AVG_FLOOR;
2006   vect_unpromoted_value unprom[3];
2007   tree new_type;
2008   unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
2009 					    WIDEN_PLUS_EXPR, false, 3,
2010 					    unprom, &new_type);
2011   if (nops == 0)
2012     return NULL;
2013   if (nops == 3)
2014     {
2015       /* Check that one operand is 1.  */
2016       unsigned int i;
2017       for (i = 0; i < 3; ++i)
2018 	if (integer_onep (unprom[i].op))
2019 	  break;
2020       if (i == 3)
2021 	return NULL;
2022       /* Throw away the 1 operand and keep the other two.  */
2023       if (i < 2)
2024 	unprom[i] = unprom[2];
2025       ifn = IFN_AVG_CEIL;
2026     }
2027 
2028   vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
2029 
2030   /* We know that:
2031 
2032      (a) the operation can be viewed as:
2033 
2034 	   TYPE widened0 = (TYPE) UNPROM[0];
2035 	   TYPE widened1 = (TYPE) UNPROM[1];
2036 	   TYPE tmp1 = widened0 + widened1 {+ 1};
2037 	   TYPE tmp2 = tmp1 >> 1;   // LAST_STMT_INFO
2038 
2039      (b) the first two statements are equivalent to:
2040 
2041 	   TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2042 	   TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2043 
2044      (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2045 	 where sensible;
2046 
2047      (d) all the operations can be performed correctly at twice the width of
2048 	 NEW_TYPE, due to the nature of the average operation; and
2049 
2050      (e) users of the result of the right shift need only TARGET_PRECISION
2051 	 bits, where TARGET_PRECISION is no more than half of TYPE's
2052 	 precision.
2053 
2054      Under these circumstances, the only situation in which NEW_TYPE
2055      could be narrower than TARGET_PRECISION is if widened0, widened1
2056      and an addition result are all used more than once.  Thus we can
2057      treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2058      as "free", whereas widening the result of the average instruction
2059      from NEW_TYPE to TARGET_PRECISION would be a new operation.  It's
2060      therefore better not to go narrower than TARGET_PRECISION.  */
2061   if (TYPE_PRECISION (new_type) < target_precision)
2062     new_type = build_nonstandard_integer_type (target_precision,
2063 					       TYPE_UNSIGNED (new_type));
2064 
2065   /* Check for target support.  */
2066   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2067   if (!new_vectype)
2068     return NULL;
2069 
2070   bool fallback_p = false;
2071 
2072   if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2073     ;
2074   else if (TYPE_UNSIGNED (new_type)
2075 	   && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2076 	   && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2077 	   && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2078 	   && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2079     fallback_p = true;
2080   else
2081     return NULL;
2082 
2083   /* The IR requires a valid vector type for the cast result, even though
2084      it's likely to be discarded.  */
2085   *type_out = get_vectype_for_scalar_type (vinfo, type);
2086   if (!*type_out)
2087     return NULL;
2088 
2089   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2090   tree new_ops[2];
2091   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2092 		       unprom, new_vectype);
2093 
2094   if (fallback_p)
2095     {
2096       /* As a fallback, generate code for following sequence:
2097 
2098 	 shifted_op0 = new_ops[0] >> 1;
2099 	 shifted_op1 = new_ops[1] >> 1;
2100 	 sum_of_shifted = shifted_op0 + shifted_op1;
2101 	 unmasked_carry = new_ops[0] & new_ops[1];  (| instead of & for .AVG_CEIL)
2102 	 carry = unmasked_carry & 1;
2103 	 new_var = sum_of_shifted + carry;
2104       */
2105 
2106       tree one_cst = build_one_cst (new_type);
2107       gassign *g;
2108 
2109       tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2110       g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2111       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2112 
2113       tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2114       g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2115       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2116 
2117       tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2118       g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2119 			       shifted_op0, shifted_op1);
2120       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2121 
2122       tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2123       tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2124       g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2125       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2126 
2127       tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2128       g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2129       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2130 
2131       g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2132       return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
2133     }
2134 
2135   /* Generate the IFN_AVG* call.  */
2136   gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2137 						    new_ops[1]);
2138   gimple_call_set_lhs (average_stmt, new_var);
2139   gimple_set_location (average_stmt, gimple_location (last_stmt));
2140 
2141   if (dump_enabled_p ())
2142     dump_printf_loc (MSG_NOTE, vect_location,
2143 		     "created pattern stmt: %G", average_stmt);
2144 
2145   return vect_convert_output (vinfo, last_stmt_info,
2146 			      type, average_stmt, new_vectype);
2147 }
2148 
2149 /* Recognize cases in which the input to a cast is wider than its
2150    output, and the input is fed by a widening operation.  Fold this
2151    by removing the unnecessary intermediate widening.  E.g.:
2152 
2153      unsigned char a;
2154      unsigned int b = (unsigned int) a;
2155      unsigned short c = (unsigned short) b;
2156 
2157    -->
2158 
2159      unsigned short c = (unsigned short) a;
2160 
2161    Although this is rare in input IR, it is an expected side-effect
2162    of the over-widening pattern above.
2163 
2164    This is beneficial also for integer-to-float conversions, if the
2165    widened integer has more bits than the float, and if the unwidened
2166    input doesn't.  */
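
/* A hypothetical integer-to-float instance of this (assuming 32-bit
   float and 64-bit long long):

     unsigned short s;
     unsigned long long ll = s;
     float f = (float) ll;

   can use "float f = (float) s;" instead, since the unwidened 16-bit
   input is narrower than the float mode even though the widened
   64-bit value is not.  */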
2167 
2168 static gimple *
2169 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
2170 				  stmt_vec_info last_stmt_info, tree *type_out)
2171 {
2172   /* Check for a cast, including an integer-to-float conversion.  */
2173   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2174   if (!last_stmt)
2175     return NULL;
2176   tree_code code = gimple_assign_rhs_code (last_stmt);
2177   if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2178     return NULL;
2179 
2180   /* Make sure that the rhs is a scalar with a natural bitsize.  */
2181   tree lhs = gimple_assign_lhs (last_stmt);
2182   if (!lhs)
2183     return NULL;
2184   tree lhs_type = TREE_TYPE (lhs);
2185   scalar_mode lhs_mode;
2186   if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2187       || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2188     return NULL;
2189 
2190   /* Check for a narrowing operation (from a vector point of view).  */
2191   tree rhs = gimple_assign_rhs1 (last_stmt);
2192   tree rhs_type = TREE_TYPE (rhs);
2193   if (!INTEGRAL_TYPE_P (rhs_type)
2194       || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2195       || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2196     return NULL;
2197 
2198   /* Try to find an unpromoted input.  */
2199   vect_unpromoted_value unprom;
2200   if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2201       || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2202     return NULL;
2203 
2204   /* If the bits above RHS_TYPE matter, make sure that they're the
2205      same when extending from UNPROM as they are when extending from RHS.  */
2206   if (!INTEGRAL_TYPE_P (lhs_type)
2207       && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2208     return NULL;
2209 
2210   /* We can get the same result by casting UNPROM directly, to avoid
2211      the unnecessary widening and narrowing.  */
2212   vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2213 
2214   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2215   if (!*type_out)
2216     return NULL;
2217 
2218   tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2219   gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2220   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2221 
2222   return pattern_stmt;
2223 }
2224 
2225 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2226    to WIDEN_LSHIFT_EXPR.  See vect_recog_widen_op_pattern for details.  */
2227 
2228 static gimple *
2229 vect_recog_widen_shift_pattern (vec_info *vinfo,
2230 				stmt_vec_info last_stmt_info, tree *type_out)
2231 {
2232   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
2233 				      LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
2234 				      "vect_recog_widen_shift_pattern");
2235 }
2236 
2237 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
2238 
2239    type a_t, b_t, c_t;
2240 
2241    S0 a_t = b_t r<< c_t;
2242 
2243   Input/Output:
2244 
2245   * STMT_VINFO: The stmt from which the pattern search begins,
2246     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
2247     with a sequence:
2248 
2249    S1 d_t = -c_t;
2250    S2 e_t = d_t & (B - 1);
2251    S3 f_t = b_t << c_t;
2252    S4 g_t = b_t >> e_t;
2253    S0 a_t = f_t | g_t;
2254 
2255     where B is element bitsize of type.
2256 
2257   Output:
2258 
2259   * TYPE_OUT: The type of the output of this pattern.
2260 
2261   * Return value: A new stmt that will be used to replace the rotate
2262     S0 stmt.  */
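
/* For example, for a 32-bit unsigned rotate (B == 32), a_t = b_t r<< c_t
   is emitted as the usual open-coded idiom (a sketch of the sequence
   built below):

     e_t = (-c_t) & 31;
     a_t = (b_t << c_t) | (b_t >> e_t);  */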
2263 
2264 static gimple *
2265 vect_recog_rotate_pattern (vec_info *vinfo,
2266 			   stmt_vec_info stmt_vinfo, tree *type_out)
2267 {
2268   gimple *last_stmt = stmt_vinfo->stmt;
2269   tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2270   gimple *pattern_stmt, *def_stmt;
2271   enum tree_code rhs_code;
2272   enum vect_def_type dt;
2273   optab optab1, optab2;
2274   edge ext_def = NULL;
2275   bool bswap16_p = false;
2276 
2277   if (is_gimple_assign (last_stmt))
2278     {
2279       rhs_code = gimple_assign_rhs_code (last_stmt);
2280       switch (rhs_code)
2281 	{
2282 	case LROTATE_EXPR:
2283 	case RROTATE_EXPR:
2284 	  break;
2285 	default:
2286 	  return NULL;
2287 	}
2288 
2289       lhs = gimple_assign_lhs (last_stmt);
2290       oprnd0 = gimple_assign_rhs1 (last_stmt);
2291       type = TREE_TYPE (oprnd0);
2292       oprnd1 = gimple_assign_rhs2 (last_stmt);
2293     }
2294   else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2295     {
2296       /* __builtin_bswap16 (x) is another form of x r>> 8.
2297 	 The vectorizer has bswap support, but only if the argument isn't
2298 	 promoted.  */
2299       lhs = gimple_call_lhs (last_stmt);
2300       oprnd0 = gimple_call_arg (last_stmt, 0);
2301       type = TREE_TYPE (oprnd0);
2302       if (!lhs
2303 	  || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2304 	  || TYPE_PRECISION (type) <= 16
2305 	  || TREE_CODE (oprnd0) != SSA_NAME
2306 	  || BITS_PER_UNIT != 8
2307 	  || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
2308 	return NULL;
2309 
2310       stmt_vec_info def_stmt_info;
2311       if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2312 	return NULL;
2313 
2314       if (dt != vect_internal_def)
2315 	return NULL;
2316 
2317       if (gimple_assign_cast_p (def_stmt))
2318 	{
2319 	  def = gimple_assign_rhs1 (def_stmt);
2320 	  if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2321 	      && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2322 	    oprnd0 = def;
2323 	}
2324 
2325       type = TREE_TYPE (lhs);
2326       vectype = get_vectype_for_scalar_type (vinfo, type);
2327       if (vectype == NULL_TREE)
2328 	return NULL;
2329 
2330       if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2331 	{
2332 	  /* The encoding uses one stepped pattern for each byte in the
2333 	     16-bit word.  */
2334 	  vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2335 	  for (unsigned i = 0; i < 3; ++i)
2336 	    for (unsigned j = 0; j < 2; ++j)
2337 	      elts.quick_push ((i + 1) * 2 - j - 1);
2338 
2339 	  vec_perm_indices indices (elts, 1,
2340 				    TYPE_VECTOR_SUBPARTS (char_vectype));
2341 	  if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2342 	    {
2343 	      /* vectorizable_bswap can handle the __builtin_bswap16 if we
2344 		 undo the argument promotion.  */
2345 	      if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2346 		{
2347 		  def = vect_recog_temp_ssa_var (type, NULL);
2348 		  def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2349 		  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2350 		  oprnd0 = def;
2351 		}
2352 
2353 	      /* Pattern detected.  */
2354 	      vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2355 
2356 	      *type_out = vectype;
2357 
2358 	      /* Pattern supported.  Create a stmt to be used to replace the
2359 		 pattern, with the unpromoted argument.  */
2360 	      var = vect_recog_temp_ssa_var (type, NULL);
2361 	      pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
2362 						1, oprnd0);
2363 	      gimple_call_set_lhs (pattern_stmt, var);
2364 	      gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
2365 				      gimple_call_fntype (last_stmt));
2366 	      return pattern_stmt;
2367 	    }
2368 	}
2369 
2370       oprnd1 = build_int_cst (integer_type_node, 8);
2371       rhs_code = LROTATE_EXPR;
2372       bswap16_p = true;
2373     }
2374   else
2375     return NULL;
2376 
2377   if (TREE_CODE (oprnd0) != SSA_NAME
2378       || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2379       || !INTEGRAL_TYPE_P (type)
2380       || !TYPE_UNSIGNED (type))
2381     return NULL;
2382 
2383   stmt_vec_info def_stmt_info;
2384   if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2385     return NULL;
2386 
2387   if (dt != vect_internal_def
2388       && dt != vect_constant_def
2389       && dt != vect_external_def)
2390     return NULL;
2391 
2392   vectype = get_vectype_for_scalar_type (vinfo, type);
2393   if (vectype == NULL_TREE)
2394     return NULL;
2395 
2396   /* If vector/vector or vector/scalar rotate is supported by the target,
2397      don't do anything here.  */
2398   optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2399   if (optab1
2400       && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2401     {
2402      use_rotate:
2403       if (bswap16_p)
2404 	{
2405 	  if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2406 	    {
2407 	      def = vect_recog_temp_ssa_var (type, NULL);
2408 	      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2409 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2410 	      oprnd0 = def;
2411 	    }
2412 
2413 	  /* Pattern detected.  */
2414 	  vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2415 
2416 	  *type_out = vectype;
2417 
2418 	  /* Pattern supported.  Create a stmt to be used to replace the
2419 	     pattern.  */
2420 	  var = vect_recog_temp_ssa_var (type, NULL);
2421 	  pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
2422 					      oprnd1);
2423 	  return pattern_stmt;
2424 	}
2425       return NULL;
2426     }
2427 
2428   if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2429     {
2430       optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2431       if (optab2
2432 	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2433 	goto use_rotate;
2434     }
2435 
2436   /* If vector/vector or vector/scalar shifts aren't supported by the target,
2437      don't do anything here either.  */
2438   optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2439   optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2440   if (!optab1
2441       || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2442       || !optab2
2443       || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2444     {
2445       if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2446 	return NULL;
2447       optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2448       optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2449       if (!optab1
2450 	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2451 	  || !optab2
2452 	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2453 	return NULL;
2454     }
2455 
2456   *type_out = vectype;
2457 
2458   if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2459     {
2460       def = vect_recog_temp_ssa_var (type, NULL);
2461       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2462       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2463       oprnd0 = def;
2464     }
2465 
2466   if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
2467     ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2468 
2469   def = NULL_TREE;
2470   scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2471   if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2472     def = oprnd1;
2473   else if (def_stmt && gimple_assign_cast_p (def_stmt))
2474     {
2475       tree rhs1 = gimple_assign_rhs1 (def_stmt);
2476       if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2477 	  && TYPE_PRECISION (TREE_TYPE (rhs1))
2478 	     == TYPE_PRECISION (type))
2479 	def = rhs1;
2480     }
2481 
2482   if (def == NULL_TREE)
2483     {
2484       def = vect_recog_temp_ssa_var (type, NULL);
2485       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2486       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2487     }
2488   stype = TREE_TYPE (def);
2489 
2490   if (TREE_CODE (def) == INTEGER_CST)
2491     {
2492       if (!tree_fits_uhwi_p (def)
2493 	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2494 	  || integer_zerop (def))
2495 	return NULL;
2496       def2 = build_int_cst (stype,
2497 			    GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2498     }
2499   else
2500     {
2501       tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
2502 
2503       if (vecstype == NULL_TREE)
2504 	return NULL;
2505       def2 = vect_recog_temp_ssa_var (stype, NULL);
2506       def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2507       if (ext_def)
2508 	{
2509 	  basic_block new_bb
2510 	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2511 	  gcc_assert (!new_bb);
2512 	}
2513       else
2514 	append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2515 
2516       def2 = vect_recog_temp_ssa_var (stype, NULL);
2517       tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
2518       def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2519 				      gimple_assign_lhs (def_stmt), mask);
2520       if (ext_def)
2521 	{
2522 	  basic_block new_bb
2523 	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2524 	  gcc_assert (!new_bb);
2525 	}
2526       else
2527 	append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2528     }
2529 
2530   var1 = vect_recog_temp_ssa_var (type, NULL);
2531   def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2532 					? LSHIFT_EXPR : RSHIFT_EXPR,
2533 				  oprnd0, def);
2534   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2535 
2536   var2 = vect_recog_temp_ssa_var (type, NULL);
2537   def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2538 					? RSHIFT_EXPR : LSHIFT_EXPR,
2539 				  oprnd0, def2);
2540   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2541 
2542   /* Pattern detected.  */
2543   vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2544 
2545   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
2546   var = vect_recog_temp_ssa_var (type, NULL);
2547   pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2548 
2549   return pattern_stmt;
2550 }
2551 
2552 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2553    vectorized:
2554 
2555    type a_t;
2556    TYPE b_T, res_T;
2557 
2558    S1 a_t = ;
2559    S2 b_T = ;
2560    S3 res_T = b_T op a_t;
2561 
2562   where 'TYPE' is a type with a different size from 'type',
2563   and op is <<, >> or rotate.
2564 
2565   Also detect cases:
2566 
2567    type a_t;
2568    TYPE b_T, c_T, res_T;
2569 
2570    S0 c_T = ;
2571    S1 a_t = (type) c_T;
2572    S2 b_T = ;
2573    S3 res_T = b_T op a_t;
2574 
2575   Input/Output:
2576 
2577   * STMT_VINFO: The stmt from which the pattern search begins,
2578     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
2579     with a shift/rotate which has the same type on both operands, in the
2580     second case just b_T op c_T, in the first case with added cast
2581     from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2582 
2583   Output:
2584 
2585   * TYPE_OUT: The type of the output of this pattern.
2586 
2587   * Return value: A new stmt that will be used to replace the shift/rotate
2588     S3 stmt.  */
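
/* As a concrete sketch, with a shift amount narrower than the shifted
   value (and defined inside the loop):

     unsigned char n;
     unsigned int x, r;
     r = x << n;

   the pattern casts N to "unsigned int" in the pattern definition
   sequence so that both shift operands get the same vector type.  */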
2589 
2590 static gimple *
2591 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
2592 					stmt_vec_info stmt_vinfo,
2593 					tree *type_out)
2594 {
2595   gimple *last_stmt = stmt_vinfo->stmt;
2596   tree oprnd0, oprnd1, lhs, var;
2597   gimple *pattern_stmt;
2598   enum tree_code rhs_code;
2599 
2600   if (!is_gimple_assign (last_stmt))
2601     return NULL;
2602 
2603   rhs_code = gimple_assign_rhs_code (last_stmt);
2604   switch (rhs_code)
2605     {
2606     case LSHIFT_EXPR:
2607     case RSHIFT_EXPR:
2608     case LROTATE_EXPR:
2609     case RROTATE_EXPR:
2610       break;
2611     default:
2612       return NULL;
2613     }
2614 
2615   lhs = gimple_assign_lhs (last_stmt);
2616   oprnd0 = gimple_assign_rhs1 (last_stmt);
2617   oprnd1 = gimple_assign_rhs2 (last_stmt);
2618   if (TREE_CODE (oprnd0) != SSA_NAME
2619       || TREE_CODE (oprnd1) != SSA_NAME
2620       || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2621       || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2622       || TYPE_PRECISION (TREE_TYPE (lhs))
2623 	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2624     return NULL;
2625 
2626   stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2627   if (!def_vinfo)
2628     return NULL;
2629 
2630   *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
2631   if (*type_out == NULL_TREE)
2632     return NULL;
2633 
2634   tree def = NULL_TREE;
2635   gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2636   if (def_stmt && gimple_assign_cast_p (def_stmt))
2637     {
2638       tree rhs1 = gimple_assign_rhs1 (def_stmt);
2639       if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2640 	  && TYPE_PRECISION (TREE_TYPE (rhs1))
2641 	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2642 	{
2643 	  if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2644 	      >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2645 	    def = rhs1;
2646 	  else
2647 	    {
2648 	      tree mask
2649 		= build_low_bits_mask (TREE_TYPE (rhs1),
2650 				       TYPE_PRECISION (TREE_TYPE (oprnd1)));
2651 	      def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2652 	      def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2653 	      tree vecstype = get_vectype_for_scalar_type (vinfo,
2654 							   TREE_TYPE (rhs1));
2655 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2656 	    }
2657 	}
2658     }
2659 
2660   if (def == NULL_TREE)
2661     {
2662       def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2663       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2664       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2665     }
2666 
2667   /* Pattern detected.  */
2668   vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2669 
2670   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
2671   var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2672   pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2673 
2674   return pattern_stmt;
2675 }
2676 
2677 /* Return true iff the target has a vector optab implementing the operation
2678    CODE on type VECTYPE.  */
2679 
2680 static bool
2681 target_has_vecop_for_code (tree_code code, tree vectype)
2682 {
2683   optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2684   return voptab
2685 	 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2686 }
2687 
2688 /* Verify that the target has optabs of VECTYPE to perform all the steps
2689    needed by the multiplication-by-immediate synthesis algorithm described by
2690    ALG and VAR.  If SYNTH_SHIFT_P is true ensure that vector addition is
2691    present.  Return true iff the target supports all the steps.  */
2692 
2693 static bool
2694 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2695 				 tree vectype, bool synth_shift_p)
2696 {
2697   if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2698     return false;
2699 
2700   bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2701   bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2702 
2703   if (var == negate_variant
2704       && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2705     return false;
2706 
2707   /* If we must synthesize shifts with additions make sure that vector
2708      addition is available.  */
2709   if ((var == add_variant || synth_shift_p) && !supports_vplus)
2710     return false;
2711 
2712   for (int i = 1; i < alg->ops; i++)
2713     {
2714       switch (alg->op[i])
2715 	{
2716 	case alg_shift:
2717 	  break;
2718 	case alg_add_t_m2:
2719 	case alg_add_t2_m:
2720 	case alg_add_factor:
2721 	  if (!supports_vplus)
2722 	    return false;
2723 	  break;
2724 	case alg_sub_t_m2:
2725 	case alg_sub_t2_m:
2726 	case alg_sub_factor:
2727 	  if (!supports_vminus)
2728 	    return false;
2729 	  break;
2730 	case alg_unknown:
2731 	case alg_m:
2732 	case alg_zero:
2733 	case alg_impossible:
2734 	  return false;
2735 	default:
2736 	  gcc_unreachable ();
2737 	}
2738     }
2739 
2740   return true;
2741 }
2742 
2743 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2744    putting the final result in DEST.  Append all statements but the last
2745    to STMT_INFO's pattern definition sequence.  Return the last statement.  */
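
/* For instance, a left shift by 3 becomes three self-additions
   (a sketch of the statements this helper builds):

     t1 = op + op;     // op << 1
     t2 = t1 + t1;     // op << 2
     dest = t2 + t2;   // op << 3  */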
2746 
2747 static gimple *
2748 synth_lshift_by_additions (vec_info *vinfo,
2749 			   tree dest, tree op, HOST_WIDE_INT amnt,
2750 			   stmt_vec_info stmt_info)
2751 {
2752   HOST_WIDE_INT i;
2753   tree itype = TREE_TYPE (op);
2754   tree prev_res = op;
2755   gcc_assert (amnt >= 0);
2756   for (i = 0; i < amnt; i++)
2757     {
2758       tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2759 		      : dest;
2760       gimple *stmt
2761         = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2762       prev_res = tmp_var;
2763       if (i < amnt - 1)
2764 	append_pattern_def_seq (vinfo, stmt_info, stmt);
2765       else
2766 	return stmt;
2767     }
2768   gcc_unreachable ();
2769   return NULL;
2770 }
2771 
2772 /* Helper for vect_synth_mult_by_constant.  Apply a binary operation
2773    CODE to operands OP1 and OP2, creating a new temporary SSA var in
2774    the process if necessary.  Append the resulting assignment statements
2775    to the sequence in STMT_VINFO.  Return the SSA variable that holds the
2776    result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
2777    left shifts using additions.  */
2778 
2779 static tree
2780 apply_binop_and_append_stmt (vec_info *vinfo,
2781 			     tree_code code, tree op1, tree op2,
2782 			     stmt_vec_info stmt_vinfo, bool synth_shift_p)
2783 {
2784   if (integer_zerop (op2)
2785       && (code == LSHIFT_EXPR
2786 	  || code == PLUS_EXPR))
2787     {
2788       gcc_assert (TREE_CODE (op1) == SSA_NAME);
2789       return op1;
2790     }
2791 
2792   gimple *stmt;
2793   tree itype = TREE_TYPE (op1);
2794   tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2795 
2796   if (code == LSHIFT_EXPR
2797       && synth_shift_p)
2798     {
2799       stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
2800 					TREE_INT_CST_LOW (op2), stmt_vinfo);
2801       append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2802       return tmp_var;
2803     }
2804 
2805   stmt = gimple_build_assign (tmp_var, code, op1, op2);
2806   append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2807   return tmp_var;
2808 }
2809 
2810 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2811    and simple arithmetic operations to be vectorized.  Record the statements
2812    produced in STMT_VINFO and return the last statement in the sequence or
2813    NULL if it's not possible to synthesize such a multiplication.
2814    This function mirrors the behavior of expand_mult_const in expmed.c but
2815    works on tree-ssa form.  */
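
/* For instance, a multiplication by 5 might be synthesized as (a sketch,
   ignoring the cast to an unsigned type that guards against introducing
   signed overflow):

     t = op << 2;
     result = t + op;

   with each left shift itself expanded into additions on targets that
   have no vector shift.  */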
2816 
2817 static gimple *
2818 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
2819 			     stmt_vec_info stmt_vinfo)
2820 {
2821   tree itype = TREE_TYPE (op);
2822   machine_mode mode = TYPE_MODE (itype);
2823   struct algorithm alg;
2824   mult_variant variant;
2825   if (!tree_fits_shwi_p (val))
2826     return NULL;
2827 
2828   /* Multiplication synthesis by shifts, adds and subs can introduce
2829      signed overflow where the original operation didn't.  Perform the
2830      operations on an unsigned type and cast back to avoid this.
2831      In the future we may want to relax this for synthesis algorithms
2832      that we can prove do not cause unexpected overflow.  */
2833   bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2834 
2835   tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2836 
2837   /* Targets that don't support vector shifts but support vector additions
2838      can synthesize shifts that way.  */
2839   bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
2840 
2841   HOST_WIDE_INT hwval = tree_to_shwi (val);
2842   /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2843      The vectorizer's benefit analysis will decide whether it's beneficial
2844      to do this.  */
2845   bool possible = choose_mult_variant (mode, hwval, &alg,
2846 					&variant, MAX_COST);
2847   if (!possible)
2848     return NULL;
2849 
2850   tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
2851 
2852   if (!vectype
2853       || !target_supports_mult_synth_alg (&alg, variant,
2854 					   vectype, synth_shift_p))
2855     return NULL;
2856 
2857   tree accumulator;
2858 
2859   /* STMT holds the statement built last; it is what we eventually return.  */
2860   gimple *stmt = NULL;
2861 
2862   if (cast_to_unsigned_p)
2863     {
2864       tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2865       stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2866       append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2867       op = tmp_op;
2868     }
2869 
2870   if (alg.op[0] == alg_zero)
2871     accumulator = build_int_cst (multtype, 0);
2872   else
2873     accumulator = op;
2874 
2875   bool needs_fixup = (variant == negate_variant)
2876 		      || (variant == add_variant);
2877 
2878   for (int i = 1; i < alg.ops; i++)
2879     {
2880       tree shft_log = build_int_cst (multtype, alg.log[i]);
2881       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2882       tree tmp_var = NULL_TREE;
2883 
2884       switch (alg.op[i])
2885 	{
2886 	case alg_shift:
2887 	  if (synth_shift_p)
2888 	    stmt
2889 	      = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
2890 					   alg.log[i], stmt_vinfo);
2891 	  else
2892 	    stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2893 					 shft_log);
2894 	  break;
2895 	case alg_add_t_m2:
2896 	  tmp_var
2897 	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
2898 					   stmt_vinfo, synth_shift_p);
2899 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2900 				       tmp_var);
2901 	  break;
2902 	case alg_sub_t_m2:
2903 	  tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
2904 						 shft_log, stmt_vinfo,
2905 						 synth_shift_p);
2906 	  /* In some algorithms the first step involves zeroing the
2907 	     accumulator.  If subtracting from such an accumulator,
2908 	     just emit the negation directly.  */
2909 	  if (integer_zerop (accumulator))
2910 	    stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2911 	  else
2912 	    stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2913 					tmp_var);
2914 	  break;
2915 	case alg_add_t2_m:
2916 	  tmp_var
2917 	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2918 					   shft_log, stmt_vinfo, synth_shift_p);
2919 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2920 	  break;
2921 	case alg_sub_t2_m:
2922 	  tmp_var
2923 	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2924 					   shft_log, stmt_vinfo, synth_shift_p);
2925 	  stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2926 	  break;
2927 	case alg_add_factor:
2928 	  tmp_var
2929 	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2930 					   shft_log, stmt_vinfo, synth_shift_p);
2931 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2932 				       tmp_var);
2933 	  break;
2934 	case alg_sub_factor:
2935 	  tmp_var
2936 	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2937 					   shft_log, stmt_vinfo, synth_shift_p);
2938 	  stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2939 				      accumulator);
2940 	  break;
2941 	default:
2942 	  gcc_unreachable ();
2943 	}
2944       /* We don't want to append the last stmt in the sequence to stmt_vinfo
2945 	 but rather return it directly.  */
2946 
2947       if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2948 	append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2949       accumulator = accum_tmp;
2950     }
2951   if (variant == negate_variant)
2952     {
2953       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2954       stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2955       accumulator = accum_tmp;
2956       if (cast_to_unsigned_p)
2957 	append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2958     }
2959   else if (variant == add_variant)
2960     {
2961       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2962       stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2963       accumulator = accum_tmp;
2964       if (cast_to_unsigned_p)
2965 	append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2966     }
2967   /* Move back to a signed type if needed.  */
2968   if (cast_to_unsigned_p)
2969     {
2970       tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2971       stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2972     }
2973 
2974   return stmt;
2975 }
2976 
2977 /* Detect multiplication by a constant and convert it into a sequence of
2978    shifts, additions, subtractions and negations.  We reuse the
2979    choose_mult_variant algorithms from expmed.c.
2980 
2981    Input/Output:
2982 
2983    STMT_VINFO: The stmt from which the pattern search begins,
2984    i.e. the mult stmt.
2985 
2986  Output:
2987 
2988   * TYPE_OUT: The type of the output of this pattern.
2989 
2990   * Return value: A new stmt that will be used to replace
2991     the multiplication.  */
2992 
2993 static gimple *
2994 vect_recog_mult_pattern (vec_info *vinfo,
2995 			 stmt_vec_info stmt_vinfo, tree *type_out)
2996 {
2997   gimple *last_stmt = stmt_vinfo->stmt;
2998   tree oprnd0, oprnd1, vectype, itype;
2999   gimple *pattern_stmt;
3000 
3001   if (!is_gimple_assign (last_stmt))
3002     return NULL;
3003 
3004   if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
3005     return NULL;
3006 
3007   oprnd0 = gimple_assign_rhs1 (last_stmt);
3008   oprnd1 = gimple_assign_rhs2 (last_stmt);
3009   itype = TREE_TYPE (oprnd0);
3010 
3011   if (TREE_CODE (oprnd0) != SSA_NAME
3012       || TREE_CODE (oprnd1) != INTEGER_CST
3013       || !INTEGRAL_TYPE_P (itype)
3014       || !type_has_mode_precision_p (itype))
3015     return NULL;
3016 
3017   vectype = get_vectype_for_scalar_type (vinfo, itype);
3018   if (vectype == NULL_TREE)
3019     return NULL;
3020 
3021   /* If the target can handle vectorized multiplication natively,
3022      don't attempt to optimize this.  */
3023   optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
3024   if (mul_optab != unknown_optab)
3025     {
3026       machine_mode vec_mode = TYPE_MODE (vectype);
3027       int icode = (int) optab_handler (mul_optab, vec_mode);
3028       if (icode != CODE_FOR_nothing)
3029        return NULL;
3030     }
3031 
3032   pattern_stmt = vect_synth_mult_by_constant (vinfo,
3033 					      oprnd0, oprnd1, stmt_vinfo);
3034   if (!pattern_stmt)
3035     return NULL;
3036 
3037   /* Pattern detected.  */
3038   vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3039 
3040   *type_out = vectype;
3041 
3042   return pattern_stmt;
3043 }
3044 
3045 /* Detect a signed division by a constant that wouldn't be
3046    otherwise vectorized:
3047 
3048    type a_t, b_t;
3049 
3050    S1 a_t = b_t / N;
3051 
3052   where type 'type' is an integral type and N is a constant.
3053 
3054   Similarly handle modulo by a constant:
3055 
3056    S4 a_t = b_t % N;
3057 
3058   Input/Output:
3059 
3060   * STMT_VINFO: The stmt from which the pattern search begins,
3061     i.e. the division stmt.  S1 is replaced, if N is a power
3062     of two constant and the type is signed, by:
3063   S3  y_t = b_t < 0 ? N - 1 : 0;
3064   S2  x_t = b_t + y_t;
3065   S1' a_t = x_t >> log2 (N);
3066 
3067     S4 is replaced, if N is a power of two constant and
3068     the type is signed, by (where *_T temporaries have unsigned type):
3069   S9  y_T = b_t < 0 ? -1U : 0U;
3070   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3071   S7  z_t = (type) z_T;
3072   S6  w_t = b_t + z_t;
3073   S5  x_t = w_t & (N - 1);
3074   S4' a_t = x_t - z_t;
3075 
3076   Output:
3077 
3078   * TYPE_OUT: The type of the output of this pattern.
3079 
3080   * Return value: A new stmt that will be used to replace the division
3081     S1 or modulo S4 stmt.  */
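
/* A hedged numeric illustration of the power-of-two case above, assuming
   32-bit int, b_t == -5 and N == 4 (log2 (N) == 2):

     division:  y_t = 3,  x_t = -5 + 3 = -2,  a_t = -2 >> 2 = -1  (== -5 / 4)
     modulo:    z_T = 0xffffffffU >> 30 = 3,  w_t = -2,  x_t = -2 & 3 = 2,
                a_t = 2 - 3 = -1                                  (== -5 % 4)  */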
3082 
3083 static gimple *
3084 vect_recog_divmod_pattern (vec_info *vinfo,
3085 			   stmt_vec_info stmt_vinfo, tree *type_out)
3086 {
3087   gimple *last_stmt = stmt_vinfo->stmt;
3088   tree oprnd0, oprnd1, vectype, itype, cond;
3089   gimple *pattern_stmt, *def_stmt;
3090   enum tree_code rhs_code;
3091   optab optab;
3092   tree q;
3093   int dummy_int, prec;
3094 
3095   if (!is_gimple_assign (last_stmt))
3096     return NULL;
3097 
3098   rhs_code = gimple_assign_rhs_code (last_stmt);
3099   switch (rhs_code)
3100     {
3101     case TRUNC_DIV_EXPR:
3102     case EXACT_DIV_EXPR:
3103     case TRUNC_MOD_EXPR:
3104       break;
3105     default:
3106       return NULL;
3107     }
3108 
3109   oprnd0 = gimple_assign_rhs1 (last_stmt);
3110   oprnd1 = gimple_assign_rhs2 (last_stmt);
3111   itype = TREE_TYPE (oprnd0);
3112   if (TREE_CODE (oprnd0) != SSA_NAME
3113       || TREE_CODE (oprnd1) != INTEGER_CST
3114       || TREE_CODE (itype) != INTEGER_TYPE
3115       || !type_has_mode_precision_p (itype))
3116     return NULL;
3117 
3118   scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3119   vectype = get_vectype_for_scalar_type (vinfo, itype);
3120   if (vectype == NULL_TREE)
3121     return NULL;
3122 
3123   if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3124     {
3125       /* If the target can handle vectorized division or modulo natively,
3126 	 don't attempt to optimize this, since native division is likely
3127 	 to give smaller code.  */
3128       optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3129       if (optab != unknown_optab)
3130 	{
3131 	  machine_mode vec_mode = TYPE_MODE (vectype);
3132 	  int icode = (int) optab_handler (optab, vec_mode);
3133 	  if (icode != CODE_FOR_nothing)
3134 	    return NULL;
3135 	}
3136     }
3137 
3138   prec = TYPE_PRECISION (itype);
3139   if (integer_pow2p (oprnd1))
3140     {
3141       if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3142 	return NULL;
3143 
3144       /* Pattern detected.  */
3145       vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3146 
3147       *type_out = vectype;
3148 
3149       /* Check if the target supports this internal function.  */
3150       internal_fn ifn = IFN_DIV_POW2;
3151       if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3152 	{
3153 	  tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3154 
3155 	  tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3156 	  gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3157 	  gimple_call_set_lhs (div_stmt, var_div);
3158 
3159 	  if (rhs_code == TRUNC_MOD_EXPR)
3160 	    {
3161 	      append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
3162 	      def_stmt
3163 		= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3164 				       LSHIFT_EXPR, var_div, shift);
3165 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3166 	      pattern_stmt
3167 		= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3168 				       MINUS_EXPR, oprnd0,
3169 				       gimple_assign_lhs (def_stmt));
3170 	    }
3171 	  else
3172 	    pattern_stmt = div_stmt;
3173 	  gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3174 
3175 	  return pattern_stmt;
3176 	}
3177 
3178       cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3179 		     build_int_cst (itype, 0));
3180       if (rhs_code == TRUNC_DIV_EXPR
3181 	  || rhs_code == EXACT_DIV_EXPR)
3182 	{
3183 	  tree var = vect_recog_temp_ssa_var (itype, NULL);
3184 	  tree shift;
3185 	  def_stmt
3186 	    = gimple_build_assign (var, COND_EXPR, cond,
3187 				   fold_build2 (MINUS_EXPR, itype, oprnd1,
3188 						build_int_cst (itype, 1)),
3189 				   build_int_cst (itype, 0));
3190 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3191 	  var = vect_recog_temp_ssa_var (itype, NULL);
3192 	  def_stmt
3193 	    = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3194 				   gimple_assign_lhs (def_stmt));
3195 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3196 
3197 	  shift = build_int_cst (itype, tree_log2 (oprnd1));
3198 	  pattern_stmt
3199 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3200 				   RSHIFT_EXPR, var, shift);
3201 	}
3202       else
3203 	{
3204 	  tree signmask;
3205 	  if (compare_tree_int (oprnd1, 2) == 0)
3206 	    {
3207 	      signmask = vect_recog_temp_ssa_var (itype, NULL);
3208 	      def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3209 					      build_int_cst (itype, 1),
3210 					      build_int_cst (itype, 0));
3211 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3212 	    }
3213 	  else
3214 	    {
3215 	      tree utype
3216 		= build_nonstandard_integer_type (prec, 1);
3217 	      tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3218 	      tree shift
3219 		= build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3220 					- tree_log2 (oprnd1));
3221 	      tree var = vect_recog_temp_ssa_var (utype, NULL);
3222 
3223 	      def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3224 					      build_int_cst (utype, -1),
3225 					      build_int_cst (utype, 0));
3226 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3227 	      var = vect_recog_temp_ssa_var (utype, NULL);
3228 	      def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3229 					      gimple_assign_lhs (def_stmt),
3230 					      shift);
3231 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3232 	      signmask = vect_recog_temp_ssa_var (itype, NULL);
3233 	      def_stmt
3234 		= gimple_build_assign (signmask, NOP_EXPR, var);
3235 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3236 	    }
3237 	  def_stmt
3238 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3239 				   PLUS_EXPR, oprnd0, signmask);
3240 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3241 	  def_stmt
3242 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3243 				   BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3244 				   fold_build2 (MINUS_EXPR, itype, oprnd1,
3245 						build_int_cst (itype, 1)));
3246 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3247 
3248 	  pattern_stmt
3249 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3250 				   MINUS_EXPR, gimple_assign_lhs (def_stmt),
3251 				   signmask);
3252 	}
3253 
3254       return pattern_stmt;
3255     }
3256 
3257   if (prec > HOST_BITS_PER_WIDE_INT
3258       || integer_zerop (oprnd1))
3259     return NULL;
3260 
3261   if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3262     return NULL;
3263 
3264   if (TYPE_UNSIGNED (itype))
3265     {
3266       unsigned HOST_WIDE_INT mh, ml;
3267       int pre_shift, post_shift;
3268       unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3269 				  & GET_MODE_MASK (itype_mode));
3270       tree t1, t2, t3, t4;
3271 
3272       if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3273 	/* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
3274 	return NULL;
3275 
3276       /* Find a suitable multiplier and right shift count
3277 	 instead of multiplying with D.  */
3278       mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3279 
3280       /* If the suggested multiplier needs more than PREC bits (MH nonzero),
3281 	 we can do better for even divisors, using an initial right shift.  */
3282       if (mh != 0 && (d & 1) == 0)
3283 	{
3284 	  pre_shift = ctz_or_zero (d);
3285 	  mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3286 				  &ml, &post_shift, &dummy_int);
3287 	  gcc_assert (!mh);
3288 	}
3289       else
3290 	pre_shift = 0;
3291 
3292       if (mh != 0)
3293 	{
3294 	  if (post_shift - 1 >= prec)
3295 	    return NULL;
3296 
3297 	  /* t1 = oprnd0 h* ml;
3298 	     t2 = oprnd0 - t1;
3299 	     t3 = t2 >> 1;
3300 	     t4 = t1 + t3;
3301 	     q = t4 >> (post_shift - 1);  */
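	  /* A hedged illustration with the classic values for prec == 32,
	     d == 7 (ml == 0x24924925, post_shift == 3): for oprnd0 == 14,
	       t1 = (14 * 0x24924925) >> 32 = 2,
	       t2 = 14 - 2 = 12,  t3 = 12 >> 1 = 6,  t4 = 2 + 6 = 8,
	       q  = 8 >> (3 - 1) = 2.  */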
3302 	  t1 = vect_recog_temp_ssa_var (itype, NULL);
3303 	  def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3304 					  build_int_cst (itype, ml));
3305 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3306 
3307 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
3308 	  def_stmt
3309 	    = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3310 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3311 
3312 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
3313 	  def_stmt
3314 	    = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3315 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3316 
3317 	  t4 = vect_recog_temp_ssa_var (itype, NULL);
3318 	  def_stmt
3319 	    = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3320 
3321 	  if (post_shift != 1)
3322 	    {
3323 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3324 
3325 	      q = vect_recog_temp_ssa_var (itype, NULL);
3326 	      pattern_stmt
3327 		= gimple_build_assign (q, RSHIFT_EXPR, t4,
3328 				       build_int_cst (itype, post_shift - 1));
3329 	    }
3330 	  else
3331 	    {
3332 	      q = t4;
3333 	      pattern_stmt = def_stmt;
3334 	    }
3335 	}
3336       else
3337 	{
3338 	  if (pre_shift >= prec || post_shift >= prec)
3339 	    return NULL;
3340 
3341 	  /* t1 = oprnd0 >> pre_shift;
3342 	     t2 = t1 h* ml;
3343 	     q = t2 >> post_shift;  */
3344 	  if (pre_shift)
3345 	    {
3346 	      t1 = vect_recog_temp_ssa_var (itype, NULL);
3347 	      def_stmt
3348 		= gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3349 				       build_int_cst (NULL, pre_shift));
3350 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3351 	    }
3352 	  else
3353 	    t1 = oprnd0;
3354 
3355 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
3356 	  def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3357 					  build_int_cst (itype, ml));
3358 
3359 	  if (post_shift)
3360 	    {
3361 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3362 
3363 	      q = vect_recog_temp_ssa_var (itype, NULL);
3364 	      def_stmt
3365 		= gimple_build_assign (q, RSHIFT_EXPR, t2,
3366 				       build_int_cst (itype, post_shift));
3367 	    }
3368 	  else
3369 	    q = t2;
3370 
3371 	  pattern_stmt = def_stmt;
3372 	}
3373     }
3374   else
3375     {
3376       unsigned HOST_WIDE_INT ml;
3377       int post_shift;
3378       HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
3379       unsigned HOST_WIDE_INT abs_d;
3380       bool add = false;
3381       tree t1, t2, t3, t4;
3382 
3383       /* Give up for -1.  */
3384       if (d == -1)
3385 	return NULL;
3386 
3387       /* Since d might be INT_MIN, we have to cast to
3388 	 unsigned HOST_WIDE_INT before negating to avoid
3389 	 undefined signed overflow.  */
3390       abs_d = (d >= 0
3391 	       ? (unsigned HOST_WIDE_INT) d
3392 	       : - (unsigned HOST_WIDE_INT) d);
3393 
3394       /* n rem d = n rem -d */
3395       if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3396 	{
3397 	  d = abs_d;
3398 	  oprnd1 = build_int_cst (itype, abs_d);
3399 	}
3400       if (HOST_BITS_PER_WIDE_INT >= prec
3401 	  && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3402 	/* This case is not handled correctly below.  */
3403 	return NULL;
3404 
3405       choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3406       if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3407 	{
3408 	  add = true;
3409 	  ml |= HOST_WIDE_INT_M1U << (prec - 1);
3410 	}
3411       if (post_shift >= prec)
3412 	return NULL;
3413 
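      /* A hedged illustration with the classic values for prec == 32,
	 d == 3 (ml == 0x55555556, post_shift == 0): for oprnd0 == -7,
	   t1 = t3 = (-7 * 0x55555556) >> 32 = -3,  t4 = -7 >> 31 = -1,
	   q  = t3 - t4 = -2,
	 matching -7 / 3 truncated towards zero.  */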
3414       /* t1 = oprnd0 h* ml;  */
3415       t1 = vect_recog_temp_ssa_var (itype, NULL);
3416       def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3417 				      build_int_cst (itype, ml));
3418 
3419       if (add)
3420 	{
3421 	  /* t2 = t1 + oprnd0;  */
3422 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3423 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
3424 	  def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3425 	}
3426       else
3427 	t2 = t1;
3428 
3429       if (post_shift)
3430 	{
3431 	  /* t3 = t2 >> post_shift;  */
3432 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3433 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
3434 	  def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3435 					  build_int_cst (itype, post_shift));
3436 	}
3437       else
3438 	t3 = t2;
3439 
3440       wide_int oprnd0_min, oprnd0_max;
3441       int msb = 1;
3442       if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
3443 	{
3444 	  if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
3445 	    msb = 0;
3446 	  else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
3447 	    msb = -1;
3448 	}
3449 
3450       if (msb == 0 && d >= 0)
3451 	{
3452 	  /* q = t3;  */
3453 	  q = t3;
3454 	  pattern_stmt = def_stmt;
3455 	}
3456       else
3457 	{
3458 	  /* t4 = oprnd0 >> (prec - 1);
3459 	     or if we know from VRP that oprnd0 >= 0
3460 	     t4 = 0;
3461 	     or if we know from VRP that oprnd0 < 0
3462 	     t4 = -1;  */
3463 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3464 	  t4 = vect_recog_temp_ssa_var (itype, NULL);
3465 	  if (msb != 1)
3466 	    def_stmt = gimple_build_assign (t4, INTEGER_CST,
3467 					    build_int_cst (itype, msb));
3468 	  else
3469 	    def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3470 					    build_int_cst (itype, prec - 1));
3471 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3472 
3473 	  /* q = t3 - t4;  or q = t4 - t3;  */
3474 	  q = vect_recog_temp_ssa_var (itype, NULL);
3475 	  pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3476 					      d < 0 ? t3 : t4);
3477 	}
3478     }
3479 
3480   if (rhs_code == TRUNC_MOD_EXPR)
3481     {
3482       tree r, t1;
3483 
3484       /* We divided.  Now finish by:
3485 	 t1 = q * oprnd1;
3486 	 r = oprnd0 - t1;  */
3487       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
3488 
3489       t1 = vect_recog_temp_ssa_var (itype, NULL);
3490       def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3491       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3492 
3493       r = vect_recog_temp_ssa_var (itype, NULL);
3494       pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3495     }
3496 
3497   /* Pattern detected.  */
3498   vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3499 
3500   *type_out = vectype;
3501   return pattern_stmt;
3502 }
3503 
3504 /* Function vect_recog_mixed_size_cond_pattern
3505 
3506    Try to find the following pattern:
3507 
3508      type x_t, y_t;
3509      TYPE a_T, b_T, c_T;
3510    loop:
3511      S1  a_T = x_t CMP y_t ? b_T : c_T;
3512 
3513    where type 'TYPE' is an integral type which has different size
3514    from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
3515    than 'type', the constants need to fit into an integer type
3516    with the same width as 'type') or results of conversion from 'type'.
3517 
3518    Input:
3519 
3520    * STMT_VINFO: The stmt from which the pattern search begins.
3521 
3522    Output:
3523 
3524    * TYPE_OUT: The type of the output of this pattern.
3525 
3526    * Return value: A new stmt that will be used to replace the pattern.
3527 	Additionally a def_stmt is added.
3528 
3529 	a_it = x_t CMP y_t ? b_it : c_it;
3530 	a_T = (TYPE) a_it;  */
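
/* For instance (an illustrative sketch), with 'type' == short and
   'TYPE' == int:

     int a_T = (x_t < y_t) ? 5 : 17;

   the COND_EXPR is evaluated on short-sized elements producing a_it, and
   only a_it is then widened to int, instead of widening x_t, y_t and the
   selected values to int first.  */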
3531 
3532 static gimple *
3533 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
3534 				    stmt_vec_info stmt_vinfo, tree *type_out)
3535 {
3536   gimple *last_stmt = stmt_vinfo->stmt;
3537   tree cond_expr, then_clause, else_clause;
3538   tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3539   gimple *pattern_stmt, *def_stmt;
3540   tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3541   gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3542   bool promotion;
3543   tree comp_scalar_type;
3544 
3545   if (!is_gimple_assign (last_stmt)
3546       || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3547       || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3548     return NULL;
3549 
3550   cond_expr = gimple_assign_rhs1 (last_stmt);
3551   then_clause = gimple_assign_rhs2 (last_stmt);
3552   else_clause = gimple_assign_rhs3 (last_stmt);
3553 
3554   if (!COMPARISON_CLASS_P (cond_expr))
3555     return NULL;
3556 
3557   comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3558   comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
3559   if (comp_vectype == NULL_TREE)
3560     return NULL;
3561 
3562   type = gimple_expr_type (last_stmt);
3563   if (types_compatible_p (type, comp_scalar_type)
3564       || ((TREE_CODE (then_clause) != INTEGER_CST
3565 	   || TREE_CODE (else_clause) != INTEGER_CST)
3566 	  && !INTEGRAL_TYPE_P (comp_scalar_type))
3567       || !INTEGRAL_TYPE_P (type))
3568     return NULL;
3569 
3570   if ((TREE_CODE (then_clause) != INTEGER_CST
3571        && !type_conversion_p (vinfo, then_clause, false,
3572 			      &orig_type0, &def_stmt0, &promotion))
3573       || (TREE_CODE (else_clause) != INTEGER_CST
3574 	  && !type_conversion_p (vinfo, else_clause, false,
3575 				 &orig_type1, &def_stmt1, &promotion)))
3576     return NULL;
3577 
3578   if (orig_type0 && orig_type1
3579       && !types_compatible_p (orig_type0, orig_type1))
3580     return NULL;
3581 
3582   if (orig_type0)
3583     {
3584       if (!types_compatible_p (orig_type0, comp_scalar_type))
3585 	return NULL;
3586       then_clause = gimple_assign_rhs1 (def_stmt0);
3587       itype = orig_type0;
3588     }
3589 
3590   if (orig_type1)
3591     {
3592       if (!types_compatible_p (orig_type1, comp_scalar_type))
3593 	return NULL;
3594       else_clause = gimple_assign_rhs1 (def_stmt1);
3595       itype = orig_type1;
3596     }
3597 
3598 
3599   HOST_WIDE_INT cmp_mode_size
3600     = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3601 
3602   scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3603   if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3604     return NULL;
3605 
3606   vectype = get_vectype_for_scalar_type (vinfo, type);
3607   if (vectype == NULL_TREE)
3608     return NULL;
3609 
3610   if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3611     return NULL;
3612 
3613   if (itype == NULL_TREE)
3614     itype = build_nonstandard_integer_type (cmp_mode_size,
3615   					    TYPE_UNSIGNED (type));
3616 
3617   if (itype == NULL_TREE
3618       || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3619     return NULL;
3620 
3621   vecitype = get_vectype_for_scalar_type (vinfo, itype);
3622   if (vecitype == NULL_TREE)
3623     return NULL;
3624 
3625   if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3626     return NULL;
3627 
3628   if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3629     {
3630       if ((TREE_CODE (then_clause) == INTEGER_CST
3631 	   && !int_fits_type_p (then_clause, itype))
3632 	  || (TREE_CODE (else_clause) == INTEGER_CST
3633 	      && !int_fits_type_p (else_clause, itype)))
3634 	return NULL;
3635     }
3636 
3637   def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3638 				  COND_EXPR, unshare_expr (cond_expr),
3639 				  fold_convert (itype, then_clause),
3640 				  fold_convert (itype, else_clause));
3641   pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3642 				      NOP_EXPR, gimple_assign_lhs (def_stmt));
3643 
3644   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
3645   *type_out = vectype;
3646 
3647   vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3648 
3649   return pattern_stmt;
3650 }
3651 
3652 
3653 /* Helper function of vect_recog_bool_pattern.  Called recursively, return
3654    true if bool VAR can and should be optimized that way.  Assume it shouldn't
3655    in case it's a result of a comparison which can be directly vectorized into
3656    a vector comparison.  Fills in STMTS with all stmts visited during the
3657    walk.  */
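
/* For example, given

     _1 = a_4 < b_5;
     _2 = c_6 != d_7;
     var_8 = _1 & _2;

   the walk would record all three defining stmts in STMTS, assuming
   neither comparison can be vectorized directly as a mask comparison.  */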
3658 
3659 static bool
3660 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3661 {
3662   tree rhs1;
3663   enum tree_code rhs_code;
3664 
3665   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3666   if (!def_stmt_info)
3667     return false;
3668 
3669   gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3670   if (!def_stmt)
3671     return false;
3672 
3673   if (stmts.contains (def_stmt))
3674     return true;
3675 
3676   rhs1 = gimple_assign_rhs1 (def_stmt);
3677   rhs_code = gimple_assign_rhs_code (def_stmt);
3678   switch (rhs_code)
3679     {
3680     case SSA_NAME:
3681       if (! check_bool_pattern (rhs1, vinfo, stmts))
3682 	return false;
3683       break;
3684 
3685     CASE_CONVERT:
3686       if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3687 	return false;
3688       if (! check_bool_pattern (rhs1, vinfo, stmts))
3689 	return false;
3690       break;
3691 
3692     case BIT_NOT_EXPR:
3693       if (! check_bool_pattern (rhs1, vinfo, stmts))
3694 	return false;
3695       break;
3696 
3697     case BIT_AND_EXPR:
3698     case BIT_IOR_EXPR:
3699     case BIT_XOR_EXPR:
3700       if (! check_bool_pattern (rhs1, vinfo, stmts)
3701 	  || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3702 	return false;
3703       break;
3704 
3705     default:
3706       if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3707 	{
3708 	  tree vecitype, comp_vectype;
3709 
3710 	  /* If the comparison can throw, then is_gimple_condexpr will be
3711 	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
3712 	  if (stmt_could_throw_p (cfun, def_stmt))
3713 	    return false;
3714 
3715 	  comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
3716 	  if (comp_vectype == NULL_TREE)
3717 	    return false;
3718 
3719 	  tree mask_type = get_mask_type_for_scalar_type (vinfo,
3720 							  TREE_TYPE (rhs1));
3721 	  if (mask_type
3722 	      && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3723 	    return false;
3724 
3725 	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3726 	    {
3727 	      scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3728 	      tree itype
3729 		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3730 	      vecitype = get_vectype_for_scalar_type (vinfo, itype);
3731 	      if (vecitype == NULL_TREE)
3732 		return false;
3733 	    }
3734 	  else
3735 	    vecitype = comp_vectype;
3736 	  if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3737 	    return false;
3738 	}
3739       else
3740 	return false;
3741       break;
3742     }
3743 
3744   bool res = stmts.add (def_stmt);
3745   /* We can't end up recursing when just visiting SSA defs but not PHIs.  */
3746   gcc_assert (!res);
3747 
3748   return true;
3749 }
3750 
3751 
3752 /* Helper function of adjust_bool_pattern.  Emit a cast of VAR to TYPE,
3753    append the cast stmt to STMT_INFO's pattern def sequence and return
3754    the cast's result.  */
3755 
3756 static tree
3757 adjust_bool_pattern_cast (vec_info *vinfo,
3758 			  tree type, tree var, stmt_vec_info stmt_info)
3759 {
3760   gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3761 					   NOP_EXPR, var);
3762   append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
3763 			  get_vectype_for_scalar_type (vinfo, type));
3764   return gimple_assign_lhs (cast_stmt);
3765 }
3766 
3767 /* Helper function of vect_recog_bool_pattern.  Do the actual transformations.
3768    VAR is an SSA_NAME that should be transformed from bool to a wider integer
3769    type, OUT_TYPE is the desired final integer type of the whole pattern.
3770    STMT_INFO is the info of the pattern root and is where pattern stmts should
3771    be associated with.  DEFS is a map of pattern defs.  */
3772 
3773 static void
3774 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
3775 		     stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3776 {
3777   gimple *stmt = SSA_NAME_DEF_STMT (var);
3778   enum tree_code rhs_code, def_rhs_code;
3779   tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3780   location_t loc;
3781   gimple *pattern_stmt, *def_stmt;
3782   tree trueval = NULL_TREE;
3783 
3784   rhs1 = gimple_assign_rhs1 (stmt);
3785   rhs2 = gimple_assign_rhs2 (stmt);
3786   rhs_code = gimple_assign_rhs_code (stmt);
3787   loc = gimple_location (stmt);
3788   switch (rhs_code)
3789     {
3790     case SSA_NAME:
3791     CASE_CONVERT:
3792       irhs1 = *defs.get (rhs1);
3793       itype = TREE_TYPE (irhs1);
3794       pattern_stmt
3795 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3796 			       SSA_NAME, irhs1);
3797       break;
3798 
3799     case BIT_NOT_EXPR:
3800       irhs1 = *defs.get (rhs1);
3801       itype = TREE_TYPE (irhs1);
3802       pattern_stmt
3803 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3804 			       BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3805       break;
3806 
3807     case BIT_AND_EXPR:
3808       /* Try to optimize x = y & (a < b ? 1 : 0); into
3809 	 x = (a < b ? y : 0);
3810 
3811 	 E.g. for:
3812 	   bool a_b, b_b, c_b;
3813 	   TYPE d_T;
3814 
3815 	   S1  a_b = x1 CMP1 y1;
3816 	   S2  b_b = x2 CMP2 y2;
3817 	   S3  c_b = a_b & b_b;
3818 	   S4  d_T = (TYPE) c_b;
3819 
3820 	 we would normally emit:
3821 
3822 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
3823 	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
3824 	   S3'  c_T = a_T & b_T;
3825 	   S4'  d_T = c_T;
3826 
3827 	 but we can save one stmt by using the
3828 	 result of one of the COND_EXPRs in the other COND_EXPR and leave
3829 	 BIT_AND_EXPR stmt out:
3830 
3831 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
3832 	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
3833 	   S4'  f_T = c_T;
3834 
3835 	 At least when VEC_COND_EXPR is implemented using masks
3836 	 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3837 	 computes the comparison mask and ANDs it, in one case with an
3838 	 all-ones vector, in the other case with a vector register.
3839 	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3840 	 often more expensive.  */
3841       def_stmt = SSA_NAME_DEF_STMT (rhs2);
3842       def_rhs_code = gimple_assign_rhs_code (def_stmt);
3843       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3844 	{
3845 	  irhs1 = *defs.get (rhs1);
3846 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3847 	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
3848 	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3849 	    {
3850 	      rhs_code = def_rhs_code;
3851 	      rhs1 = def_rhs1;
3852 	      rhs2 = gimple_assign_rhs2 (def_stmt);
3853 	      trueval = irhs1;
3854 	      goto do_compare;
3855 	    }
3856 	  else
3857 	    irhs2 = *defs.get (rhs2);
3858 	  goto and_ior_xor;
3859 	}
3860       def_stmt = SSA_NAME_DEF_STMT (rhs1);
3861       def_rhs_code = gimple_assign_rhs_code (def_stmt);
3862       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3863 	{
3864 	  irhs2 = *defs.get (rhs2);
3865 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3866 	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
3867 	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3868 	    {
3869 	      rhs_code = def_rhs_code;
3870 	      rhs1 = def_rhs1;
3871 	      rhs2 = gimple_assign_rhs2 (def_stmt);
3872 	      trueval = irhs2;
3873 	      goto do_compare;
3874 	    }
3875 	  else
3876 	    irhs1 = *defs.get (rhs1);
3877 	  goto and_ior_xor;
3878 	}
3879       /* FALLTHRU */
3880     case BIT_IOR_EXPR:
3881     case BIT_XOR_EXPR:
3882       irhs1 = *defs.get (rhs1);
3883       irhs2 = *defs.get (rhs2);
3884     and_ior_xor:
3885       if (TYPE_PRECISION (TREE_TYPE (irhs1))
3886 	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
3887 	{
3888 	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3889 	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3890 	  int out_prec = TYPE_PRECISION (out_type);
3891 	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3892 	    irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
3893 					      stmt_info);
3894 	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3895 	    irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
3896 					      stmt_info);
3897 	  else
3898 	    {
3899 	      irhs1 = adjust_bool_pattern_cast (vinfo,
3900 						out_type, irhs1, stmt_info);
3901 	      irhs2 = adjust_bool_pattern_cast (vinfo,
3902 						out_type, irhs2, stmt_info);
3903 	    }
3904 	}
3905       itype = TREE_TYPE (irhs1);
3906       pattern_stmt
3907 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3908 			       rhs_code, irhs1, irhs2);
3909       break;
3910 
3911     default:
3912     do_compare:
3913       gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3914       if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3915 	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3916 	  || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3917 		       GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3918 	{
3919 	  scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3920 	  itype
3921 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3922 	}
3923       else
3924 	itype = TREE_TYPE (rhs1);
3925       cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3926       if (trueval == NULL_TREE)
3927 	trueval = build_int_cst (itype, 1);
3928       else
3929 	gcc_checking_assert (useless_type_conversion_p (itype,
3930 							TREE_TYPE (trueval)));
3931       pattern_stmt
3932 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3933 			       COND_EXPR, cond_expr, trueval,
3934 			       build_int_cst (itype, 0));
3935       break;
3936     }
3937 
3938   gimple_set_location (pattern_stmt, loc);
3939   append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
3940 			  get_vectype_for_scalar_type (vinfo, itype));
3941   defs.put (var, gimple_assign_lhs (pattern_stmt));
3942 }
3943 
3944 /* Comparison function used to qsort a vector of gimple stmts by UID.  */
3945 
3946 static int
3947 sort_after_uid (const void *p1, const void *p2)
3948 {
3949   const gimple *stmt1 = *(const gimple * const *)p1;
3950   const gimple *stmt2 = *(const gimple * const *)p2;
3951   return gimple_uid (stmt1) - gimple_uid (stmt2);
3952 }
3953 
3954 /* Create pattern stmts for all stmts participating in the bool pattern
3955    specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3956    OUT_TYPE.  Return the def of the pattern root.  */
3957 
3958 static tree
3959 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
3960 		   tree out_type, stmt_vec_info stmt_info)
3961 {
3962   /* Gather original stmts in the bool pattern in their order of appearance
3963      in the IL.  */
3964   auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3965   for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3966        i != bool_stmt_set.end (); ++i)
3967     bool_stmts.quick_push (*i);
3968   bool_stmts.qsort (sort_after_uid);
3969 
3970   /* Now process them in that order, producing pattern stmts.  */
3971   hash_map <tree, tree> defs;
3972   for (unsigned i = 0; i < bool_stmts.length (); ++i)
3973     adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
3974 			 out_type, stmt_info, defs);
3975 
3976   /* Pop the last pattern seq stmt and install it as pattern root for STMT.  */
3977   gimple *pattern_stmt
3978     = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
3979   return gimple_assign_lhs (pattern_stmt);
3980 }
3981 
3982 /* Return the proper type for converting bool VAR into
3983    an integer value or NULL_TREE if no such type exists.
3984    The type is chosen so that the converted value has the
3985    same number of elements as VAR's vector type.  */
3986 
3987 static tree
3988 integer_type_for_mask (tree var, vec_info *vinfo)
3989 {
3990   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3991     return NULL_TREE;
3992 
3993   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3994   if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
3995     return NULL_TREE;
3996 
3997   return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
3998 }
3999 
4000 /* Function vect_recog_bool_pattern
4001 
4002    Try to find pattern like following:
4003 
4004      bool a_b, b_b, c_b, d_b, e_b;
4005      TYPE f_T;
4006    loop:
4007      S1  a_b = x1 CMP1 y1;
4008      S2  b_b = x2 CMP2 y2;
4009      S3  c_b = a_b & b_b;
4010      S4  d_b = x3 CMP3 y3;
4011      S5  e_b = c_b | d_b;
4012      S6  f_T = (TYPE) e_b;
4013 
4014    where type 'TYPE' is an integral type.  Or a similar pattern
4015    ending in
4016 
4017      S6  f_Y = e_b ? r_Y : s_Y;
4018 
4019    as results from if-conversion of a complex condition.
4020 
4021    Input:
4022 
4023    * STMT_VINFO: The stmt at the end from which the pattern
4024 		 search begins, i.e. cast of a bool to
4025 		 an integer type.
4026 
4027    Output:
4028 
4029    * TYPE_OUT: The type of the output of this pattern.
4030 
4031    * Return value: A new stmt that will be used to replace the pattern.
4032 
4033 	Assuming the size of TYPE is the same as the size of all comparisons
4034 	(otherwise some casts would be added where needed), for the above
4035 	sequence we create the following related pattern stmts:
4036 	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
4037 	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
4038 	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
4039 	S5'  e_T = c_T | d_T;
4040 	S6'  f_T = e_T;
4041 
4042 	Instead of the above S3' we could emit:
4043 	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
4044 	S3'  c_T = a_T | b_T;
4045 	but the above is more efficient.  */
4046 
4047 static gimple *
4048 vect_recog_bool_pattern (vec_info *vinfo,
4049 			 stmt_vec_info stmt_vinfo, tree *type_out)
4050 {
4051   gimple *last_stmt = stmt_vinfo->stmt;
4052   enum tree_code rhs_code;
4053   tree var, lhs, rhs, vectype;
4054   gimple *pattern_stmt;
4055 
4056   if (!is_gimple_assign (last_stmt))
4057     return NULL;
4058 
4059   var = gimple_assign_rhs1 (last_stmt);
4060   lhs = gimple_assign_lhs (last_stmt);
4061   rhs_code = gimple_assign_rhs_code (last_stmt);
4062 
4063   if (rhs_code == VIEW_CONVERT_EXPR)
4064     var = TREE_OPERAND (var, 0);
4065 
4066   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4067     return NULL;
4068 
4069   hash_set<gimple *> bool_stmts;
4070 
4071   if (CONVERT_EXPR_CODE_P (rhs_code)
4072       || rhs_code == VIEW_CONVERT_EXPR)
4073     {
4074       if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4075 	  || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4076 	return NULL;
4077       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4078       if (vectype == NULL_TREE)
4079 	return NULL;
4080 
4081       if (check_bool_pattern (var, vinfo, bool_stmts))
4082 	{
4083 	  rhs = adjust_bool_stmts (vinfo, bool_stmts,
4084 				   TREE_TYPE (lhs), stmt_vinfo);
4085 	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4086 	  if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4087 	    pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4088 	  else
4089 	    pattern_stmt
4090 	      = gimple_build_assign (lhs, NOP_EXPR, rhs);
4091 	}
4092       else
4093 	{
4094 	  tree type = integer_type_for_mask (var, vinfo);
4095 	  tree cst0, cst1, tmp;
4096 
4097 	  if (!type)
4098 	    return NULL;
4099 
4100 	  /* We can use the cond directly with the narrowed type, avoiding
4101 	     multiple cond exprs followed by result packing, and instead
4102 	     perform a single cond on the packed mask.  In the widening
4103 	     case it is better to do the cond first and then extract
4104 	     the results.  */
4105 	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4106 	    type = TREE_TYPE (lhs);
4107 
4108 	  cst0 = build_int_cst (type, 0);
4109 	  cst1 = build_int_cst (type, 1);
4110 	  tmp = vect_recog_temp_ssa_var (type, NULL);
4111 	  pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4112 
4113 	  if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4114 	    {
4115 	      tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4116 	      append_pattern_def_seq (vinfo, stmt_vinfo,
4117 				      pattern_stmt, new_vectype);
4118 
4119 	      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4120 	      pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4121 	    }
4122 	}
4123 
4124       *type_out = vectype;
4125       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4126 
4127       return pattern_stmt;
4128     }
4129   else if (rhs_code == COND_EXPR
4130 	   && TREE_CODE (var) == SSA_NAME)
4131     {
4132       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4133       if (vectype == NULL_TREE)
4134 	return NULL;
4135 
4136       /* Build a scalar type for the boolean result that when
4137          vectorized matches the vector type of the result in
4138 	 size and number of elements.  */
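      /* E.g. if VECTYPE is a 128-bit vector with 4 elements, PREC below
	 becomes 32 and the boolean result is carried in a 32-bit
	 integer type.  */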
4139       unsigned prec
4140 	= vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4141 			       TYPE_VECTOR_SUBPARTS (vectype));
4142 
4143       tree type
4144 	= build_nonstandard_integer_type (prec,
4145 					  TYPE_UNSIGNED (TREE_TYPE (var)));
4146       if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4147 	return NULL;
4148 
4149       if (!check_bool_pattern (var, vinfo, bool_stmts))
4150 	return NULL;
4151 
4152       rhs = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
4153 
4154       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4155       pattern_stmt
4156 	  = gimple_build_assign (lhs, COND_EXPR,
4157 				 build2 (NE_EXPR, boolean_type_node,
4158 					 rhs, build_int_cst (type, 0)),
4159 				 gimple_assign_rhs2 (last_stmt),
4160 				 gimple_assign_rhs3 (last_stmt));
4161       *type_out = vectype;
4162       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4163 
4164       return pattern_stmt;
4165     }
4166   else if (rhs_code == SSA_NAME
4167 	   && STMT_VINFO_DATA_REF (stmt_vinfo))
4168     {
4169       stmt_vec_info pattern_stmt_info;
4170       tree nunits_vectype;
4171       if (!vect_get_vector_types_for_stmt (vinfo, stmt_vinfo, &vectype,
4172 					   &nunits_vectype)
4173 	  || !VECTOR_MODE_P (TYPE_MODE (vectype)))
4174 	return NULL;
4175 
4176       if (check_bool_pattern (var, vinfo, bool_stmts))
4177 	rhs = adjust_bool_stmts (vinfo, bool_stmts,
4178 				 TREE_TYPE (vectype), stmt_vinfo);
4179       else
4180 	{
4181 	  tree type = integer_type_for_mask (var, vinfo);
4182 	  tree cst0, cst1, new_vectype;
4183 
4184 	  if (!type)
4185 	    return NULL;
4186 
4187 	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4188 	    type = TREE_TYPE (vectype);
4189 
4190 	  cst0 = build_int_cst (type, 0);
4191 	  cst1 = build_int_cst (type, 1);
4192 	  new_vectype = get_vectype_for_scalar_type (vinfo, type);
4193 
4194 	  rhs = vect_recog_temp_ssa_var (type, NULL);
4195 	  pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4196 	  append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
4197 	}
4198 
4199       lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4200       if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4201 	{
4202 	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4203 	  gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4204 	  append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
4205 	  rhs = rhs2;
4206 	}
4207       pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4208       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4209       vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4210       *type_out = vectype;
4211       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4212 
4213       return pattern_stmt;
4214     }
4215   else
4216     return NULL;
4217 }
4218 
4219 
4220 /* A helper for vect_recog_mask_conversion_pattern.  Build
4221    conversion of MASK to a type suitable for masking VECTYPE.
4222    Built statement gets required vectype and is appended to
4223    a pattern sequence of STMT_VINFO.
4224 
4225    Return converted mask.  */
4226 
4227 static tree
4228 build_mask_conversion (vec_info *vinfo,
4229 		       tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4230 {
4231   gimple *stmt;
4232   tree masktype, tmp;
4233 
4234   masktype = truth_type_for (vectype);
4235   tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4236   stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4237   append_pattern_def_seq (vinfo, stmt_vinfo,
4238 			  stmt, masktype, TREE_TYPE (vectype));
4239 
4240   return tmp;
4241 }
4242 
4243 
4244 /* Function vect_recog_mask_conversion_pattern
4245 
4246    Try to find statements which require boolean type
4247    conversion.  Additional conversion statements are
4248    added to handle such cases.  For example:
4249 
4250    bool m_1, m_2, m_3;
4251    int i_4, i_5;
4252    double d_6, d_7;
4253    char c_1, c_2, c_3;
4254 
4255    S1   m_1 = i_4 > i_5;
4256    S2   m_2 = d_6 < d_7;
4257    S3   m_3 = m_1 & m_2;
4258    S4   c_1 = m_3 ? c_2 : c_3;
4259 
4260    Will be transformed into:
4261 
4262    S1   m_1 = i_4 > i_5;
4263    S2   m_2 = d_6 < d_7;
4264    S3'' m_2' = (_Bool[bitsize=32])m_2
4265    S3'  m_3' = m_1 & m_2';
4266    S4'' m_3'' = (_Bool[bitsize=8])m_3'
4267    S4'  c_1' = m_3'' ? c_2 : c_3;  */
4268 
4269 static gimple *
4270 vect_recog_mask_conversion_pattern (vec_info *vinfo,
4271 				    stmt_vec_info stmt_vinfo, tree *type_out)
4272 {
4273   gimple *last_stmt = stmt_vinfo->stmt;
4274   enum tree_code rhs_code;
4275   tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4276   tree vectype1, vectype2;
4277   stmt_vec_info pattern_stmt_info;
4278   tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
4279   tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
4280 
4281   /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion.  */
4282   if (is_gimple_call (last_stmt)
4283       && gimple_call_internal_p (last_stmt))
4284     {
4285       gcall *pattern_stmt;
4286 
4287       internal_fn ifn = gimple_call_internal_fn (last_stmt);
4288       int mask_argno = internal_fn_mask_index (ifn);
4289       if (mask_argno < 0)
4290 	return NULL;
4291 
4292       bool store_p = internal_store_fn_p (ifn);
4293       if (store_p)
4294 	{
4295 	  int rhs_index = internal_fn_stored_value_index (ifn);
4296 	  tree rhs = gimple_call_arg (last_stmt, rhs_index);
4297 	  vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4298 	}
4299       else
4300 	{
4301 	  lhs = gimple_call_lhs (last_stmt);
4302 	  vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4303 	}
4304 
4305       tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4306       tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
4307       if (!mask_arg_type)
4308 	return NULL;
4309       vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4310 
4311       if (!vectype1 || !vectype2
4312 	  || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4313 		       TYPE_VECTOR_SUBPARTS (vectype2)))
4314 	return NULL;
4315 
4316       tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
4317 
4318       auto_vec<tree, 8> args;
4319       unsigned int nargs = gimple_call_num_args (last_stmt);
4320       args.safe_grow (nargs, true);
4321       for (unsigned int i = 0; i < nargs; ++i)
4322 	args[i] = ((int) i == mask_argno
4323 		   ? tmp
4324 		   : gimple_call_arg (last_stmt, i));
4325       pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4326 
4327       if (!store_p)
4328 	{
4329 	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4330 	  gimple_call_set_lhs (pattern_stmt, lhs);
4331 	}
4332       gimple_call_set_nothrow (pattern_stmt, true);
4333 
4334       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4335       if (STMT_VINFO_DATA_REF (stmt_vinfo))
4336 	vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4337 
4338       *type_out = vectype1;
4339       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4340 
4341       return pattern_stmt;
4342     }
4343 
4344   if (!is_gimple_assign (last_stmt))
4345     return NULL;
4346 
4347   gimple *pattern_stmt;
4348   lhs = gimple_assign_lhs (last_stmt);
4349   rhs1 = gimple_assign_rhs1 (last_stmt);
4350   rhs_code = gimple_assign_rhs_code (last_stmt);
4351 
4352   /* Check for cond expression requiring mask conversion.  */
4353   if (rhs_code == COND_EXPR)
4354     {
4355       vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4356 
4357       if (TREE_CODE (rhs1) == SSA_NAME)
4358 	{
4359 	  rhs1_type = integer_type_for_mask (rhs1, vinfo);
4360 	  if (!rhs1_type)
4361 	    return NULL;
4362 	}
4363       else if (COMPARISON_CLASS_P (rhs1))
4364 	{
4365 	  /* Check whether we're comparing scalar booleans and (if so)
4366 	     whether a better mask type exists than the mask associated
4367 	     with boolean-sized elements.  This avoids unnecessary packs
4368 	     and unpacks if the booleans are set from comparisons of
4369 	     wider types.  E.g. in:
4370 
4371 	       int x1, x2, x3, x4, y1, y2;
4372 	       ...
4373 	       bool b1 = (x1 == x2);
4374 	       bool b2 = (x3 == x4);
4375 	       ... = b1 == b2 ? y1 : y2;
4376 
4377 	     it is better for b1 and b2 to use the mask type associated
4378 	     with int elements rather than bool (byte) elements.  */
4379 	  rhs1_op0 = TREE_OPERAND (rhs1, 0);
4380 	  rhs1_op1 = TREE_OPERAND (rhs1, 1);
4381 	  if (!rhs1_op0 || !rhs1_op1)
4382 	    return NULL;
4383 	  rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
4384 	  rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
4385 
4386 	  if (!rhs1_op0_type)
4387 	    rhs1_type = TREE_TYPE (rhs1_op0);
4388 	  else if (!rhs1_op1_type)
4389 	    rhs1_type = TREE_TYPE (rhs1_op1);
4390 	  else if (TYPE_PRECISION (rhs1_op0_type)
4391 		   != TYPE_PRECISION (rhs1_op1_type))
4392 	    {
4393 	      int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
4394 			 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4395 	      int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
4396 			 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4397 	      if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
4398 		{
4399 		  if (abs (tmp0) > abs (tmp1))
4400 		    rhs1_type = rhs1_op1_type;
4401 		  else
4402 		    rhs1_type = rhs1_op0_type;
4403 		}
4404 	      else
4405 		rhs1_type = build_nonstandard_integer_type
4406 		  (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
4407 	    }
4408 	  else
4409 	    rhs1_type = rhs1_op0_type;
4410 	}
4411       else
4412 	return NULL;
4413 
4414       vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4415 
4416       if (!vectype1 || !vectype2)
4417 	return NULL;
4418 
4419       /* Continue if a conversion is needed.  Also continue if we have
4420 	 a comparison whose vector type would normally be different from
4421 	 VECTYPE2 when considered in isolation.  In that case we'll
4422 	 replace the comparison with an SSA name (so that we can record
4423 	 its vector type) and behave as though the comparison was an SSA
4424 	 name from the outset.  */
4425       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4426 		    TYPE_VECTOR_SUBPARTS (vectype2))
4427 	  && !rhs1_op0_type
4428 	  && !rhs1_op1_type)
4429 	return NULL;
4430 
4431       /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4432          in place; we can handle it in vectorizable_condition.  This avoids
4433 	 unnecessary promotion stmts and an increased vectorization factor.  */
4434       if (COMPARISON_CLASS_P (rhs1)
4435 	  && INTEGRAL_TYPE_P (rhs1_type)
4436 	  && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4437 		       TYPE_VECTOR_SUBPARTS (vectype2)))
4438 	{
4439 	  enum vect_def_type dt;
4440 	  if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4441 	      && dt == vect_external_def
4442 	      && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4443 	      && (dt == vect_external_def
4444 		  || dt == vect_constant_def))
4445 	    {
4446 	      tree wide_scalar_type = build_nonstandard_integer_type
4447 		(vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
4448 	      tree vectype3 = get_vectype_for_scalar_type (vinfo,
4449 							   wide_scalar_type);
4450 	      if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4451 		return NULL;
4452 	    }
4453 	}
4454 
4455       /* If rhs1 is a comparison we need to move it into a
4456 	 separate statement.  */
4457       if (TREE_CODE (rhs1) != SSA_NAME)
4458 	{
4459 	  tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4460 	  if (rhs1_op0_type
4461 	      && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
4462 	    rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
4463 					      vectype2, stmt_vinfo);
4464 	  if (rhs1_op1_type
4465 	      && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
4466 	    rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
4467 				      vectype2, stmt_vinfo);
4468 	  pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
4469 					      rhs1_op0, rhs1_op1);
4470 	  rhs1 = tmp;
4471 	  append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
4472 				  rhs1_type);
4473 	}
4474 
4475       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4476 		    TYPE_VECTOR_SUBPARTS (vectype2)))
4477 	tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4478       else
4479 	tmp = rhs1;
4480 
4481       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4482       pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4483 					  gimple_assign_rhs2 (last_stmt),
4484 					  gimple_assign_rhs3 (last_stmt));
4485 
4486       *type_out = vectype1;
4487       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4488 
4489       return pattern_stmt;
4490     }
4491 
4492   /* Now check for binary boolean operations requiring conversion for
4493      one of the operands.  */
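  /* For example (illustrative): if rhs1 is a mask produced by comparing
     32-bit ints and rhs2 a mask produced by comparing 8-bit chars, the two
     mask precisions differ (32 vs. 8), so the operand with the wider mask
     precision is converted below to the other operand's mask type before
     the bitwise operation.  */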
4494   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4495     return NULL;
4496 
4497   if (rhs_code != BIT_IOR_EXPR
4498       && rhs_code != BIT_XOR_EXPR
4499       && rhs_code != BIT_AND_EXPR
4500       && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4501     return NULL;
4502 
4503   rhs2 = gimple_assign_rhs2 (last_stmt);
4504 
4505   rhs1_type = integer_type_for_mask (rhs1, vinfo);
4506   rhs2_type = integer_type_for_mask (rhs2, vinfo);
4507 
4508   if (!rhs1_type || !rhs2_type
4509       || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4510     return NULL;
4511 
4512   if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4513     {
4514       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4515       if (!vectype1)
4516 	return NULL;
4517       rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
4518     }
4519   else
4520     {
4521       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
4522       if (!vectype1)
4523 	return NULL;
4524       rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4525     }
4526 
4527   lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4528   pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4529 
4530   *type_out = vectype1;
4531   vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4532 
4533   return pattern_stmt;
4534 }
4535 
4536 /* STMT_INFO is a load or store.  If the load or store is conditional, return
4537    the boolean condition under which it occurs, otherwise return null.  */
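/* For example, a conditional access that has already been turned into a call
   to IFN_MASK_LOAD or IFN_MASK_STORE carries its mask as one of the call
   arguments, and that argument is what gets returned; a plain GIMPLE
   assignment is an unconditional access and yields NULL_TREE.  */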
4538 
4539 static tree
4540 vect_get_load_store_mask (stmt_vec_info stmt_info)
4541 {
4542   if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4543     {
4544       gcc_assert (gimple_assign_single_p (def_assign));
4545       return NULL_TREE;
4546     }
4547 
4548   if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4549     {
4550       internal_fn ifn = gimple_call_internal_fn (def_call);
4551       int mask_index = internal_fn_mask_index (ifn);
4552       return gimple_call_arg (def_call, mask_index);
4553     }
4554 
4555   gcc_unreachable ();
4556 }
4557 
4558 /* Return MASK if MASK is suitable for masking an operation on vectors
4559    of type VECTYPE, otherwise convert it into such a form and return
4560    the result.  Associate any conversion statements with STMT_INFO's
4561    pattern.  */
4562 
4563 static tree
4564 vect_convert_mask_for_vectype (tree mask, tree vectype,
4565 			       stmt_vec_info stmt_info, vec_info *vinfo)
4566 {
4567   tree mask_type = integer_type_for_mask (mask, vinfo);
4568   if (mask_type)
4569     {
4570       tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
4571       if (mask_vectype
4572 	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4573 		       TYPE_VECTOR_SUBPARTS (mask_vectype)))
4574 	mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
4575     }
4576   return mask;
4577 }
4578 
4579 /* Return the equivalent of:
4580 
4581      fold_convert (TYPE, VALUE)
4582 
4583    with the expectation that the operation will be vectorized.
4584    If new statements are needed, add them as pattern statements
4585    to STMT_INFO.  */
4586 
4587 static tree
4588 vect_add_conversion_to_pattern (vec_info *vinfo,
4589 				tree type, tree value, stmt_vec_info stmt_info)
4590 {
4591   if (useless_type_conversion_p (type, TREE_TYPE (value)))
4592     return value;
4593 
4594   tree new_value = vect_recog_temp_ssa_var (type, NULL);
4595   gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4596   append_pattern_def_seq (vinfo, stmt_info, conversion,
4597 			  get_vectype_for_scalar_type (vinfo, type));
4598   return new_value;
4599 }
4600 
4601 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4602    internal function.  Return the final statement on success and set
4603    *TYPE_OUT to the vector type being loaded or stored.
4604 
4605    This function only handles gathers and scatters that were recognized
4606    as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */
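/* Roughly, the load case below builds a call to GS_INFO.ifn with arguments
   (base, offset, scale, zero[, mask]), while the store case builds a call to
   IFN_SCATTER_STORE or IFN_MASK_SCATTER_STORE with arguments
   (base, offset, scale, rhs[, mask]).  */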
4607 
4608 static gimple *
4609 vect_recog_gather_scatter_pattern (vec_info *vinfo,
4610 				   stmt_vec_info stmt_info, tree *type_out)
4611 {
4612   /* Currently we only support this for loop vectorization.  */
4613   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4614   if (!loop_vinfo)
4615     return NULL;
4616 
4617   /* Make sure that we're looking at a gather load or scatter store.  */
4618   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4619   if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4620     return NULL;
4621 
4622   /* Get the boolean that controls whether the load or store happens.
4623      This is null if the operation is unconditional.  */
4624   tree mask = vect_get_load_store_mask (stmt_info);
4625 
4626   /* Make sure that the target supports an appropriate internal
4627      function for the gather/scatter operation.  */
4628   gather_scatter_info gs_info;
4629   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4630       || gs_info.decl)
4631     return NULL;
4632 
4633   /* Convert the mask to the right form.  */
4634   tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
4635 						 gs_info.element_type);
4636   if (mask)
4637     mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4638 					  loop_vinfo);
4639 
4640   /* Get the invariant base and non-invariant offset, converting the
4641      latter to the element type of the offset vector.  */
4642   tree base = gs_info.base;
4643   tree offset_type = TREE_TYPE (gs_info.offset_vectype);
4644   tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
4645 						gs_info.offset, stmt_info);
4646 
4647   /* Build the new pattern statement.  */
4648   tree scale = size_int (gs_info.scale);
4649   gcall *pattern_stmt;
4650   if (DR_IS_READ (dr))
4651     {
4652       tree zero = build_zero_cst (gs_info.element_type);
4653       if (mask != NULL)
4654 	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
4655 						   offset, scale, zero, mask);
4656       else
4657 	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4658 						   offset, scale, zero);
4659       tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4660       gimple_call_set_lhs (pattern_stmt, load_lhs);
4661     }
4662   else
4663     {
4664       tree rhs = vect_get_store_rhs (stmt_info);
4665       if (mask != NULL)
4666 	pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4667 						   base, offset, scale, rhs,
4668 						   mask);
4669       else
4670 	pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4671 						   base, offset, scale, rhs);
4672     }
4673   gimple_call_set_nothrow (pattern_stmt, true);
4674 
4675   /* Copy across relevant vectorization info and associate DR with the
4676      new pattern statement instead of the original statement.  */
4677   stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4678   loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4679 
4680   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4681   *type_out = vectype;
4682   vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4683 
4684   return pattern_stmt;
4685 }
4686 
4687 /* Return true if TYPE is a non-boolean integer type.  These are the types
4688    that we want to consider for narrowing.  */
4689 
4690 static bool
4691 vect_narrowable_type_p (tree type)
4692 {
4693   return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4694 }
4695 
4696 /* Return true if the operation given by CODE can be truncated to N bits
4697    when only N bits of the output are needed.  This is only true if bit N+1
4698    of the inputs has no effect on the low N bits of the result.  */
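/* For example, (a + b) & 0xff depends only on the low 8 bits of A and B,
   so PLUS_EXPR is truncatable, whereas (a >> 1) & 0xff also depends on
   bit 8 of A, so right shifts are not.  */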
4699 
4700 static bool
4701 vect_truncatable_operation_p (tree_code code)
4702 {
4703   switch (code)
4704     {
4705     case PLUS_EXPR:
4706     case MINUS_EXPR:
4707     case MULT_EXPR:
4708     case BIT_AND_EXPR:
4709     case BIT_IOR_EXPR:
4710     case BIT_XOR_EXPR:
4711     case COND_EXPR:
4712       return true;
4713 
4714     default:
4715       return false;
4716     }
4717 }
4718 
4719 /* Record that STMT_INFO could be changed from operating on TYPE to
4720    operating on a type with the precision and sign given by PRECISION
4721    and SIGN respectively.  PRECISION is an arbitrary bit precision;
4722    it might not be a whole number of bytes.  */
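/* For example, recording a precision of 12 bits results in an
   operation_precision of 16, since the requested precision is rounded
   up to a full element precision below.  */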
4723 
4724 static void
4725 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4726 			 unsigned int precision, signop sign)
4727 {
4728   /* Round the precision up to a whole number of bytes.  */
4729   precision = vect_element_precision (precision);
4730   if (precision < TYPE_PRECISION (type)
4731       && (!stmt_info->operation_precision
4732 	  || stmt_info->operation_precision > precision))
4733     {
4734       stmt_info->operation_precision = precision;
4735       stmt_info->operation_sign = sign;
4736     }
4737 }
4738 
4739 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4740    non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
4741    is an arbitrary bit precision; it might not be a whole number of bytes.  */
4742 
4743 static void
4744 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4745 			      unsigned int min_input_precision)
4746 {
4747   /* This operation in isolation only requires the inputs to have
4748      MIN_INPUT_PRECISION bits of precision.  However, that doesn't mean
4749      that MIN_INPUT_PRECISION is a natural precision for the chain
4750      as a whole.  E.g. consider something like:
4751 
4752 	 unsigned short *x, *y;
4753 	 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4754 
4755      The right shift can be done on unsigned chars, and only requires the
4756      result of "*x & 0xf0" to be done on unsigned chars.  But taking that
4757      approach would mean turning a natural chain of single-vector unsigned
4758      short operations into one that truncates "*x" and then extends
4759      "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4760      operation and one vector for each unsigned char operation.
4761      This would be a significant pessimization.
4762 
4763      Instead only propagate the maximum of this precision and the precision
4764      required by the users of the result.  This means that we don't pessimize
4765      the case above but continue to optimize things like:
4766 
4767 	 unsigned char *y;
4768 	 unsigned short *x;
4769 	 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4770 
4771      Here we would truncate two vectors of *x to a single vector of
4772      unsigned chars and use single-vector unsigned char operations for
4773      everything else, rather than doing two unsigned short copies of
4774      "(*x & 0xf0) >> 4" and then truncating the result.  */
4775   min_input_precision = MAX (min_input_precision,
4776 			     stmt_info->min_output_precision);
4777 
4778   if (min_input_precision < TYPE_PRECISION (type)
4779       && (!stmt_info->min_input_precision
4780 	  || stmt_info->min_input_precision > min_input_precision))
4781     stmt_info->min_input_precision = min_input_precision;
4782 }
4783 
4784 /* Subroutine of vect_determine_min_output_precision.  Return true if
4785    we can calculate a reduced number of output bits for STMT_INFO,
4786    whose result is LHS.  */
4787 
4788 static bool
4789 vect_determine_min_output_precision_1 (vec_info *vinfo,
4790 				       stmt_vec_info stmt_info, tree lhs)
4791 {
4792   /* Take the maximum precision required by users of the result.  */
4793   unsigned int precision = 0;
4794   imm_use_iterator iter;
4795   use_operand_p use;
4796   FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4797     {
4798       gimple *use_stmt = USE_STMT (use);
4799       if (is_gimple_debug (use_stmt))
4800 	continue;
4801       stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4802       if (!use_stmt_info || !use_stmt_info->min_input_precision)
4803 	return false;
4804       /* The input precision recorded for COND_EXPRs applies only to the
4805 	 "then" and "else" values.  */
4806       gassign *assign = dyn_cast <gassign *> (use_stmt);
4807       if (assign
4808 	  && gimple_assign_rhs_code (assign) == COND_EXPR
4809 	  && use->use != gimple_assign_rhs2_ptr (assign)
4810 	  && use->use != gimple_assign_rhs3_ptr (assign))
4811 	return false;
4812       precision = MAX (precision, use_stmt_info->min_input_precision);
4813     }
4814 
4815   if (dump_enabled_p ())
4816     dump_printf_loc (MSG_NOTE, vect_location,
4817 		     "only the low %d bits of %T are significant\n",
4818 		     precision, lhs);
4819   stmt_info->min_output_precision = precision;
4820   return true;
4821 }
4822 
4823 /* Calculate min_output_precision for STMT_INFO.  */
4824 
4825 static void
4826 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
4827 {
4828   /* We're only interested in statements with a narrowable result.  */
4829   tree lhs = gimple_get_lhs (stmt_info->stmt);
4830   if (!lhs
4831       || TREE_CODE (lhs) != SSA_NAME
4832       || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4833     return;
4834 
4835   if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
4836     stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4837 }
4838 
4839 /* Use range information to decide whether STMT (described by STMT_INFO)
4840    could be done in a narrower type.  This is effectively a forward
4841    propagation, since it uses context-independent information that applies
4842    to all users of an SSA name.  */
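/* For example, a MULT_EXPR whose result is known to lie in [0, 4000] needs
   only 12 significant bits, so it can be recorded as computable in unsigned
   16-bit arithmetic (12 bits rounded up to a full element precision).  */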
4843 
4844 static void
4845 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4846 {
4847   tree lhs = gimple_assign_lhs (stmt);
4848   if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4849     return;
4850 
4851   tree type = TREE_TYPE (lhs);
4852   if (!vect_narrowable_type_p (type))
4853     return;
4854 
4855   /* First see whether we have any useful range information for the result.  */
4856   unsigned int precision = TYPE_PRECISION (type);
4857   signop sign = TYPE_SIGN (type);
4858   wide_int min_value, max_value;
4859   if (!vect_get_range_info (lhs, &min_value, &max_value))
4860     return;
4861 
4862   tree_code code = gimple_assign_rhs_code (stmt);
4863   unsigned int nops = gimple_num_ops (stmt);
4864 
4865   if (!vect_truncatable_operation_p (code))
4866     /* Check that all relevant input operands are compatible, and update
4867        [MIN_VALUE, MAX_VALUE] to include their ranges.  */
4868     for (unsigned int i = 1; i < nops; ++i)
4869       {
4870 	tree op = gimple_op (stmt, i);
4871 	if (TREE_CODE (op) == INTEGER_CST)
4872 	  {
4873 	    /* Don't require the integer to have TYPE (which it might
4874 	       not for things like shift amounts, etc.), but do require it
4875 	       to fit the type.  */
4876 	    if (!int_fits_type_p (op, type))
4877 	      return;
4878 
4879 	    min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4880 	    max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4881 	  }
4882 	else if (TREE_CODE (op) == SSA_NAME)
4883 	  {
4884 	    /* Ignore codes that don't take uniform arguments.  */
4885 	    if (!types_compatible_p (TREE_TYPE (op), type))
4886 	      return;
4887 
4888 	    wide_int op_min_value, op_max_value;
4889 	    if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4890 	      return;
4891 
4892 	    min_value = wi::min (min_value, op_min_value, sign);
4893 	    max_value = wi::max (max_value, op_max_value, sign);
4894 	  }
4895 	else
4896 	  return;
4897       }
4898 
4899   /* Try to switch signed types for unsigned types if we can.
4900      This is better for two reasons.  First, unsigned ops tend
4901      to be cheaper than signed ops.  Second, it means that we can
4902      handle things like:
4903 
4904 	signed char c;
4905 	int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4906 
4907      as:
4908 
4909 	signed char c;
4910 	unsigned short res_1 = (unsigned short) c & 0xff00;
4911 	int res = (int) res_1;
4912 
4913      where the intermediate result res_1 has unsigned rather than
4914      signed type.  */
4915   if (sign == SIGNED && !wi::neg_p (min_value))
4916     sign = UNSIGNED;
4917 
4918   /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
4919   unsigned int precision1 = wi::min_precision (min_value, sign);
4920   unsigned int precision2 = wi::min_precision (max_value, sign);
4921   unsigned int value_precision = MAX (precision1, precision2);
4922   if (value_precision >= precision)
4923     return;
4924 
4925   if (dump_enabled_p ())
4926     dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4927 		     " without loss of precision: %G",
4928 		     sign == SIGNED ? "signed" : "unsigned",
4929 		     value_precision, stmt);
4930 
4931   vect_set_operation_type (stmt_info, type, value_precision, sign);
4932   vect_set_min_input_precision (stmt_info, type, value_precision);
4933 }
4934 
4935 /* Use information about the users of STMT's result to decide whether
4936    STMT (described by STMT_INFO) could be done in a narrower type.
4937    This is effectively a backward propagation.  */
4938 
4939 static void
4940 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4941 {
4942   tree_code code = gimple_assign_rhs_code (stmt);
4943   unsigned int opno = (code == COND_EXPR ? 2 : 1);
4944   tree type = TREE_TYPE (gimple_op (stmt, opno));
4945   if (!vect_narrowable_type_p (type))
4946     return;
4947 
4948   unsigned int precision = TYPE_PRECISION (type);
4949   unsigned int operation_precision, min_input_precision;
4950   switch (code)
4951     {
4952     CASE_CONVERT:
4953       /* Only the bits that contribute to the output matter.  Don't change
4954 	 the precision of the operation itself.  */
4955       operation_precision = precision;
4956       min_input_precision = stmt_info->min_output_precision;
4957       break;
4958 
4959     case LSHIFT_EXPR:
4960     case RSHIFT_EXPR:
4961       {
4962 	tree shift = gimple_assign_rhs2 (stmt);
4963 	if (TREE_CODE (shift) != INTEGER_CST
4964 	    || !wi::ltu_p (wi::to_widest (shift), precision))
4965 	  return;
4966 	unsigned int const_shift = TREE_INT_CST_LOW (shift);
4967 	if (code == LSHIFT_EXPR)
4968 	  {
4969 	    /* Avoid creating an undefined shift.
4970 
4971 	       ??? We could instead use min_output_precision as-is and
4972 	       optimize out-of-range shifts to zero.  However, only
4973 	       degenerate testcases shift away all their useful input data,
4974 	       and it isn't natural to drop input operations in the middle
4975 	       of vectorization.  This sort of thing should really be
4976 	       handled before vectorization.  */
4977 	    operation_precision = MAX (stmt_info->min_output_precision,
4978 				       const_shift + 1);
4979 	    /* We need CONST_SHIFT fewer bits of the input.  */
4980 	    min_input_precision = (MAX (operation_precision, const_shift)
4981 				   - const_shift);
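	    /* For example, if only the low 8 bits of (X << 3) are needed,
	       the shift is done in at least 8 bits and only the low 5 bits
	       of X matter.  */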
4982 	  }
4983 	else
4984 	  {
4985 	    /* We need CONST_SHIFT extra bits to do the operation.  */
4986 	    operation_precision = (stmt_info->min_output_precision
4987 				   + const_shift);
4988 	    min_input_precision = operation_precision;
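	    /* For example, if only the low 8 bits of (X >> 3) are needed,
	       the shift must be done in at least 11 bits and needs the low
	       11 bits of X.  */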
4989 	  }
4990 	break;
4991       }
4992 
4993     default:
4994       if (vect_truncatable_operation_p (code))
4995 	{
4996 	  /* Input bit N has no effect on output bits N-1 and lower.  */
4997 	  operation_precision = stmt_info->min_output_precision;
4998 	  min_input_precision = operation_precision;
4999 	  break;
5000 	}
5001       return;
5002     }
5003 
5004   if (operation_precision < precision)
5005     {
5006       if (dump_enabled_p ())
5007 	dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5008 			 " without affecting users: %G",
5009 			 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
5010 			 operation_precision, stmt);
5011       vect_set_operation_type (stmt_info, type, operation_precision,
5012 			       TYPE_SIGN (type));
5013     }
5014   vect_set_min_input_precision (stmt_info, type, min_input_precision);
5015 }
5016 
5017 /* Return true if the statement described by STMT_INFO sets a boolean
5018    SSA_NAME and if we know how to vectorize this kind of statement using
5019    vector mask types.  */
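/* For example, comparisons such as a = (b < c), conversions between boolean
   types, and bitwise AND/IOR/XOR/NOT of booleans all qualify, as does a PHI
   with a boolean result.  */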
5020 
5021 static bool
5022 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
5023 {
5024   tree lhs = gimple_get_lhs (stmt_info->stmt);
5025   if (!lhs
5026       || TREE_CODE (lhs) != SSA_NAME
5027       || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5028     return false;
5029 
5030   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5031     {
5032       tree_code rhs_code = gimple_assign_rhs_code (assign);
5033       switch (rhs_code)
5034 	{
5035 	CASE_CONVERT:
5036 	case SSA_NAME:
5037 	case BIT_NOT_EXPR:
5038 	case BIT_IOR_EXPR:
5039 	case BIT_XOR_EXPR:
5040 	case BIT_AND_EXPR:
5041 	  return true;
5042 
5043 	default:
5044 	  return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
5045 	}
5046     }
5047   else if (is_a <gphi *> (stmt_info->stmt))
5048     return true;
5049   return false;
5050 }
5051 
5052 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
5053    a vector mask type instead of a normal vector type.  Record the
5054    result in STMT_INFO->mask_precision.  */
5055 
5056 static void
5057 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5058 {
5059   if (!possible_vector_mask_operation_p (stmt_info))
5060     return;
5061 
5062   /* If at least one boolean input uses a vector mask type,
5063      pick the mask type with the narrowest elements.
5064 
5065      ??? This is the traditional behavior.  It should always produce
5066      the smallest number of operations, but isn't necessarily the
5067      optimal choice.  For example, if we have:
5068 
5069        a = b & c
5070 
5071      where:
5072 
5073        - the user of a wants it to have a mask type for 16-bit elements (M16)
5074        - b also uses M16
5075        - c uses a mask type for 8-bit elements (M8)
5076 
5077      then picking M8 gives:
5078 
5079        - 1 M16->M8 pack for b
5080        - 1 M8 AND for a
5081        - 2 M8->M16 unpacks for the user of a
5082 
5083      whereas picking M16 would have given:
5084 
5085        - 2 M8->M16 unpacks for c
5086        - 2 M16 ANDs for a
5087 
5088      The number of operations is the same, but M16 would have given
5089      a shorter dependency chain and allowed more ILP.  */
5090   unsigned int precision = ~0U;
5091   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5092     {
5093       unsigned int nops = gimple_num_ops (assign);
5094       for (unsigned int i = 1; i < nops; ++i)
5095 	{
5096 	  tree rhs = gimple_op (assign, i);
5097 	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
5098 	    continue;
5099 
5100 	  stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5101 	  if (!def_stmt_info)
5102 	    /* Don't let external or constant operands influence the choice.
5103 	       We can convert them to whichever vector type we pick.  */
5104 	    continue;
5105 
5106 	  if (def_stmt_info->mask_precision)
5107 	    {
5108 	      if (precision > def_stmt_info->mask_precision)
5109 		precision = def_stmt_info->mask_precision;
5110 	    }
5111 	}
5112 
5113       /* If the statement compares two values that shouldn't use vector masks,
5114 	 try comparing the values as normal scalars instead.  */
5115       tree_code rhs_code = gimple_assign_rhs_code (assign);
5116       if (precision == ~0U
5117 	  && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5118 	{
5119 	  tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
5120 	  scalar_mode mode;
5121 	  tree vectype, mask_type;
5122 	  if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
5123 	      && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
5124 	      && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
5125 	      && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
5126 	    precision = GET_MODE_BITSIZE (mode);
5127 	}
5128     }
5129   else
5130     {
5131       gphi *phi = as_a <gphi *> (stmt_info->stmt);
5132       for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
5133 	{
5134 	  tree rhs = gimple_phi_arg_def (phi, i);
5135 
5136 	  stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5137 	  if (!def_stmt_info)
5138 	    /* Don't let external or constant operands influence the choice.
5139 	       We can convert them to whichever vector type we pick.  */
5140 	    continue;
5141 
5142 	  if (def_stmt_info->mask_precision)
5143 	    {
5144 	      if (precision > def_stmt_info->mask_precision)
5145 		precision = def_stmt_info->mask_precision;
5146 	    }
5147 	}
5148     }
5149 
5150   if (dump_enabled_p ())
5151     {
5152       if (precision == ~0U)
5153 	dump_printf_loc (MSG_NOTE, vect_location,
5154 			 "using normal nonmask vectors for %G",
5155 			 stmt_info->stmt);
5156       else
5157 	dump_printf_loc (MSG_NOTE, vect_location,
5158 			 "using boolean precision %d for %G",
5159 			 precision, stmt_info->stmt);
5160     }
5161 
5162   stmt_info->mask_precision = precision;
5163 }
5164 
5165 /* Handle vect_determine_precisions for STMT_INFO, given that we
5166    have already done so for the users of its result.  */
5167 
5168 void
5169 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
5170 {
5171   vect_determine_min_output_precision (vinfo, stmt_info);
5172   if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
5173     {
5174       vect_determine_precisions_from_range (stmt_info, stmt);
5175       vect_determine_precisions_from_users (stmt_info, stmt);
5176     }
5177 }
5178 
5179 /* Walk backwards through the vectorizable region to determine the
5180    values of these fields:
5181 
5182    - min_output_precision
5183    - min_input_precision
5184    - operation_precision
5185    - operation_sign.  */
5186 
5187 void
5188 vect_determine_precisions (vec_info *vinfo)
5189 {
5190   DUMP_VECT_SCOPE ("vect_determine_precisions");
5191 
5192   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5193     {
5194       class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5195       basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5196       unsigned int nbbs = loop->num_nodes;
5197 
5198       for (unsigned int i = 0; i < nbbs; i++)
5199 	{
5200 	  basic_block bb = bbs[i];
5201 	  for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5202 	    if (!is_gimple_debug (gsi_stmt (si)))
5203 	      vect_determine_mask_precision
5204 		(vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5205 	}
5206       for (unsigned int i = 0; i < nbbs; i++)
5207 	{
5208 	  basic_block bb = bbs[nbbs - i - 1];
5209 	  for (gimple_stmt_iterator si = gsi_last_bb (bb);
5210 	       !gsi_end_p (si); gsi_prev (&si))
5211 	    if (!is_gimple_debug (gsi_stmt (si)))
5212 	      vect_determine_stmt_precisions
5213 		(vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5214 	}
5215     }
5216   else
5217     {
5218       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5219       for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5220 	{
5221 	  basic_block bb = bb_vinfo->bbs[i];
5222 	  for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5223 	    {
5224 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5225 	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5226 		vect_determine_mask_precision (vinfo, stmt_info);
5227 	    }
5228 	  for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5229 	    {
5230 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5231 	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5232 		vect_determine_mask_precision (vinfo, stmt_info);
5233 	    }
5234 	}
5235       for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
5236 	{
5237 	  for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
5238 	       !gsi_end_p (gsi); gsi_prev (&gsi))
5239 	    {
5240 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5241 	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5242 		vect_determine_stmt_precisions (vinfo, stmt_info);
5243 	    }
5244 	  for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
5245 	       !gsi_end_p (gsi); gsi_next (&gsi))
5246 	    {
5247 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5248 	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5249 		vect_determine_stmt_precisions (vinfo, stmt_info);
5250 	    }
5251 	}
5252     }
5253 }
5254 
5255 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
5256 
5257 struct vect_recog_func
5258 {
5259   vect_recog_func_ptr fn;
5260   const char *name;
5261 };
5262 
5263 /* Note that ordering matters - the first pattern matching on a stmt is
5264    taken, which usually means the more complex one needs to precede the
5265    less complex ones (widen_sum only after dot_prod or sad for example).  */
5266 static vect_recog_func vect_vect_recog_func_ptrs[] = {
5267   { vect_recog_over_widening_pattern, "over_widening" },
5268   /* Must come after over_widening, which narrows the shift as much as
5269      possible beforehand.  */
5270   { vect_recog_average_pattern, "average" },
5271   { vect_recog_mulhs_pattern, "mult_high" },
5272   { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
5273   { vect_recog_widen_mult_pattern, "widen_mult" },
5274   { vect_recog_dot_prod_pattern, "dot_prod" },
5275   { vect_recog_sad_pattern, "sad" },
5276   { vect_recog_widen_sum_pattern, "widen_sum" },
5277   { vect_recog_pow_pattern, "pow" },
5278   { vect_recog_widen_shift_pattern, "widen_shift" },
5279   { vect_recog_rotate_pattern, "rotate" },
5280   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
5281   { vect_recog_divmod_pattern, "divmod" },
5282   { vect_recog_mult_pattern, "mult" },
5283   { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
5284   { vect_recog_bool_pattern, "bool" },
5285   /* This must come before mask conversion, and includes the parts
5286      of mask conversion that are needed for gather and scatter
5287      internal functions.  */
5288   { vect_recog_gather_scatter_pattern, "gather_scatter" },
5289   { vect_recog_mask_conversion_pattern, "mask_conversion" },
5290   { vect_recog_widen_plus_pattern, "widen_plus" },
5291   { vect_recog_widen_minus_pattern, "widen_minus" },
5292 };
5293 
5294 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
5295 
5296 /* Mark statements that are involved in a pattern.  */
5297 
5298 void
5299 vect_mark_pattern_stmts (vec_info *vinfo,
5300 			 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
5301                          tree pattern_vectype)
5302 {
5303   stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
5304   gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5305 
5306   gimple *orig_pattern_stmt = NULL;
5307   if (is_pattern_stmt_p (orig_stmt_info))
5308     {
5309       /* We're replacing a statement in an existing pattern definition
5310 	 sequence.  */
5311       orig_pattern_stmt = orig_stmt_info->stmt;
5312       if (dump_enabled_p ())
5313 	dump_printf_loc (MSG_NOTE, vect_location,
5314 			 "replacing earlier pattern %G", orig_pattern_stmt);
5315 
5316       /* To keep the book-keeping simple, just swap the lhs of the
5317 	 old and new statements, so that the old one has a valid but
5318 	 unused lhs.  */
5319       tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
5320       gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
5321       gimple_set_lhs (pattern_stmt, old_lhs);
5322 
5323       if (dump_enabled_p ())
5324 	dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
5325 
5326       /* Switch to the statement that ORIG replaces.  */
5327       orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
5328 
5329       /* We shouldn't be replacing the main pattern statement.  */
5330       gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
5331 		  != orig_pattern_stmt);
5332     }
5333 
5334   if (def_seq)
5335     for (gimple_stmt_iterator si = gsi_start (def_seq);
5336 	 !gsi_end_p (si); gsi_next (&si))
5337       {
5338 	if (dump_enabled_p ())
5339 	  dump_printf_loc (MSG_NOTE, vect_location,
5340 			   "extra pattern stmt: %G", gsi_stmt (si));
5341 	stmt_vec_info pattern_stmt_info
5342 	  = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
5343 				    orig_stmt_info, pattern_vectype);
5344 	/* Stmts in the def sequence are not vectorizable cycle or
5345 	   induction defs; instead they should all be vect_internal_def
5346 	   feeding the main pattern stmt, which retains this def type.  */
5347 	STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
5348       }
5349 
5350   if (orig_pattern_stmt)
5351     {
5352       vect_init_pattern_stmt (vinfo, pattern_stmt,
5353 			      orig_stmt_info, pattern_vectype);
5354 
5355       /* Insert all the new pattern statements before the original one.  */
5356       gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5357       gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
5358 					       orig_def_seq);
5359       gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
5360       gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
5361 
5362       /* Remove the pattern statement that this new pattern replaces.  */
5363       gsi_remove (&gsi, false);
5364     }
5365   else
5366     vect_set_pattern_stmt (vinfo,
5367 			   pattern_stmt, orig_stmt_info, pattern_vectype);
5368 
5369   /* Transfer reduction path info to the pattern.  */
5370   if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
5371     {
5372       tree lookfor = gimple_op (orig_stmt_info_saved->stmt,
5373 				1 + STMT_VINFO_REDUC_IDX (orig_stmt_info));
5374       /* Search the pattern def sequence and the main pattern stmt.  Note
5375          we may have inserted all of them into a containing pattern def
5376 	 sequence, so the following is a bit awkward.  */
5377       gimple_stmt_iterator si;
5378       gimple *s;
5379       if (def_seq)
5380 	{
5381 	  si = gsi_start (def_seq);
5382 	  s = gsi_stmt (si);
5383 	  gsi_next (&si);
5384 	}
5385       else
5386 	{
5387 	  si = gsi_none ();
5388 	  s = pattern_stmt;
5389 	}
5390       do
5391 	{
5392 	  bool found = false;
5393 	  for (unsigned i = 1; i < gimple_num_ops (s); ++i)
5394 	    if (gimple_op (s, i) == lookfor)
5395 	      {
5396 		STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i - 1;
5397 		lookfor = gimple_get_lhs (s);
5398 		found = true;
5399 		break;
5400 	      }
5401 	  if (s == pattern_stmt)
5402 	    {
5403 	      if (!found && dump_enabled_p ())
5404 		dump_printf_loc (MSG_NOTE, vect_location,
5405 				 "failed to update reduction index.\n");
5406 	      break;
5407 	    }
5408 	  if (gsi_end_p (si))
5409 	    s = pattern_stmt;
5410 	  else
5411 	    {
5412 	      s = gsi_stmt (si);
5413 	      if (s == pattern_stmt)
5414 		/* Found the end inside a bigger pattern def seq.  */
5415 		si = gsi_none ();
5416 	      else
5417 		gsi_next (&si);
5418 	    }
5419 	} while (1);
5420     }
5421 }
5422 
5423 /* Function vect_pattern_recog_1
5424 
5425    Input:
5426    PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
5427         computation pattern.
5428    STMT_INFO: A stmt from which the pattern search should start.
5429 
5430    If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
5431    a sequence of statements that has the same functionality and can be
5432    used to replace STMT_INFO.  It returns the last statement in the sequence
5433    and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
5434    PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
5435    statement, having first checked that the target supports the new operation
5436    in that type.
5437 
5438    This function also does some bookkeeping, as explained in the documentation
5439    for vect_pattern_recog.  */
5440 
5441 static void
5442 vect_pattern_recog_1 (vec_info *vinfo,
5443 		      vect_recog_func *recog_func, stmt_vec_info stmt_info)
5444 {
5445   gimple *pattern_stmt;
5446   loop_vec_info loop_vinfo;
5447   tree pattern_vectype;
5448 
5449   /* If this statement has already been replaced with pattern statements,
5450      leave the original statement alone, since the first match wins.
5451      Instead try to match against the definition statements that feed
5452      the main pattern statement.  */
5453   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5454     {
5455       gimple_stmt_iterator gsi;
5456       for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5457 	   !gsi_end_p (gsi); gsi_next (&gsi))
5458 	vect_pattern_recog_1 (vinfo, recog_func,
5459 			      vinfo->lookup_stmt (gsi_stmt (gsi)));
5460       return;
5461     }
5462 
5463   gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5464   pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
5465   if (!pattern_stmt)
5466     {
5467       /* Clear any half-formed pattern definition sequence.  */
5468       STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
5469       return;
5470     }
5471 
5472   loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5473   gcc_assert (pattern_vectype);
5474 
5475   /* Found a vectorizable pattern.  */
5476   if (dump_enabled_p ())
5477     dump_printf_loc (MSG_NOTE, vect_location,
5478 		     "%s pattern recognized: %G",
5479 		     recog_func->name, pattern_stmt);
5480 
5481   /* Mark the stmts that are involved in the pattern. */
5482   vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
5483 
5484   /* Patterns cannot be vectorized using SLP, because they change the order of
5485      computation.  */
5486   if (loop_vinfo)
5487     {
5488       unsigned ix, ix2;
5489       stmt_vec_info *elem_ptr;
5490       VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
5491 			     elem_ptr, *elem_ptr == stmt_info);
5492     }
5493 }
5494 
5495 
5496 /* Function vect_pattern_recog
5497 
5498    Input:
5499    VINFO - a vec_info describing the loop or basic-block region in which
5500         we want to look for computation idioms.
5501 
5502    Output - for each computation idiom that is detected we create a new stmt
5503         that provides the same functionality and that can be vectorized.  We
5504         also record some information in the stmt_vec_info of the relevant
5505         stmts, as explained below:
5506 
5507    At the entry to this function we have the following stmts, with the
5508    following initial value in the STMT_VINFO fields:
5509 
5510          stmt                     in_pattern_p  related_stmt    vec_stmt
5511          S1: a_i = ....                 -       -               -
5512          S2: a_2 = ..use(a_i)..         -       -               -
5513          S3: a_1 = ..use(a_2)..         -       -               -
5514          S4: a_0 = ..use(a_1)..         -       -               -
5515          S5: ... = ..use(a_0)..         -       -               -
5516 
5517    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
5518    represented by a single stmt.  We then:
5519    - create a new stmt S6 equivalent to the pattern (the stmt is not
5520      inserted into the code)
5521    - fill in the STMT_VINFO fields as follows:
5522 
5523                                   in_pattern_p  related_stmt    vec_stmt
5524          S1: a_i = ....                 -       -               -
5525          S2: a_2 = ..use(a_i)..         -       -               -
5526          S3: a_1 = ..use(a_2)..         -       -               -
5527          S4: a_0 = ..use(a_1)..         true    S6              -
5528           '---> S6: a_new = ....        -       S4              -
5529          S5: ... = ..use(a_0)..         -       -               -
5530 
5531    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
5532    to each other through the RELATED_STMT field).
5533 
5534    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
5535    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
5536    remain irrelevant unless used by stmts other than S4.
5537 
5538    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
5539    (because they are marked as irrelevant).  It will vectorize S6, and record
5540    a pointer to the new vector stmt VS6 from S6 (as usual).
5541    S4 will be skipped, and S5 will be vectorized as usual:
5542 
5543                                   in_pattern_p  related_stmt    vec_stmt
5544          S1: a_i = ....                 -       -               -
5545          S2: a_2 = ..use(a_i)..         -       -               -
5546          S3: a_1 = ..use(a_2)..         -       -               -
5547        > VS6: va_new = ....             -       -               -
5548          S4: a_0 = ..use(a_1)..         true    S6              VS6
5549           '---> S6: a_new = ....        -       S4              VS6
5550        > VS5: ... = ..vuse(va_new)..    -       -               -
5551          S5: ... = ..use(a_0)..         -       -               -
5552 
5553    DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
5554    elsewhere), and we'll end up with:
5555 
5556         VS6: va_new = ....
5557         VS5: ... = ..vuse(va_new)..
5558 
5559    In case of more than one pattern statements, e.g., widen-mult with
5560    intermediate type:
5561 
5562      S1  a_t = ;
5563      S2  a_T = (TYPE) a_t;
5564            '--> S3: a_it = (interm_type) a_t;
5565      S4  prod_T = a_T * CONST;
5566            '--> S5: prod_T' = a_it w* CONST;
5567 
5568    there may be other users of a_T outside the pattern.  In that case S2 will
5569    be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5570    and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
5571    be recorded in S3.  */
5572 
5573 void
5574 vect_pattern_recog (vec_info *vinfo)
5575 {
5576   class loop *loop;
5577   basic_block *bbs;
5578   unsigned int nbbs;
5579   gimple_stmt_iterator si;
5580   unsigned int i, j;
5581 
5582   vect_determine_precisions (vinfo);
5583 
5584   DUMP_VECT_SCOPE ("vect_pattern_recog");
5585 
5586   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5587     {
5588       loop = LOOP_VINFO_LOOP (loop_vinfo);
5589       bbs = LOOP_VINFO_BBS (loop_vinfo);
5590       nbbs = loop->num_nodes;
5591 
5592       /* Scan through the loop stmts, applying the pattern recognition
5593 	 functions starting at each stmt visited:  */
5594       for (i = 0; i < nbbs; i++)
5595 	{
5596 	  basic_block bb = bbs[i];
5597 	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5598 	    {
5599 	      if (is_gimple_debug (gsi_stmt (si)))
5600 		continue;
5601 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
5602 	      /* Scan over all generic vect_recog_xxx_pattern functions.  */
5603 	      for (j = 0; j < NUM_PATTERNS; j++)
5604 		vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
5605 				      stmt_info);
5606 	    }
5607 	}
5608     }
5609   else
5610     {
5611       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5612       for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5613 	for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
5614 	     !gsi_end_p (gsi); gsi_next (&gsi))
5615 	  {
5616 	    stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
5617 	    if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
5618 	      continue;
5619 
5620 	    /* Scan over all generic vect_recog_xxx_pattern functions.  */
5621 	    for (j = 0; j < NUM_PATTERNS; j++)
5622 	      vect_pattern_recog_1 (vinfo,
5623 				    &vect_vect_recog_func_ptrs[j], stmt_info);
5624 	  }
5625     }
5626 
5627   /* After this no more add_stmt calls are allowed.  */
5628   vinfo->stmt_vec_info_ro = true;
5629 }
5630