/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2021 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"  /* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "fold-const-call.h"
#include "attribs.h"
#include "cgraph.h"
#include "omp-simd-clone.h"
#include "predict.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"

/* Return true if we have a useful VR_RANGE range for VAR, storing it
   in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */

static bool
vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
{
  value_range_kind vr_type = get_range_info (var, min_value, max_value);
  wide_int nonzero = get_nonzero_bits (var);
  signop sgn = TYPE_SIGN (TREE_TYPE (var));
  if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
					 nonzero, sgn) == VR_RANGE)
    {
      if (dump_enabled_p ())
	{
	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
	  dump_printf (MSG_NOTE, " has range [");
	  dump_hex (MSG_NOTE, *min_value);
	  dump_printf (MSG_NOTE, ", ");
	  dump_hex (MSG_NOTE, *max_value);
	  dump_printf (MSG_NOTE, "]\n");
	}
      return true;
    }
  else
    {
      if (dump_enabled_p ())
	{
	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
	  dump_printf (MSG_NOTE, " has no range info\n");
	}
      return false;
    }
}

/* Report that we've found an instance of the pattern NAME in
   statement STMT.  */

static void
vect_pattern_detected (const char *name, gimple *stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
}

/* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
   return the pattern statement's stmt_vec_info.  Set its vector type to
   VECTYPE if it doesn't have one already.  */

static stmt_vec_info
vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
			stmt_vec_info orig_stmt_info, tree vectype)
{
  stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));

  pattern_stmt_info->pattern_stmt_p = true;
  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
    {
      gcc_assert (VECTOR_BOOLEAN_TYPE_P (vectype)
		  == vect_use_mask_type_p (orig_stmt_info));
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
      pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
    }
  return pattern_stmt_info;
}

/* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
   Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
   have one already.  */

static void
vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
		       stmt_vec_info orig_stmt_info, tree vectype)
{
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info)
    = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
}

/* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
   is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
   be different from the vector type of the final pattern statement.
   If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
   from which it was derived.  */

static inline void
append_pattern_def_seq (vec_info *vinfo,
			stmt_vec_info stmt_info, gimple *new_stmt,
			tree vectype = NULL_TREE,
			tree scalar_type_for_mask = NULL_TREE)
{
  gcc_assert (!scalar_type_for_mask
	      == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
  if (vectype)
    {
      stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
      if (scalar_type_for_mask)
	new_stmt_info->mask_precision
	  = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
    }
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      new_stmt);
}

/* The caller wants to perform new operations on vect_external variable
   VAR, so that the result of the operations would also be vect_external.
   Return the edge on which the operations can be performed, if one exists.
   Return null if the operations should instead be treated as part of
   the pattern that needs them.  */

static edge
vect_get_external_def_edge (vec_info *vinfo, tree var)
{
  edge e = NULL;
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      e = loop_preheader_edge (loop_vinfo->loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (var))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
	    e = NULL;
	}
    }
  return e;
}

/* Return true if the target supports a vector version of CODE,
   where CODE is known to map to a direct optab.  ITYPE specifies
   the type of (some of) the scalar inputs and OTYPE specifies the
   type of the scalar result.

   If CODE allows the inputs and outputs to have different types
   (such as for WIDEN_SUM_EXPR), it is the input mode rather
   than the output mode that determines the appropriate target pattern.
   Operand 0 of the target pattern then specifies the mode that the output
   must have.

   When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
   Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
   is nonnull.  */

static bool
vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
				 tree itype, tree *vecotype_out,
				 tree *vecitype_out = NULL)
{
  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
  if (!vecitype)
    return false;

  tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
  if (!vecotype)
    return false;

  optab optab = optab_for_tree_code (code, vecitype, optab_default);
  if (!optab)
    return false;

  insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
  if (icode == CODE_FOR_nothing
      || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
    return false;

  *vecotype_out = vecotype;
  if (vecitype_out)
    *vecitype_out = vecitype;
  return true;
}

/* Round bit precision PRECISION up to a full element.  */

static unsigned int
vect_element_precision (unsigned int precision)
{
  precision = 1 << ceil_log2 (precision);
  return MAX (precision, BITS_PER_UNIT);
}

/* If OP is defined by a statement that's being considered for vectorization,
   return information about that statement, otherwise return NULL.  */

static stmt_vec_info
vect_get_internal_def (vec_info *vinfo, tree op)
{
  stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
  if (def_stmt_info
      && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
    return def_stmt_info;
  return NULL;
}

/* Check whether NAME, an ssa-name used in STMT_VINFO,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vec_info def_stmt_info;
  if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, vinfo, &dt))
    return false;

  return true;
}

/* Holds information about an input operand after some sign changes
   and type promotions have been peeled away.  */
class vect_unpromoted_value {
public:
  vect_unpromoted_value ();

  void set_op (tree, vect_def_type, stmt_vec_info = NULL);

  /* The value obtained after peeling away zero or more casts.  */
  tree op;

  /* The type of OP.  */
  tree type;

  /* The definition type of OP.  */
  vect_def_type dt;

  /* If OP is the result of peeling at least one cast, and if the cast
     of OP itself is a vectorizable statement, CASTER identifies that
     statement, otherwise it is null.  */
  stmt_vec_info caster;
};

inline vect_unpromoted_value::vect_unpromoted_value ()
  : op (NULL_TREE),
    type (NULL_TREE),
    dt (vect_uninitialized_def),
    caster (NULL)
{
}

/* Set the operand to OP_IN, its definition type to DT_IN, and the
   statement that casts it to CASTER_IN.  */

inline void
vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
			       stmt_vec_info caster_in)
{
  op = op_in;
  type = TREE_TYPE (op);
  dt = dt_in;
  caster = caster_in;
}

/* If OP is a vectorizable SSA name, strip a sequence of integer conversions
   to reach some vectorizable inner operand OP', continuing as long as it
   is possible to convert OP' back to OP using a possible sign change
   followed by a possible promotion P.  Return this OP', or null if OP is
   not a vectorizable SSA name.  If there is a promotion P, describe its
   input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
   is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
   have more than one user.

   A successful return means that it is possible to go from OP' to OP
   via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
   whereas the cast from UNPROM to OP might be a promotion, a sign
   change, or a nop.

   E.g. say we have:

       signed short *ptr = ...;
       signed short C = *ptr;
       unsigned short B = (unsigned short) C;   // sign change
       signed int A = (signed int) B;           // unsigned promotion
       ...possible other uses of A...
       unsigned int OP = (unsigned int) A;      // sign change

   In this case it's possible to go directly from C to OP using:

       OP = (unsigned int) (unsigned short) C;
	    +------------+ +--------------+
	       promotion      sign change

   so OP' would be C.  The input to the promotion is B, so UNPROM
   would describe B.  */

static tree
vect_look_through_possible_promotion (vec_info *vinfo, tree op,
				      vect_unpromoted_value *unprom,
				      bool *single_use_p = NULL)
{
  tree res = NULL_TREE;
  tree op_type = TREE_TYPE (op);
  unsigned int orig_precision = TYPE_PRECISION (op_type);
  unsigned int min_precision = orig_precision;
  stmt_vec_info caster = NULL;
  while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
    {
      /* See whether OP is simple enough to vectorize.  */
      stmt_vec_info def_stmt_info;
      gimple *def_stmt;
      vect_def_type dt;
      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
	break;

      /* If OP is the input of a demotion, skip over it to see whether
	 OP is itself the result of a promotion.  If so, the combined
	 effect of the promotion and the demotion might fit the required
	 pattern, otherwise neither operation fits.

	 This copes with cases such as the result of an arithmetic
	 operation being truncated before being stored, and where that
	 arithmetic operation has been recognized as an over-widened one.  */
      if (TYPE_PRECISION (op_type) <= min_precision)
	{
	  /* Use OP as the UNPROM described above if we haven't yet
	     found a promotion, or if using the new input preserves the
	     sign of the previous promotion.  */
	  if (!res
	      || TYPE_PRECISION (unprom->type) == orig_precision
	      || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
	    {
	      unprom->set_op (op, dt, caster);
	      min_precision = TYPE_PRECISION (op_type);
	    }
	  /* Stop if we've already seen a promotion and if this
	     conversion does more than change the sign.  */
	  else if (TYPE_PRECISION (op_type)
		   != TYPE_PRECISION (unprom->type))
	    break;

	  /* The sequence now extends to OP.  */
	  res = op;
	}

      /* See whether OP is defined by a cast.  Record it as CASTER if
	 the cast is potentially vectorizable.  */
      if (!def_stmt)
	break;
      caster = def_stmt_info;

      /* Ignore pattern statements, since we don't link uses for them.  */
      if (caster
	  && single_use_p
	  && !STMT_VINFO_RELATED_STMT (caster)
	  && !has_single_use (res))
	*single_use_p = false;

      gassign *assign = dyn_cast <gassign *> (def_stmt);
      if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
	break;

      /* Continue with the input to the cast.  */
      op = gimple_assign_rhs1 (def_stmt);
      op_type = TREE_TYPE (op);
    }
  return res;
}

/* OP is an integer operand to an operation that returns TYPE, and we
   want to treat the operation as a widening one.  So far we can treat
   it as widening from *COMMON_TYPE.

   Return true if OP is suitable for such a widening operation,
   either widening from *COMMON_TYPE or from some supertype of it.
   Update *COMMON_TYPE to the supertype in the latter case.

   SHIFT_P is true if OP is a shift amount.  */

static bool
vect_joust_widened_integer (tree type, bool shift_p, tree op,
			    tree *common_type)
{
  /* Calculate the minimum precision required by OP, without changing
     the sign of either operand.  */
  unsigned int precision;
  if (shift_p)
    {
      if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
	return false;
      precision = TREE_INT_CST_LOW (op);
    }
  else
    {
      precision = wi::min_precision (wi::to_widest (op),
				     TYPE_SIGN (*common_type));
      if (precision * 2 > TYPE_PRECISION (type))
	return false;
    }

  /* If OP requires a wider type, switch to that type.  The checks
     above ensure that this is still narrower than the result.  */
  precision = vect_element_precision (precision);
  if (TYPE_PRECISION (*common_type) < precision)
    *common_type = build_nonstandard_integer_type
      (precision, TYPE_UNSIGNED (*common_type));
  return true;
}

/* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
   is narrower than TYPE, storing the supertype in *COMMON_TYPE if so.  */

static bool
vect_joust_widened_type (tree type, tree new_type, tree *common_type)
{
  if (types_compatible_p (*common_type, new_type))
    return true;

  /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
  if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
      && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
    return true;

  /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
  if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
      && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
    {
      *common_type = new_type;
      return true;
    }

  /* We have mismatched signs, with the signed type being
     no wider than the unsigned type.  In this case we need
     a wider signed type.  */
  unsigned int precision = MAX (TYPE_PRECISION (*common_type),
				TYPE_PRECISION (new_type));
  precision *= 2;
  if (precision * 2 > TYPE_PRECISION (type))
    return false;

  *common_type = build_nonstandard_integer_type (precision, false);
  return true;
}

/* Check whether STMT_INFO can be viewed as a tree of integer operations
   in which each node either performs CODE or WIDENED_CODE, and where
   each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
   specifies the maximum number of leaf operands.  SHIFT_P says whether
   CODE and WIDENED_CODE are some sort of shift.

   If STMT_INFO is such a tree, return the number of leaf operands
   and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
   to a type that (a) is narrower than the result of STMT_INFO and
   (b) can hold all leaf operand values.

   Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
   exists.  */

static unsigned int
vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
		      tree_code widened_code, bool shift_p,
		      unsigned int max_nops,
		      vect_unpromoted_value *unprom, tree *common_type)
{
  /* Check for an integer operation with the right code.  */
  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign)
    return 0;

  tree_code rhs_code = gimple_assign_rhs_code (assign);
  if (rhs_code != code && rhs_code != widened_code)
    return 0;

  tree type = gimple_expr_type (assign);
  if (!INTEGRAL_TYPE_P (type))
    return 0;

  /* Assume that both operands will be leaf operands.  */
  max_nops -= 2;

  /* Check the operands.  */
  unsigned int next_op = 0;
  for (unsigned int i = 0; i < 2; ++i)
    {
      vect_unpromoted_value *this_unprom = &unprom[next_op];
      unsigned int nops = 1;
      tree op = gimple_op (assign, i + 1);
      if (i == 1 && TREE_CODE (op) == INTEGER_CST)
	{
	  /* We already have a common type from earlier operands.
	     Update it to account for OP.  */
	  this_unprom->set_op (op, vect_constant_def);
	  if (!vect_joust_widened_integer (type, shift_p, op, common_type))
	    return 0;
	}
      else
	{
	  /* Only allow shifts by constants.  */
	  if (shift_p && i == 1)
	    return 0;

	  if (!vect_look_through_possible_promotion (vinfo, op, this_unprom))
	    return 0;

	  if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
	    {
	      /* The operand isn't widened.  If STMT_INFO has the code
		 for an unwidened operation, recursively check whether
		 this operand is a node of the tree.  */
	      if (rhs_code != code
		  || max_nops == 0
		  || this_unprom->dt != vect_internal_def)
		return 0;

	      /* Give back the leaf slot allocated above now that we're
		 not treating this as a leaf operand.  */
	      max_nops += 1;

	      /* Recursively process the definition of the operand.  */
	      stmt_vec_info def_stmt_info
		= vinfo->lookup_def (this_unprom->op);
	      nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
					   widened_code, shift_p, max_nops,
					   this_unprom, common_type);
	      if (nops == 0)
		return 0;

	      max_nops -= nops;
	    }
	  else
	    {
	      /* Make sure that the operand is narrower than the result.  */
	      if (TYPE_PRECISION (this_unprom->type) * 2
		  > TYPE_PRECISION (type))
		return 0;

	      /* Update COMMON_TYPE for the new operand.  */
	      if (i == 0)
		*common_type = this_unprom->type;
	      else if (!vect_joust_widened_type (type, this_unprom->type,
						 common_type))
		return 0;
	    }
	}
      next_op += nops;
    }
  return next_op;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* STMT2_INFO describes a type conversion that could be split into STMT1
   followed by a version of STMT2_INFO that takes NEW_RHS as its first
   input.  Try to do this using pattern statements, returning true on
   success.  */

static bool
vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
		      gimple *stmt1, tree vectype)
{
  if (is_pattern_stmt_p (stmt2_info))
    {
      /* STMT2_INFO is part of a pattern.  Get the statement to which
	 the pattern is attached.  */
      stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
      vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Splitting pattern statement: %G", stmt2_info->stmt);

      /* Since STMT2_INFO is a pattern statement, we can change it
	 in-situ without worrying about changing the code for the
	 containing block.  */
      gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
			   stmt2_info->stmt);
	}

      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
      if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
	/* STMT2_INFO is the actual pattern statement.  Add STMT1
	   to the end of the definition sequence.  */
	gimple_seq_add_stmt_without_update (def_seq, stmt1);
      else
	{
	  /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
	     before it.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
	  gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
	}
      return true;
    }
  else
    {
      /* STMT2_INFO doesn't yet have a pattern.  Try to create a
	 two-statement pattern now.  */
      gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
      tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
      tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
      if (!lhs_vectype)
	return false;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Splitting statement: %G", stmt2_info->stmt);

      /* Add STMT1 as a singleton pattern definition sequence.  */
      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
      vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
      gimple_seq_add_stmt_without_update (def_seq, stmt1);

      /* Build the second of the two pattern statements.  */
      tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
      gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
      vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "into pattern statements: %G", stmt1);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
	}

      return true;
    }
}

/* Convert UNPROM to TYPE and return the result, adding new statements
   to STMT_INFO's pattern definition statements if no better way is
   available.  VECTYPE is the vector form of TYPE.  */

static tree
vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
		    vect_unpromoted_value *unprom, tree vectype)
{
  /* Check for a no-op conversion.  */
  if (types_compatible_p (type, TREE_TYPE (unprom->op)))
    return unprom->op;

  /* Allow the caller to create constant vect_unpromoted_values.  */
  if (TREE_CODE (unprom->op) == INTEGER_CST)
    return wide_int_to_tree (type, wi::to_widest (unprom->op));

  tree input = unprom->op;
  if (unprom->caster)
    {
      tree lhs = gimple_get_lhs (unprom->caster->stmt);
      tree lhs_type = TREE_TYPE (lhs);

      /* If the result of the existing cast is the right width, use it
	 instead of the source of the cast.  */
      if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
	input = lhs;
      /* If the precision we want is between the source and result
	 precisions of the existing cast, try splitting the cast into
	 two and tapping into a mid-way point.  */
      else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
	       && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
	{
	  /* In order to preserve the semantics of the original cast,
	     give the mid-way point the same signedness as the input value.

	     It would be possible to use a signed type here instead if
	     TYPE is signed and UNPROM->TYPE is unsigned, but that would
	     make the sign of the midtype sensitive to the order in
	     which we process the statements, since the signedness of
	     TYPE is the signedness required by just one of possibly
	     many users.  Also, unsigned promotions are usually as cheap
	     as or cheaper than signed ones, so it's better to keep an
	     unsigned promotion.  */
	  tree midtype = build_nonstandard_integer_type
	    (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
	  tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
	  if (vec_midtype)
	    {
	      input = vect_recog_temp_ssa_var (midtype, NULL);
	      gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
						       unprom->op);
	      if (!vect_split_statement (vinfo, unprom->caster, input,
					 new_stmt, vec_midtype))
		append_pattern_def_seq (vinfo, stmt_info,
					new_stmt, vec_midtype);
	    }
	}

      /* See if we can reuse an existing result.  */
      if (types_compatible_p (type, TREE_TYPE (input)))
	return input;
    }

  /* We need a new conversion statement.  */
  tree new_op = vect_recog_temp_ssa_var (type, NULL);
  gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);

  /* If OP is an external value, see if we can insert the new statement
     on an incoming edge.  */
  if (input == unprom->op && unprom->dt == vect_external_def)
    if (edge e = vect_get_external_def_edge (vinfo, input))
      {
	basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
	gcc_assert (!new_bb);
	return new_op;
      }

  /* As a (common) last resort, add the statement to the pattern itself.  */
  append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
  return new_op;
}

/* Invoke vect_convert_input for N elements of UNPROM and store the
   result in the corresponding elements of RESULT.  */

static void
vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
		     tree *result, tree type, vect_unpromoted_value *unprom,
		     tree vectype)
{
  for (unsigned int i = 0; i < n; ++i)
    {
      unsigned int j;
      for (j = 0; j < i; ++j)
	if (unprom[j].op == unprom[i].op)
	  break;
      if (j < i)
	result[i] = result[j];
      else
	result[i] = vect_convert_input (vinfo, stmt_info,
					type, &unprom[i], vectype);
    }
}

/* The caller has created a (possibly empty) sequence of pattern definition
   statements followed by a single statement PATTERN_STMT.  Cast the result
   of this final statement to TYPE.  If a new statement is needed, add
   PATTERN_STMT to the end of STMT_INFO's pattern definition statements
   and return the new statement, otherwise return PATTERN_STMT as-is.
   VECITYPE is the vector form of PATTERN_STMT's result type.  */

static gimple *
vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
		     gimple *pattern_stmt, tree vecitype)
{
  tree lhs = gimple_get_lhs (pattern_stmt);
  if (!types_compatible_p (type, TREE_TYPE (lhs)))
    {
      append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
      tree cast_var = vect_recog_temp_ssa_var (type, NULL);
      pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
    }
  return pattern_stmt;
}

/* Return true if STMT_INFO describes a reduction for which reassociation
   is allowed.  If STMT_INFO is part of a group, assume that it's part of
   a reduction chain and optimistically assume that all statements
   except the last allow reassociation.
   Also require it to have code CODE and to be a reduction
   in the outermost loop.  When returning true, store the operands in
   *OP0_OUT and *OP1_OUT.  */

static bool
vect_reassociating_reduction_p (vec_info *vinfo,
				stmt_vec_info stmt_info, tree_code code,
				tree *op0_out, tree *op1_out)
{
  loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
  if (!loop_info)
    return false;

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || gimple_assign_rhs_code (assign) != code)
    return false;

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  class loop *loop = LOOP_VINFO_LOOP (loop_info);
  if (loop && nested_in_vect_loop_p (loop, stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
    {
      if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
				       code))
	return false;
    }
  else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
    return false;

  *op0_out = gimple_assign_rhs1 (assign);
  *op1_out = gimple_assign_rhs2 (assign);
  if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
    std::swap (*op0_out, *op1_out);
  return true;
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
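
/* For illustration, a scalar source loop of the following shape (a
   hypothetical example; the array names and bound are arbitrary) is the
   kind of input this matcher is designed to recognize:

       signed char x[N], y[N];
       int sum = 0;
       for (int i = 0; i < N; i++)
	 sum += x[i] * y[i];	// S3-S7: widen, multiply, accumulate

   assuming the target provides a vector DOT_PROD_EXPR for the types
   involved.  */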

static gimple *
vect_recog_dot_prod_pattern (vec_info *vinfo,
			     stmt_vec_info stmt_vinfo, tree *type_out)
{
  tree oprnd0, oprnd1;
  gimple *last_stmt = stmt_vinfo->stmt;
  tree type, half_type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
	  DX = (TYPE1) X;
	  DY = (TYPE1) Y;
	  DPROD = DX * DY;
	  DDPROD = (TYPE2) DPROD;
	  sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1))
    return NULL;

  type = gimple_expr_type (last_stmt);

  vect_unpromoted_value unprom_mult;
  oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (!oprnd0)
    return NULL;

  stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
  if (!mult_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom0[2];
  if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
			     false, 2, unprom0, &half_type))
    return NULL;

  /* If there are two widening operations, make sure they agree on
     the sign of the extension.  */
  if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
      && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
					type_out, &half_vectype))
    return NULL;

  /* Get the inputs in the appropriate types.  */
  tree mult_oprnd[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
		       unprom0, half_vectype);

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      mult_oprnd[0], mult_oprnd[1], oprnd1);

  return pattern_stmt;
}


/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	SAD_EXPR <x_t, y_t, sum_0>
  */
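
/* For illustration, a scalar loop of the following shape (a hypothetical
   example) is the kind of input this matcher is designed to recognize:

       unsigned char x[N], y[N];
       int sum = 0;
       for (int i = 0; i < N; i++)
	 sum += abs (x[i] - y[i]);	// S3-S8: widen, sub, abs, accumulate

   assuming the target provides a vector SAD_EXPR (e.g. PSADBW on x86)
   for the types involved.  */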

static gimple *
vect_recog_sad_pattern (vec_info *vinfo,
			stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree half_type;

  /* Look for the following pattern
	  DX = (TYPE1) X;
	  DY = (TYPE1) Y;
	  DDIFF = DX - DY;
	  DAD = ABS_EXPR <DDIFF>;
	  DAD = (TYPE2) DAD;       #optional
	  sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  tree plus_oprnd0, plus_oprnd1;
  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
				       &plus_oprnd0, &plus_oprnd1))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Any non-truncating sequence of conversions is OK here, since
     with a successful match, the result of the ABS(U) is known to fit
     within the nonnegative range of the result type.  (It cannot be the
     negative of the minimum signed value due to the range of the widening
     MINUS_EXPR.)  */
  vect_unpromoted_value unprom_abs;
  plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
						      &unprom_abs);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in
     the loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (!plus_oprnd0)
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
  if (!abs_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
  if (!abs_stmt
      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  tree abs_type = TREE_TYPE (abs_oprnd);
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* Peel off conversions from the ABS input.  This can involve sign
     changes (e.g. from an unsigned subtraction to a signed ABS input)
     or signed promotion, but it can't include unsigned promotion.
     (Note that ABS of an unsigned promotion should have been folded
     away before now anyway.)  */
  vect_unpromoted_value unprom_diff;
  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
						    &unprom_diff);
  if (!abs_oprnd)
    return NULL;
  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
      && TYPE_UNSIGNED (unprom_diff.type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
  if (!diff_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom[2];
  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
			     WIDEN_MINUS_EXPR, false, 2, unprom, &half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
					type_out, &half_vectype))
    return NULL;

  /* Get the inputs to the SAD_EXPR in the appropriate types.  */
  tree sad_oprnd[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
		       unprom, half_vectype);

  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
					      sad_oprnd[1], plus_oprnd1);

  return pattern_stmt;
}

/* Recognize an operation that performs ORIG_CODE on widened inputs,
   so that it can be treated as though it had the form:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
    | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
    | RES_TYPE res = a_extend ORIG_CODE b_extend;

   Try to replace the pattern with:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
    | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op

   where EXT_TYPE is wider than HALF_TYPE but has the same signedness.

   SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
   name of the pattern being matched, for dump purposes.  */

static gimple *
vect_recog_widen_op_pattern (vec_info *vinfo,
			     stmt_vec_info last_stmt_info, tree *type_out,
			     tree_code orig_code, tree_code wide_code,
			     bool shift_p, const char *name)
{
  gimple *last_stmt = last_stmt_info->stmt;

  vect_unpromoted_value unprom[2];
  tree half_type;
  if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
			     shift_p, 2, unprom, &half_type))
    return NULL;

  /* Pattern detected.  */
  vect_pattern_detected (name, last_stmt);

  tree type = gimple_expr_type (last_stmt);
  tree itype = type;
  if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
      || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
					    TYPE_UNSIGNED (half_type));

  /* Check target support.  */
  tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
  tree ctype = itype;
  tree vecctype = vecitype;
  if (orig_code == MINUS_EXPR
      && TYPE_UNSIGNED (itype)
      && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
    {
      /* Subtraction is special, even if half_type is unsigned and no matter
	 whether type is signed or unsigned, if type is wider than itype,
	 we need to sign-extend from the widening operation result to the
	 result type.
	 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
	 itype unsigned short and type either int or unsigned int.
	 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
	 (unsigned short) 0xffff, but for type int we want the result -1
	 and for type unsigned int 0xffffffff rather than 0xffff.  */
      ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
      vecctype = get_vectype_for_scalar_type (vinfo, ctype);
    }

  enum tree_code dummy_code;
  int dummy_int;
  auto_vec<tree> dummy_vec;
  if (!vectype
      || !vecitype
      || !vecctype
      || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_out = get_vectype_for_scalar_type (vinfo, type);
  if (!*type_out)
    return NULL;

  tree oprnd[2];
  vect_convert_inputs (vinfo, last_stmt_info,
		       2, oprnd, half_type, unprom, vectype);

  tree var = vect_recog_temp_ssa_var (itype, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, wide_code,
					      oprnd[0], oprnd[1]);

  if (vecctype != vecitype)
    pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
					pattern_stmt, vecitype);

  return vect_convert_output (vinfo, last_stmt_info,
			      type, pattern_stmt, vecctype);
}

/* Try to detect multiplication on widened inputs, converting MULT_EXPR
   to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */
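
/* For instance, with 'short' inputs (a hypothetical example), the widened
   multiplication in

       short a, b;
       int res = (int) a * (int) b;

   becomes a single WIDEN_MULT_EXPR, assuming the target supports a vector
   widening multiply for these modes.  */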

static gimple *
vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
			       tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      MULT_EXPR, WIDEN_MULT_EXPR, false,
				      "vect_recog_widen_mult_pattern");
}

/* Try to detect addition on widened inputs, converting PLUS_EXPR
   to WIDEN_PLUS_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
			       tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      PLUS_EXPR, WIDEN_PLUS_EXPR, false,
				      "vect_recog_widen_plus_pattern");
}

/* Try to detect subtraction on widened inputs, converting MINUS_EXPR
   to WIDEN_MINUS_EXPR.  See vect_recog_widen_op_pattern for details.  */
static gimple *
vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
				tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      MINUS_EXPR, WIDEN_MINUS_EXPR, false,
				      "vect_recog_widen_minus_pattern");
}

/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	x = x * x
   or
	x = sqrt (x)
  */
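
/* For instance (a hypothetical example), the call

       double y = ...;
       double x = pow (y, 2.0);

   is rewritten as x = y * y, which vectorizes as an ordinary multiply,
   while pow (y, 0.5) becomes a call to the IFN_SQRT internal function
   when the target supports a vector square root.  */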

static gimple *
vect_recog_pow_pattern (vec_info *vinfo,
			stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree base, exp;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  switch (gimple_call_combined_fn (last_stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      break;

    default:
      return NULL;
    }

  base = gimple_call_arg (last_stmt, 0);
  exp = gimple_call_arg (last_stmt, 1);
  if (TREE_CODE (exp) != REAL_CST
      && TREE_CODE (exp) != INTEGER_CST)
    {
      if (flag_unsafe_math_optimizations
	  && TREE_CODE (base) == REAL_CST
	  && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
	{
	  combined_fn log_cfn;
	  built_in_function exp_bfn;
	  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
	    {
	    case BUILT_IN_POW:
	      log_cfn = CFN_BUILT_IN_LOG;
	      exp_bfn = BUILT_IN_EXP;
	      break;
	    case BUILT_IN_POWF:
	      log_cfn = CFN_BUILT_IN_LOGF;
	      exp_bfn = BUILT_IN_EXPF;
	      break;
	    case BUILT_IN_POWL:
	      log_cfn = CFN_BUILT_IN_LOGL;
	      exp_bfn = BUILT_IN_EXPL;
	      break;
	    default:
	      return NULL;
	    }
	  tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
	  tree exp_decl = builtin_decl_implicit (exp_bfn);
	  /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
	     does that, but if C is a power of 2, we want to use
	     exp2 (log2 (C) * x) in the non-vectorized version, but for
	     vectorization we don't have vectorized exp2.  */
	  if (logc
	      && TREE_CODE (logc) == REAL_CST
	      && exp_decl
	      && lookup_attribute ("omp declare simd",
				   DECL_ATTRIBUTES (exp_decl)))
	    {
	      cgraph_node *node = cgraph_node::get_create (exp_decl);
	      if (node->simd_clones == NULL)
		{
		  if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
		      || node->definition)
		    return NULL;
		  expand_simd_clones (node);
		  if (node->simd_clones == NULL)
		    return NULL;
		}
	      *type_out = get_vectype_for_scalar_type (vinfo,
						       TREE_TYPE (base));
	      if (!*type_out)
		return NULL;
	      tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
	      append_pattern_def_seq (vinfo, stmt_vinfo, g);
	      tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      g = gimple_build_call (exp_decl, 1, def);
	      gimple_call_set_lhs (g, res);
	      return g;
	    }
	}

      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base),
					    MULT_EXPR, TREE_TYPE (base),
					    type_out))
	return NULL;

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
      if (*type_out
	  && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
					     OPTIMIZE_FOR_SPEED))
	{
	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	  gimple_call_set_lhs (stmt, var);
	  gimple_call_set_nothrow (stmt, true);
	  return stmt;
	}
    }

  return NULL;
}


/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S3, the pattern {S2,S3} will
   be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
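
/* For illustration, a scalar loop of the following shape (a hypothetical
   example) matches this pattern:

       short x[N];
       int sum = 0;
       for (int i = 0; i < N; i++)
	 sum += x[i];		// S2-S3: widen and accumulate

   assuming the target provides a vector WIDEN_SUM_EXPR for these types.  */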

static gimple *
vect_recog_widen_sum_pattern (vec_info *vinfo,
			      stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1;
  tree type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
	  DX = (TYPE) X;
	  sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  vect_unpromoted_value unprom0;
  if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
      || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
    return NULL;

  vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);

  if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
					unprom0.type, type_out))
    return NULL;

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);

  return pattern_stmt;
}

/* Recognize cases in which an operation is performed in one type WTYPE
   but could be done more efficiently in a narrower type NTYPE.  For example,
   if we have:

     ATYPE a;  // narrower than NTYPE
     BTYPE b;  // narrower than NTYPE
     WTYPE aw = (WTYPE) a;
     WTYPE bw = (WTYPE) b;
     WTYPE res = aw + bw;  // only uses of aw and bw

   then it would be more efficient to do:

     NTYPE an = (NTYPE) a;
     NTYPE bn = (NTYPE) b;
     NTYPE resn = an + bn;
     WTYPE res = (WTYPE) resn;

   Other situations include things like:

     ATYPE a;  // NTYPE or narrower
     WTYPE aw = (WTYPE) a;
     WTYPE res = aw + b;

   when only "(NTYPE) res" is significant.  In that case it's more efficient
   to truncate "b" and do the operation on NTYPE instead:

     NTYPE an = (NTYPE) a;
     NTYPE bn = (NTYPE) b;  // truncation
     NTYPE resn = an + bn;
     WTYPE res = (WTYPE) resn;

   All users of "res" should then use "resn" instead, making the final
   statement dead (not marked as relevant).  The final statement is still
   needed to maintain the type correctness of the IR.

   vect_determine_precisions has already determined the minimum
   precision of the operation and the minimum precision required
   by users of the result.  */
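
/* For illustration, with 'unsigned char' arrays (a hypothetical example),
   the addition in

       unsigned char a[N], b[N], c[N];
       for (int i = 0; i < N; i++)
	 c[i] = a[i] + b[i];	// C promotes the arithmetic to int

   is performed in 'int' in GIMPLE but only 8 bits of the result are
   stored, so the operation can be narrowed back to a byte addition,
   quadrupling the number of elements processed per vector compared with
   doing the arithmetic in 32 bits.  */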

static gimple *
vect_recog_over_widening_pattern (vec_info *vinfo,
				  stmt_vec_info last_stmt_info, tree *type_out)
{
  gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
  if (!last_stmt)
    return NULL;

  /* See whether we have found that this operation can be done on a
     narrower type without changing its semantics.  */
  unsigned int new_precision = last_stmt_info->operation_precision;
  if (!new_precision)
    return NULL;

  tree lhs = gimple_assign_lhs (last_stmt);
  tree type = TREE_TYPE (lhs);
  tree_code code = gimple_assign_rhs_code (last_stmt);

  /* Punt for reductions where we don't handle the type conversions.  */
  if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
    return NULL;

  /* Keep the first operand of a COND_EXPR as-is: only the other two
     operands are interesting.  */
  unsigned int first_op = (code == COND_EXPR ? 2 : 1);

  /* Check the operands.  */
  unsigned int nops = gimple_num_ops (last_stmt) - first_op;
  auto_vec <vect_unpromoted_value, 3> unprom (nops);
  unprom.quick_grow (nops);
  unsigned int min_precision = 0;
  bool single_use_p = false;
  for (unsigned int i = 0; i < nops; ++i)
    {
      tree op = gimple_op (last_stmt, first_op + i);
      if (TREE_CODE (op) == INTEGER_CST)
	unprom[i].set_op (op, vect_constant_def);
      else if (TREE_CODE (op) == SSA_NAME)
	{
	  bool op_single_use_p = true;
	  if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
						     &op_single_use_p))
	    return NULL;
	  /* If:

	     (1) N bits of the result are needed;
	     (2) all inputs are widened from M<N bits; and
	     (3) one operand OP is a single-use SSA name

	     we can shift the M->N widening from OP to the output
	     without changing the number or type of extensions involved.
	     This then reduces the number of copies of STMT_INFO.

	     If instead of (3) more than one operand is a single-use SSA name,
	     shifting the extension to the output is even more of a win.

	     If instead:

	     (1) N bits of the result are needed;
	     (2) one operand OP2 is widened from M2<N bits;
	     (3) another operand OP1 is widened from M1<M2 bits; and
	     (4) both OP1 and OP2 are single-use

	     the choice is between:

	     (a) truncating OP2 to M1, doing the operation on M1,
		 and then widening the result to N

	     (b) widening OP1 to M2, doing the operation on M2, and then
		 widening the result to N

	     Both shift the M2->N widening of the inputs to the output.
	     (a) additionally shifts the M1->M2 widening to the output;
	     it requires fewer copies of STMT_INFO but requires an extra
	     M2->M1 truncation.

	     Which is better will depend on the complexity and cost of
	     STMT_INFO, which is hard to predict at this stage.  However,
	     a clear tie-breaker in favor of (b) is the fact that the
	     truncation in (a) increases the length of the operation chain.

	     If instead of (4) only one of OP1 or OP2 is single-use,
	     (b) is still a win over doing the operation in N bits:
	     it still shifts the M2->N widening on the single-use operand
	     to the output and reduces the number of STMT_INFO copies.

	     If neither operand is single-use then operating on fewer than
	     N bits might lead to more extensions overall.  Whether it does
	     or not depends on global information about the vectorization
	     region, and whether that's a good trade-off would again
	     depend on the complexity and cost of the statements involved,
	     as well as things like register pressure that are not normally
1680 modelled at this stage. We therefore ignore these cases
1681 and just optimize the clear single-use wins above.
1682
1683 Thus we take the maximum precision of the unpromoted operands
1684 and record whether any operand is single-use. */
1685 if (unprom[i].dt == vect_internal_def)
1686 {
1687 min_precision = MAX (min_precision,
1688 TYPE_PRECISION (unprom[i].type));
1689 single_use_p |= op_single_use_p;
1690 }
1691 }
1692 else
1693 return NULL;
1694 }
1695
1696 /* Although the operation could be done in operation_precision, we have
1697 to balance that against introducing extra truncations or extensions.
1698 Calculate the minimum precision that can be handled efficiently.
1699
1700 The loop above determined that the operation could be handled
1701 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1702 extension from the inputs to the output without introducing more
1703 instructions, and would reduce the number of instructions required
1704 for STMT_INFO itself.
1705
1706 vect_determine_precisions has also determined that the result only
1707 needs min_output_precision bits. Truncating by a factor of N times
1708 requires a tree of N - 1 instructions, so if TYPE is N times wider
1709 than min_output_precision, doing the operation in TYPE and truncating
1710 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1711 In contrast:
1712
1713 - truncating the input to a unary operation and doing the operation
1714 in the new type requires at most N - 1 + 1 = N instructions per
1715 output vector
1716
1717 - doing the same for a binary operation requires at most
1718 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1719
1720 Both unary and binary operations require fewer instructions than
1721 this if the operands were extended from a suitable truncated form.
1722 Thus there is usually nothing to lose by doing operations in
1723 min_output_precision bits, but there can be something to gain. */
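  /* A worked instance of the counts above: if TYPE is four times wider
     than min_output_precision (N = 4), doing a binary operation in TYPE
     and truncating costs 4 + 3 = 7 instructions per output vector,
     truncating both inputs first also costs 3 * 2 + 1 = 7, and a unary
     operation improves from 7 to 3 + 1 = 4.  */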
1724 if (!single_use_p)
1725 min_precision = last_stmt_info->min_output_precision;
1726 else
1727 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1728
1729 /* Apply the minimum efficient precision we just calculated. */
1730 if (new_precision < min_precision)
1731 new_precision = min_precision;
1732 new_precision = vect_element_precision (new_precision);
1733 if (new_precision >= TYPE_PRECISION (type))
1734 return NULL;
1735
1736 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1737
1738 *type_out = get_vectype_for_scalar_type (vinfo, type);
1739 if (!*type_out)
1740 return NULL;
1741
1742 /* We've found a viable pattern. Get the new type of the operation. */
1743 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1744 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1745
1746 /* If we're truncating an operation, we need to make sure that we
1747 don't introduce new undefined overflow. The codes tested here are
1748 a subset of those accepted by vect_truncatable_operation_p. */
1749 tree op_type = new_type;
1750 if (TYPE_OVERFLOW_UNDEFINED (new_type)
1751 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
1752 op_type = build_nonstandard_integer_type (new_precision, true);
1753
1754 /* We specifically don't check here whether the target supports the
1755 new operation, since it might be something that a later pattern
1756 wants to rewrite anyway. If targets have a minimum element size
1757 for some optabs, we should pattern-match smaller ops to larger ops
1758 where beneficial. */
1759 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1760 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
1761 if (!new_vectype || !op_vectype)
1762 return NULL;
1763
1764 if (dump_enabled_p ())
1765 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1766 type, new_type);
1767
1768 /* Calculate the rhs operands for an operation on OP_TYPE. */
1769 tree ops[3] = {};
1770 for (unsigned int i = 1; i < first_op; ++i)
1771 ops[i - 1] = gimple_op (last_stmt, i);
1772 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
1773 op_type, &unprom[0], op_vectype);
1774
1775 /* Use the operation to produce a result of type OP_TYPE. */
1776 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
1777 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1778 ops[0], ops[1], ops[2]);
1779 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1780
1781 if (dump_enabled_p ())
1782 dump_printf_loc (MSG_NOTE, vect_location,
1783 "created pattern stmt: %G", pattern_stmt);
1784
1785 /* Convert back to the original signedness, if OP_TYPE is different
1786 from NEW_TYPE. */
1787 if (op_type != new_type)
1788 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
1789 pattern_stmt, op_vectype);
1790
1791 /* Promote the result to the original type. */
1792 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
1793 pattern_stmt, new_vectype);
1794
1795 return pattern_stmt;
1796 }
1797
1798 /* Recognize the following patterns:
1799
1800 ATYPE a; // narrower than TYPE
1801 BTYPE b; // narrower than TYPE
1802
1803 1) Multiply high with scaling
1804 TYPE res = ((TYPE) a * (TYPE) b) >> c;
1805 2) ... or also with rounding
1806      TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
1807
1808 where only the bottom half of res is used. */
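 /* For instance (a hypothetical Q15 fixed-point multiplication,
    assuming 16-bit shorts and 32-bit ints):

      short a, b;
      short res = ((int) a * (int) b) >> 15;		   // case 1
      short rnd = ((((int) a * (int) b) >> 14) + 1) >> 1;  // case 2

    Case 1 maps to IFN_MULHS and case 2 to IFN_MULHRS below.  */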
1809
1810 static gimple *
1811 vect_recog_mulhs_pattern (vec_info *vinfo,
1812 stmt_vec_info last_stmt_info, tree *type_out)
1813 {
1814 /* Check for a right shift. */
1815 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1816 if (!last_stmt
1817 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
1818 return NULL;
1819
1820 /* Check that the shift result is wider than the users of the
1821 result need (i.e. that narrowing would be a natural choice). */
1822 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1823 unsigned int target_precision
1824 = vect_element_precision (last_stmt_info->min_output_precision);
1825 if (!INTEGRAL_TYPE_P (lhs_type)
1826 || target_precision >= TYPE_PRECISION (lhs_type))
1827 return NULL;
1828
1829 /* Look through any change in sign on the outer shift input. */
1830 vect_unpromoted_value unprom_rshift_input;
1831 tree rshift_input = vect_look_through_possible_promotion
1832 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
1833 if (!rshift_input
1834 || TYPE_PRECISION (TREE_TYPE (rshift_input))
1835 != TYPE_PRECISION (lhs_type))
1836 return NULL;
1837
1838 /* Get the definition of the shift input. */
1839 stmt_vec_info rshift_input_stmt_info
1840 = vect_get_internal_def (vinfo, rshift_input);
1841 if (!rshift_input_stmt_info)
1842 return NULL;
1843 gassign *rshift_input_stmt
1844 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
1845 if (!rshift_input_stmt)
1846 return NULL;
1847
1848 stmt_vec_info mulh_stmt_info;
1849 tree scale_term;
1850 internal_fn ifn;
1851 unsigned int expect_offset;
1852
1853 /* Check for the presence of the rounding term. */
1854 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
1855 {
1856 /* Check that the outer shift was by 1. */
1857 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
1858 return NULL;
1859
1860 /* Check that the second operand of the PLUS_EXPR is 1. */
1861 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
1862 return NULL;
1863
1864 /* Look through any change in sign on the addition input. */
1865 vect_unpromoted_value unprom_plus_input;
1866 tree plus_input = vect_look_through_possible_promotion
1867 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
1868 if (!plus_input
1869 || TYPE_PRECISION (TREE_TYPE (plus_input))
1870 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
1871 return NULL;
1872
1873 /* Get the definition of the multiply-high-scale part. */
1874 stmt_vec_info plus_input_stmt_info
1875 = vect_get_internal_def (vinfo, plus_input);
1876 if (!plus_input_stmt_info)
1877 return NULL;
1878 gassign *plus_input_stmt
1879 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
1880 if (!plus_input_stmt
1881 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
1882 return NULL;
1883
1884 /* Look through any change in sign on the scaling input. */
1885 vect_unpromoted_value unprom_scale_input;
1886 tree scale_input = vect_look_through_possible_promotion
1887 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
1888 if (!scale_input
1889 || TYPE_PRECISION (TREE_TYPE (scale_input))
1890 != TYPE_PRECISION (TREE_TYPE (plus_input)))
1891 return NULL;
1892
1893 /* Get the definition of the multiply-high part. */
1894 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
1895 if (!mulh_stmt_info)
1896 return NULL;
1897
1898 /* Get the scaling term. */
1899 scale_term = gimple_assign_rhs2 (plus_input_stmt);
1900
1901 expect_offset = target_precision + 2;
1902 ifn = IFN_MULHRS;
1903 }
1904 else
1905 {
1906 mulh_stmt_info = rshift_input_stmt_info;
1907 scale_term = gimple_assign_rhs2 (last_stmt);
1908
1909 expect_offset = target_precision + 1;
1910 ifn = IFN_MULHS;
1911 }
1912
1913 /* Check that the scaling factor is correct. */
1914 if (TREE_CODE (scale_term) != INTEGER_CST
1915 || wi::to_widest (scale_term) + expect_offset
1916 != TYPE_PRECISION (lhs_type))
1917 return NULL;
1918
1919 /* Check whether the scaling input term can be seen as two widened
1920 inputs multiplied together. */
1921 vect_unpromoted_value unprom_mult[2];
1922 tree new_type;
1923 unsigned int nops
1924 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
1925 false, 2, unprom_mult, &new_type);
1926 if (nops != 2)
1927 return NULL;
1928
1929 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
1930
1931 /* Adjust output precision. */
1932 if (TYPE_PRECISION (new_type) < target_precision)
1933 new_type = build_nonstandard_integer_type
1934 (target_precision, TYPE_UNSIGNED (new_type));
1935
1936 /* Check for target support. */
1937 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1938 if (!new_vectype
1939 || !direct_internal_fn_supported_p
1940 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
1941 return NULL;
1942
1943 /* The IR requires a valid vector type for the cast result, even though
1944 it's likely to be discarded. */
1945 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
1946 if (!*type_out)
1947 return NULL;
1948
1949   /* Generate the IFN_MULHS or IFN_MULHRS call.  */
1950 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1951 tree new_ops[2];
1952 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
1953 unprom_mult, new_vectype);
1954 gcall *mulhrs_stmt
1955 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
1956 gimple_call_set_lhs (mulhrs_stmt, new_var);
1957 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
1958
1959 if (dump_enabled_p ())
1960 dump_printf_loc (MSG_NOTE, vect_location,
1961 "created pattern stmt: %G", mulhrs_stmt);
1962
1963 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
1964 mulhrs_stmt, new_vectype);
1965 }
1966
1967 /* Recognize the patterns:
1968
1969 ATYPE a; // narrower than TYPE
1970 BTYPE b; // narrower than TYPE
1971 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1972 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1973
1974 where only the bottom half of avg is used. Try to transform them into:
1975
1976 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1977 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1978
1979 followed by:
1980
1981 TYPE avg = (TYPE) avg';
1982
1983 where NTYPE is no wider than half of TYPE. Since only the bottom half
1984 of avg is used, all or part of the cast of avg' should become redundant.
1985
1986 If there is no target support available, generate code to distribute rshift
1987 over plus and add a carry. */
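 /* For instance (a hypothetical example; many targets provide the
    rounding form directly, e.g. PAVGB on x86):

      unsigned char a[N], b[N], c[N];
      for (int i = 0; i < N; ++i)
        c[i] = (a[i] + b[i] + 1) >> 1;	// matches form (2), .AVG_CEIL

    The additions are done in int by the C promotions, but only the low
    8 bits of each result are stored.  */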
1988
1989 static gimple *
1990 vect_recog_average_pattern (vec_info *vinfo,
1991 stmt_vec_info last_stmt_info, tree *type_out)
1992 {
1993 /* Check for a shift right by one bit. */
1994 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1995 if (!last_stmt
1996 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1997 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1998 return NULL;
1999
2000 /* Check that the shift result is wider than the users of the
2001 result need (i.e. that narrowing would be a natural choice). */
2002 tree lhs = gimple_assign_lhs (last_stmt);
2003 tree type = TREE_TYPE (lhs);
2004 unsigned int target_precision
2005 = vect_element_precision (last_stmt_info->min_output_precision);
2006 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
2007 return NULL;
2008
2009 /* Look through any change in sign on the shift input. */
2010 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
2011 vect_unpromoted_value unprom_plus;
2012 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
2013 &unprom_plus);
2014 if (!rshift_rhs
2015 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
2016 return NULL;
2017
2018 /* Get the definition of the shift input. */
2019 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
2020 if (!plus_stmt_info)
2021 return NULL;
2022
2023 /* Check whether the shift input can be seen as a tree of additions on
2024 2 or 3 widened inputs.
2025
2026 Note that the pattern should be a win even if the result of one or
2027 more additions is reused elsewhere: if the pattern matches, we'd be
2028 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
2029 internal_fn ifn = IFN_AVG_FLOOR;
2030 vect_unpromoted_value unprom[3];
2031 tree new_type;
2032 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
2033 WIDEN_PLUS_EXPR, false, 3,
2034 unprom, &new_type);
2035 if (nops == 0)
2036 return NULL;
2037 if (nops == 3)
2038 {
2039 /* Check that one operand is 1. */
2040 unsigned int i;
2041 for (i = 0; i < 3; ++i)
2042 if (integer_onep (unprom[i].op))
2043 break;
2044 if (i == 3)
2045 return NULL;
2046 /* Throw away the 1 operand and keep the other two. */
2047 if (i < 2)
2048 unprom[i] = unprom[2];
2049 ifn = IFN_AVG_CEIL;
2050 }
2051
2052 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
2053
2054 /* We know that:
2055
2056 (a) the operation can be viewed as:
2057
2058 TYPE widened0 = (TYPE) UNPROM[0];
2059 TYPE widened1 = (TYPE) UNPROM[1];
2060 TYPE tmp1 = widened0 + widened1 {+ 1};
2061 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
2062
2063 (b) the first two statements are equivalent to:
2064
2065 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2066 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2067
2068 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2069 where sensible;
2070
2071 (d) all the operations can be performed correctly at twice the width of
2072 NEW_TYPE, due to the nature of the average operation; and
2073
2074 (e) users of the result of the right shift need only TARGET_PRECISION
2075 bits, where TARGET_PRECISION is no more than half of TYPE's
2076 precision.
2077
2078 Under these circumstances, the only situation in which NEW_TYPE
2079 could be narrower than TARGET_PRECISION is if widened0, widened1
2080 and an addition result are all used more than once. Thus we can
2081 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2082 as "free", whereas widening the result of the average instruction
2083 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
2084 therefore better not to go narrower than TARGET_PRECISION. */
2085 if (TYPE_PRECISION (new_type) < target_precision)
2086 new_type = build_nonstandard_integer_type (target_precision,
2087 TYPE_UNSIGNED (new_type));
2088
2089 /* Check for target support. */
2090 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2091 if (!new_vectype)
2092 return NULL;
2093
2094 bool fallback_p = false;
2095
2096 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2097 ;
2098 else if (TYPE_UNSIGNED (new_type)
2099 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2100 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2101 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2102 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2103 fallback_p = true;
2104 else
2105 return NULL;
2106
2107 /* The IR requires a valid vector type for the cast result, even though
2108 it's likely to be discarded. */
2109 *type_out = get_vectype_for_scalar_type (vinfo, type);
2110 if (!*type_out)
2111 return NULL;
2112
2113 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2114 tree new_ops[2];
2115 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2116 unprom, new_vectype);
2117
2118 if (fallback_p)
2119 {
2120       /* As a fallback, generate code for the following sequence:
2121
2122 shifted_op0 = new_ops[0] >> 1;
2123 shifted_op1 = new_ops[1] >> 1;
2124 sum_of_shifted = shifted_op0 + shifted_op1;
2125 unmasked_carry = new_ops[0] and/or new_ops[1];
2126 carry = unmasked_carry & 1;
2127 new_var = sum_of_shifted + carry;
2128 */
2129
2130 tree one_cst = build_one_cst (new_type);
2131 gassign *g;
2132
2133 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2134 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2135 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2136
2137 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2138 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2139 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2140
2141 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2142 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2143 shifted_op0, shifted_op1);
2144 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2145
2146 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2147 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2148 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2149 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2150
2151 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2152 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2153 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2154
2155 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2156 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
2157 }
2158
2159 /* Generate the IFN_AVG* call. */
2160 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2161 new_ops[1]);
2162 gimple_call_set_lhs (average_stmt, new_var);
2163 gimple_set_location (average_stmt, gimple_location (last_stmt));
2164
2165 if (dump_enabled_p ())
2166 dump_printf_loc (MSG_NOTE, vect_location,
2167 "created pattern stmt: %G", average_stmt);
2168
2169 return vect_convert_output (vinfo, last_stmt_info,
2170 type, average_stmt, new_vectype);
2171 }
2172
2173 /* Recognize cases in which the input to a cast is wider than its
2174 output, and the input is fed by a widening operation. Fold this
2175 by removing the unnecessary intermediate widening. E.g.:
2176
2177 unsigned char a;
2178 unsigned int b = (unsigned int) a;
2179 unsigned short c = (unsigned short) b;
2180
2181 -->
2182
2183 unsigned short c = (unsigned short) a;
2184
2185 Although this is rare in input IR, it is an expected side-effect
2186 of the over-widening pattern above.
2187
2188 This is beneficial also for integer-to-float conversions, if the
2189 widened integer has more bits than the float, and if the unwidened
2190 input doesn't. */
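 /* A hypothetical instance of the integer-to-float case, assuming
    32-bit ints, 64-bit long longs and 32-bit floats:

      unsigned int a;
      unsigned long long b = a;
      float f = (float) b;

    -->

      float f = (float) a;  */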
2191
2192 static gimple *
2193 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
2194 stmt_vec_info last_stmt_info, tree *type_out)
2195 {
2196 /* Check for a cast, including an integer-to-float conversion. */
2197 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2198 if (!last_stmt)
2199 return NULL;
2200 tree_code code = gimple_assign_rhs_code (last_stmt);
2201 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2202 return NULL;
2203
2204 /* Make sure that the rhs is a scalar with a natural bitsize. */
2205 tree lhs = gimple_assign_lhs (last_stmt);
2206 if (!lhs)
2207 return NULL;
2208 tree lhs_type = TREE_TYPE (lhs);
2209 scalar_mode lhs_mode;
2210 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2211 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2212 return NULL;
2213
2214 /* Check for a narrowing operation (from a vector point of view). */
2215 tree rhs = gimple_assign_rhs1 (last_stmt);
2216 tree rhs_type = TREE_TYPE (rhs);
2217 if (!INTEGRAL_TYPE_P (rhs_type)
2218 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2219 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2220 return NULL;
2221
2222 /* Try to find an unpromoted input. */
2223 vect_unpromoted_value unprom;
2224 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2225 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2226 return NULL;
2227
2228 /* If the bits above RHS_TYPE matter, make sure that they're the
2229 same when extending from UNPROM as they are when extending from RHS. */
2230 if (!INTEGRAL_TYPE_P (lhs_type)
2231 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2232 return NULL;
2233
2234 /* We can get the same result by casting UNPROM directly, to avoid
2235 the unnecessary widening and narrowing. */
2236 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2237
2238 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2239 if (!*type_out)
2240 return NULL;
2241
2242 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2243 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2244 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2245
2246 return pattern_stmt;
2247 }
2248
2249 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2250 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
2251
2252 static gimple *
2253 vect_recog_widen_shift_pattern (vec_info *vinfo,
2254 stmt_vec_info last_stmt_info, tree *type_out)
2255 {
2256 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
2257 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
2258 "vect_recog_widen_shift_pattern");
2259 }
2260
2261 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
2262
2263 type a_t, b_t, c_t;
2264
2265 S0 a_t = b_t r<< c_t;
2266
2267 Input/Output:
2268
2269 * STMT_VINFO: The stmt from which the pattern search begins,
2270 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
2271 with a sequence:
2272
2273 S1 d_t = -c_t;
2274 S2 e_t = d_t & (B - 1);
2275 S3 f_t = b_t << c_t;
2276 S4 g_t = b_t >> e_t;
2277 S0 a_t = f_t | g_t;
2278
2279    where B is the element bitsize of the type.
2280
2281 Output:
2282
2283 * TYPE_OUT: The type of the output of this pattern.
2284
2285 * Return value: A new stmt that will be used to replace the rotate
2286 S0 stmt. */
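 /* For example, with 32-bit unsigned elements (B == 32) the sequence
    computes the equivalent of:

      a_t = (b_t << c_t) | (b_t >> (-c_t & 31));

    which is well defined for every c_t in [0, 31], including 0.  */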
2287
2288 static gimple *
2289 vect_recog_rotate_pattern (vec_info *vinfo,
2290 stmt_vec_info stmt_vinfo, tree *type_out)
2291 {
2292 gimple *last_stmt = stmt_vinfo->stmt;
2293 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2294 gimple *pattern_stmt, *def_stmt;
2295 enum tree_code rhs_code;
2296 enum vect_def_type dt;
2297 optab optab1, optab2;
2298 edge ext_def = NULL;
2299 bool bswap16_p = false;
2300
2301 if (is_gimple_assign (last_stmt))
2302 {
2303 rhs_code = gimple_assign_rhs_code (last_stmt);
2304 switch (rhs_code)
2305 {
2306 case LROTATE_EXPR:
2307 case RROTATE_EXPR:
2308 break;
2309 default:
2310 return NULL;
2311 }
2312
2313 lhs = gimple_assign_lhs (last_stmt);
2314 oprnd0 = gimple_assign_rhs1 (last_stmt);
2315 type = TREE_TYPE (oprnd0);
2316 oprnd1 = gimple_assign_rhs2 (last_stmt);
2317 }
2318 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2319 {
2320 /* __builtin_bswap16 (x) is another form of x r>> 8.
2321 The vectorizer has bswap support, but only if the argument isn't
2322 promoted. */
2323 lhs = gimple_call_lhs (last_stmt);
2324 oprnd0 = gimple_call_arg (last_stmt, 0);
2325 type = TREE_TYPE (oprnd0);
2326 if (!lhs
2327 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2328 || TYPE_PRECISION (type) <= 16
2329 || TREE_CODE (oprnd0) != SSA_NAME
2330 || BITS_PER_UNIT != 8
2331 || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
2332 return NULL;
2333
2334 stmt_vec_info def_stmt_info;
2335 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2336 return NULL;
2337
2338 if (dt != vect_internal_def)
2339 return NULL;
2340
2341 if (gimple_assign_cast_p (def_stmt))
2342 {
2343 def = gimple_assign_rhs1 (def_stmt);
2344 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2345 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2346 oprnd0 = def;
2347 }
2348
2349 type = TREE_TYPE (lhs);
2350 vectype = get_vectype_for_scalar_type (vinfo, type);
2351 if (vectype == NULL_TREE)
2352 return NULL;
2353
2354 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2355 {
2356 /* The encoding uses one stepped pattern for each byte in the
2357 16-bit word. */
2358 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2359 for (unsigned i = 0; i < 3; ++i)
2360 for (unsigned j = 0; j < 2; ++j)
2361 elts.quick_push ((i + 1) * 2 - j - 1);
2362
2363 vec_perm_indices indices (elts, 1,
2364 TYPE_VECTOR_SUBPARTS (char_vectype));
2365 if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2366 {
2367 /* vectorizable_bswap can handle the __builtin_bswap16 if we
2368 undo the argument promotion. */
2369 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2370 {
2371 def = vect_recog_temp_ssa_var (type, NULL);
2372 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2373 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2374 oprnd0 = def;
2375 }
2376
2377 /* Pattern detected. */
2378 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2379
2380 *type_out = vectype;
2381
2382 /* Pattern supported. Create a stmt to be used to replace the
2383 pattern, with the unpromoted argument. */
2384 var = vect_recog_temp_ssa_var (type, NULL);
2385 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
2386 1, oprnd0);
2387 gimple_call_set_lhs (pattern_stmt, var);
2388 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
2389 gimple_call_fntype (last_stmt));
2390 return pattern_stmt;
2391 }
2392 }
2393
2394 oprnd1 = build_int_cst (integer_type_node, 8);
2395 rhs_code = LROTATE_EXPR;
2396 bswap16_p = true;
2397 }
2398 else
2399 return NULL;
2400
2401 if (TREE_CODE (oprnd0) != SSA_NAME
2402 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2403 || !INTEGRAL_TYPE_P (type)
2404 || !TYPE_UNSIGNED (type))
2405 return NULL;
2406
2407 stmt_vec_info def_stmt_info;
2408 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2409 return NULL;
2410
2411 if (dt != vect_internal_def
2412 && dt != vect_constant_def
2413 && dt != vect_external_def)
2414 return NULL;
2415
2416 vectype = get_vectype_for_scalar_type (vinfo, type);
2417 if (vectype == NULL_TREE)
2418 return NULL;
2419
2420 /* If vector/vector or vector/scalar rotate is supported by the target,
2421 don't do anything here. */
2422 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2423 if (optab1
2424 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2425 {
2426 use_rotate:
2427 if (bswap16_p)
2428 {
2429 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2430 {
2431 def = vect_recog_temp_ssa_var (type, NULL);
2432 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2433 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2434 oprnd0 = def;
2435 }
2436
2437 /* Pattern detected. */
2438 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2439
2440 *type_out = vectype;
2441
2442 /* Pattern supported. Create a stmt to be used to replace the
2443 pattern. */
2444 var = vect_recog_temp_ssa_var (type, NULL);
2445 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
2446 oprnd1);
2447 return pattern_stmt;
2448 }
2449 return NULL;
2450 }
2451
2452 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2453 {
2454 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2455 if (optab2
2456 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2457 goto use_rotate;
2458 }
2459
2460 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2461 don't do anything here either. */
2462 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2463 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2464 if (!optab1
2465 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2466 || !optab2
2467 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2468 {
2469 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2470 return NULL;
2471 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2472 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2473 if (!optab1
2474 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2475 || !optab2
2476 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2477 return NULL;
2478 }
2479
2480 *type_out = vectype;
2481
2482 if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2483 {
2484 def = vect_recog_temp_ssa_var (type, NULL);
2485 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2486 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2487 oprnd0 = def;
2488 }
2489
2490 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
2491 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2492
2493 def = NULL_TREE;
2494 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2495 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2496 def = oprnd1;
2497 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2498 {
2499 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2500 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2501 && TYPE_PRECISION (TREE_TYPE (rhs1))
2502 == TYPE_PRECISION (type))
2503 def = rhs1;
2504 }
2505
2506 if (def == NULL_TREE)
2507 {
2508 def = vect_recog_temp_ssa_var (type, NULL);
2509 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2510 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2511 }
2512 stype = TREE_TYPE (def);
2513
2514 if (TREE_CODE (def) == INTEGER_CST)
2515 {
2516 if (!tree_fits_uhwi_p (def)
2517 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2518 || integer_zerop (def))
2519 return NULL;
2520 def2 = build_int_cst (stype,
2521 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2522 }
2523 else
2524 {
2525 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
2526
2527 if (vecstype == NULL_TREE)
2528 return NULL;
2529 def2 = vect_recog_temp_ssa_var (stype, NULL);
2530 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2531 if (ext_def)
2532 {
2533 basic_block new_bb
2534 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2535 gcc_assert (!new_bb);
2536 }
2537 else
2538 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2539
2540 def2 = vect_recog_temp_ssa_var (stype, NULL);
2541 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
2542 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2543 gimple_assign_lhs (def_stmt), mask);
2544 if (ext_def)
2545 {
2546 basic_block new_bb
2547 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2548 gcc_assert (!new_bb);
2549 }
2550 else
2551 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2552 }
2553
2554 var1 = vect_recog_temp_ssa_var (type, NULL);
2555 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2556 ? LSHIFT_EXPR : RSHIFT_EXPR,
2557 oprnd0, def);
2558 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2559
2560 var2 = vect_recog_temp_ssa_var (type, NULL);
2561 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2562 ? RSHIFT_EXPR : LSHIFT_EXPR,
2563 oprnd0, def2);
2564 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2565
2566 /* Pattern detected. */
2567 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2568
2569 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2570 var = vect_recog_temp_ssa_var (type, NULL);
2571 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2572
2573 return pattern_stmt;
2574 }
2575
2576 /* Detect a vector by vector shift pattern that wouldn't otherwise be
2577    vectorized:
2578
2579 type a_t;
2580 TYPE b_T, res_T;
2581
2582 S1 a_t = ;
2583 S2 b_T = ;
2584 S3 res_T = b_T op a_t;
2585
2586    where type 'TYPE' is a type whose size differs from that of 'type',
2587 and op is <<, >> or rotate.
2588
2589 Also detect cases:
2590
2591 type a_t;
2592 TYPE b_T, c_T, res_T;
2593
2594 S0 c_T = ;
2595 S1 a_t = (type) c_T;
2596 S2 b_T = ;
2597 S3 res_T = b_T op a_t;
2598
2599 Input/Output:
2600
2601 * STMT_VINFO: The stmt from which the pattern search begins,
2602 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2603      with a shift/rotate which has the same type on both operands: in the
2604      second case simply b_T op c_T, in the first case with an added cast
2605      c_T = (TYPE) a_t in STMT_VINFO_PATTERN_DEF_SEQ.
2606
2607 Output:
2608
2609 * TYPE_OUT: The type of the output of this pattern.
2610
2611 * Return value: A new stmt that will be used to replace the shift/rotate
2612 S3 stmt. */
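 /* A hypothetical source-level example of the first form, assuming
    32-bit ints:

      unsigned int x[N];
      unsigned char s[N];
      for (int i = 0; i < N; ++i)
        x[i] = x[i] << s[i];	// shift amount narrower than value

    The pattern gives both operands of the vector shift the same type
    by widening the amount in the pattern def sequence.  */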
2613
2614 static gimple *
2615 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
2616 stmt_vec_info stmt_vinfo,
2617 tree *type_out)
2618 {
2619 gimple *last_stmt = stmt_vinfo->stmt;
2620 tree oprnd0, oprnd1, lhs, var;
2621 gimple *pattern_stmt;
2622 enum tree_code rhs_code;
2623
2624 if (!is_gimple_assign (last_stmt))
2625 return NULL;
2626
2627 rhs_code = gimple_assign_rhs_code (last_stmt);
2628 switch (rhs_code)
2629 {
2630 case LSHIFT_EXPR:
2631 case RSHIFT_EXPR:
2632 case LROTATE_EXPR:
2633 case RROTATE_EXPR:
2634 break;
2635 default:
2636 return NULL;
2637 }
2638
2639 lhs = gimple_assign_lhs (last_stmt);
2640 oprnd0 = gimple_assign_rhs1 (last_stmt);
2641 oprnd1 = gimple_assign_rhs2 (last_stmt);
2642 if (TREE_CODE (oprnd0) != SSA_NAME
2643 || TREE_CODE (oprnd1) != SSA_NAME
2644 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2645 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2646 || TYPE_PRECISION (TREE_TYPE (lhs))
2647 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2648 return NULL;
2649
2650 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2651 if (!def_vinfo)
2652 return NULL;
2653
2654 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
2655 if (*type_out == NULL_TREE)
2656 return NULL;
2657
2658 tree def = NULL_TREE;
2659 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2660 if (def_stmt && gimple_assign_cast_p (def_stmt))
2661 {
2662 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2663 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2664 && TYPE_PRECISION (TREE_TYPE (rhs1))
2665 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2666 {
2667 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2668 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2669 def = rhs1;
2670 else
2671 {
2672 tree mask
2673 = build_low_bits_mask (TREE_TYPE (rhs1),
2674 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2675 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2676 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2677 tree vecstype = get_vectype_for_scalar_type (vinfo,
2678 TREE_TYPE (rhs1));
2679 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2680 }
2681 }
2682 }
2683
2684 if (def == NULL_TREE)
2685 {
2686 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2687 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2688 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2689 }
2690
2691 /* Pattern detected. */
2692 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2693
2694 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2695 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2696 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2697
2698 return pattern_stmt;
2699 }
2700
2701 /* Return true iff the target has a vector optab implementing the operation
2702 CODE on type VECTYPE. */
2703
2704 static bool
2705 target_has_vecop_for_code (tree_code code, tree vectype)
2706 {
2707 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2708 return voptab
2709 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2710 }
2711
2712 /* Verify that the target has optabs of VECTYPE to perform all the steps
2713 needed by the multiplication-by-immediate synthesis algorithm described by
2714    ALG and VAR.  If SYNTH_SHIFT_P is true, ensure that vector addition is
2715 present. Return true iff the target supports all the steps. */
2716
2717 static bool
2718 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2719 tree vectype, bool synth_shift_p)
2720 {
2721 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2722 return false;
2723
2724 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2725 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2726
2727 if (var == negate_variant
2728 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2729 return false;
2730
2731 /* If we must synthesize shifts with additions make sure that vector
2732 addition is available. */
2733 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2734 return false;
2735
2736 for (int i = 1; i < alg->ops; i++)
2737 {
2738 switch (alg->op[i])
2739 {
2740 case alg_shift:
2741 break;
2742 case alg_add_t_m2:
2743 case alg_add_t2_m:
2744 case alg_add_factor:
2745 if (!supports_vplus)
2746 return false;
2747 break;
2748 case alg_sub_t_m2:
2749 case alg_sub_t2_m:
2750 case alg_sub_factor:
2751 if (!supports_vminus)
2752 return false;
2753 break;
2754 case alg_unknown:
2755 case alg_m:
2756 case alg_zero:
2757 case alg_impossible:
2758 return false;
2759 default:
2760 gcc_unreachable ();
2761 }
2762 }
2763
2764 return true;
2765 }
2766
2767 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2768 putting the final result in DEST. Append all statements but the last into
2769 VINFO. Return the last statement. */
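 /* For example, a shift by 3 is emitted as three successive doublings
    (a sketch of what the loop below produces):

      t1 = op + op;	// op << 1
      t2 = t1 + t1;	// op << 2
      dest = t2 + t2;	// op << 3
 */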
2770
2771 static gimple *
2772 synth_lshift_by_additions (vec_info *vinfo,
2773 tree dest, tree op, HOST_WIDE_INT amnt,
2774 stmt_vec_info stmt_info)
2775 {
2776 HOST_WIDE_INT i;
2777 tree itype = TREE_TYPE (op);
2778 tree prev_res = op;
2779 gcc_assert (amnt >= 0);
2780 for (i = 0; i < amnt; i++)
2781 {
2782 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2783 : dest;
2784 gimple *stmt
2785 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2786 prev_res = tmp_var;
2787 if (i < amnt - 1)
2788 append_pattern_def_seq (vinfo, stmt_info, stmt);
2789 else
2790 return stmt;
2791 }
2792 gcc_unreachable ();
2793 return NULL;
2794 }
2795
2796 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2797 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2798 the process if necessary. Append the resulting assignment statements
2799 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2800 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2801 left shifts using additions. */
2802
2803 static tree
2804 apply_binop_and_append_stmt (vec_info *vinfo,
2805 tree_code code, tree op1, tree op2,
2806 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2807 {
2808 if (integer_zerop (op2)
2809 && (code == LSHIFT_EXPR
2810 || code == PLUS_EXPR))
2811 {
2812 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2813 return op1;
2814 }
2815
2816 gimple *stmt;
2817 tree itype = TREE_TYPE (op1);
2818 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2819
2820 if (code == LSHIFT_EXPR
2821 && synth_shift_p)
2822 {
2823 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
2824 TREE_INT_CST_LOW (op2), stmt_vinfo);
2825 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2826 return tmp_var;
2827 }
2828
2829 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2830 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2831 return tmp_var;
2832 }
2833
2834 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2835 and simple arithmetic operations to be vectorized. Record the statements
2836 produced in STMT_VINFO and return the last statement in the sequence or
2837 NULL if it's not possible to synthesize such a multiplication.
2838 This function mirrors the behavior of expand_mult_const in expmed.c but
2839 works on tree-ssa form. */
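 /* For example, multiplication by 9 would typically be synthesized as
    an alg_add_t2_m step, and multiplication by 7 as alg_sub_t2_m
    (assuming the target supports vector shifts):

      t1 = op << 3;	// op * 8
      res = t1 + op;	// op * 9

      t1 = op << 3;	// op * 8
      res = t1 - op;	// op * 7
 */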
2840
2841 static gimple *
2842 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
2843 stmt_vec_info stmt_vinfo)
2844 {
2845 tree itype = TREE_TYPE (op);
2846 machine_mode mode = TYPE_MODE (itype);
2847 struct algorithm alg;
2848 mult_variant variant;
2849 if (!tree_fits_shwi_p (val))
2850 return NULL;
2851
2852 /* Multiplication synthesis by shifts, adds and subs can introduce
2853 signed overflow where the original operation didn't. Perform the
2854 operations on an unsigned type and cast back to avoid this.
2855 In the future we may want to relax this for synthesis algorithms
2856 that we can prove do not cause unexpected overflow. */
2857 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2858
2859 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2860
2861 /* Targets that don't support vector shifts but support vector additions
2862 can synthesize shifts that way. */
2863 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
2864
2865 HOST_WIDE_INT hwval = tree_to_shwi (val);
2866 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2867 The vectorizer's benefit analysis will decide whether it's beneficial
2868 to do this. */
2869 bool possible = choose_mult_variant (mode, hwval, &alg,
2870 &variant, MAX_COST);
2871 if (!possible)
2872 return NULL;
2873
2874 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
2875
2876 if (!vectype
2877 || !target_supports_mult_synth_alg (&alg, variant,
2878 vectype, synth_shift_p))
2879 return NULL;
2880
2881 tree accumulator;
2882
2883 /* Clear out the sequence of statements so we can populate it below. */
2884 gimple *stmt = NULL;
2885
2886 if (cast_to_unsigned_p)
2887 {
2888 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2889 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2890 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2891 op = tmp_op;
2892 }
2893
2894 if (alg.op[0] == alg_zero)
2895 accumulator = build_int_cst (multtype, 0);
2896 else
2897 accumulator = op;
2898
2899 bool needs_fixup = (variant == negate_variant)
2900 || (variant == add_variant);
2901
2902 for (int i = 1; i < alg.ops; i++)
2903 {
2904 tree shft_log = build_int_cst (multtype, alg.log[i]);
2905 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2906 tree tmp_var = NULL_TREE;
2907
2908 switch (alg.op[i])
2909 {
2910 case alg_shift:
2911 if (synth_shift_p)
2912 stmt
2913 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
2914 alg.log[i], stmt_vinfo);
2915 else
2916 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2917 shft_log);
2918 break;
2919 case alg_add_t_m2:
2920 tmp_var
2921 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
2922 stmt_vinfo, synth_shift_p);
2923 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2924 tmp_var);
2925 break;
2926 case alg_sub_t_m2:
2927 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
2928 shft_log, stmt_vinfo,
2929 synth_shift_p);
2930 /* In some algorithms the first step involves zeroing the
2931 accumulator. If subtracting from such an accumulator
2932 just emit the negation directly. */
2933 if (integer_zerop (accumulator))
2934 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2935 else
2936 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2937 tmp_var);
2938 break;
2939 case alg_add_t2_m:
2940 tmp_var
2941 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2942 shft_log, stmt_vinfo, synth_shift_p);
2943 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2944 break;
2945 case alg_sub_t2_m:
2946 tmp_var
2947 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2948 shft_log, stmt_vinfo, synth_shift_p);
2949 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2950 break;
2951 case alg_add_factor:
2952 tmp_var
2953 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2954 shft_log, stmt_vinfo, synth_shift_p);
2955 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2956 tmp_var);
2957 break;
2958 case alg_sub_factor:
2959 tmp_var
2960 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2961 shft_log, stmt_vinfo, synth_shift_p);
2962 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2963 accumulator);
2964 break;
2965 default:
2966 gcc_unreachable ();
2967 }
2968 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2969 but rather return it directly. */
2970
2971 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2972 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2973 accumulator = accum_tmp;
2974 }
2975 if (variant == negate_variant)
2976 {
2977 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2978 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2979 accumulator = accum_tmp;
2980 if (cast_to_unsigned_p)
2981 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2982 }
2983 else if (variant == add_variant)
2984 {
2985 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2986 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2987 accumulator = accum_tmp;
2988 if (cast_to_unsigned_p)
2989 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2990 }
2991   /* Move back to a signed type if needed.  */
2992 if (cast_to_unsigned_p)
2993 {
2994 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2995 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2996 }
2997
2998 return stmt;
2999 }
3000
3001 /* Detect multiplication by a constant and convert it into a sequence of
3002    shifts, additions, subtractions and negations.  We reuse the
3003    choose_mult_variant algorithms from expmed.c.
3004
3005 Input/Output:
3006
3007 STMT_VINFO: The stmt from which the pattern search begins,
3008 i.e. the mult stmt.
3009
3010 Output:
3011
3012 * TYPE_OUT: The type of the output of this pattern.
3013
3014 * Return value: A new stmt that will be used to replace
3015 the multiplication. */
3016
3017 static gimple *
3018 vect_recog_mult_pattern (vec_info *vinfo,
3019 stmt_vec_info stmt_vinfo, tree *type_out)
3020 {
3021 gimple *last_stmt = stmt_vinfo->stmt;
3022 tree oprnd0, oprnd1, vectype, itype;
3023 gimple *pattern_stmt;
3024
3025 if (!is_gimple_assign (last_stmt))
3026 return NULL;
3027
3028 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
3029 return NULL;
3030
3031 oprnd0 = gimple_assign_rhs1 (last_stmt);
3032 oprnd1 = gimple_assign_rhs2 (last_stmt);
3033 itype = TREE_TYPE (oprnd0);
3034
3035 if (TREE_CODE (oprnd0) != SSA_NAME
3036 || TREE_CODE (oprnd1) != INTEGER_CST
3037 || !INTEGRAL_TYPE_P (itype)
3038 || !type_has_mode_precision_p (itype))
3039 return NULL;
3040
3041 vectype = get_vectype_for_scalar_type (vinfo, itype);
3042 if (vectype == NULL_TREE)
3043 return NULL;
3044
3045 /* If the target can handle vectorized multiplication natively,
3046 don't attempt to optimize this. */
3047 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
3048 if (mul_optab != unknown_optab)
3049 {
3050 machine_mode vec_mode = TYPE_MODE (vectype);
3051 int icode = (int) optab_handler (mul_optab, vec_mode);
3052 if (icode != CODE_FOR_nothing)
3053 return NULL;
3054 }
3055
3056 pattern_stmt = vect_synth_mult_by_constant (vinfo,
3057 oprnd0, oprnd1, stmt_vinfo);
3058 if (!pattern_stmt)
3059 return NULL;
3060
3061 /* Pattern detected. */
3062 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3063
3064 *type_out = vectype;
3065
3066 return pattern_stmt;
3067 }
3068
3069 /* Detect a signed division by a constant that wouldn't be
3070 otherwise vectorized:
3071
3072 type a_t, b_t;
3073
3074 S1 a_t = b_t / N;
3075
3076 where type 'type' is an integral type and N is a constant.
3077
3078 Similarly handle modulo by a constant:
3079
3080 S4 a_t = b_t % N;
3081
3082 Input/Output:
3083
3084 * STMT_VINFO: The stmt from which the pattern search begins,
3085      i.e. the division stmt.  If N is a power of two constant and the
3086      type is signed, S1 is replaced by:
3087 S3 y_t = b_t < 0 ? N - 1 : 0;
3088 S2 x_t = b_t + y_t;
3089 S1' a_t = x_t >> log2 (N);
3090
3091    If N is a power of two constant and the type is signed, S4 is
3092    replaced by (where *_T temporaries have unsigned type):
3093 S9 y_T = b_t < 0 ? -1U : 0U;
3094 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3095 S7 z_t = (type) z_T;
3096 S6 w_t = b_t + z_t;
3097 S5 x_t = w_t & (N - 1);
3098 S4' a_t = x_t - z_t;
3099
3100 Output:
3101
3102 * TYPE_OUT: The type of the output of this pattern.
3103
3104 * Return value: A new stmt that will be used to replace the division
3105 S1 or modulo S4 stmt. */
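 /* For example, with N == 4 and a signed 32-bit type, the division
    sequence computes the equivalent of:

      y_t = b_t < 0 ? 3 : 0;
      x_t = b_t + y_t;
      a_t = x_t >> 2;

    which rounds the quotient towards zero as C requires.  */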
3106
3107 static gimple *
3108 vect_recog_divmod_pattern (vec_info *vinfo,
3109 stmt_vec_info stmt_vinfo, tree *type_out)
3110 {
3111 gimple *last_stmt = stmt_vinfo->stmt;
3112 tree oprnd0, oprnd1, vectype, itype, cond;
3113 gimple *pattern_stmt, *def_stmt;
3114 enum tree_code rhs_code;
3115 optab optab;
3116 tree q;
3117 int dummy_int, prec;
3118
3119 if (!is_gimple_assign (last_stmt))
3120 return NULL;
3121
3122 rhs_code = gimple_assign_rhs_code (last_stmt);
3123 switch (rhs_code)
3124 {
3125 case TRUNC_DIV_EXPR:
3126 case EXACT_DIV_EXPR:
3127 case TRUNC_MOD_EXPR:
3128 break;
3129 default:
3130 return NULL;
3131 }
3132
3133 oprnd0 = gimple_assign_rhs1 (last_stmt);
3134 oprnd1 = gimple_assign_rhs2 (last_stmt);
3135 itype = TREE_TYPE (oprnd0);
3136 if (TREE_CODE (oprnd0) != SSA_NAME
3137 || TREE_CODE (oprnd1) != INTEGER_CST
3138 || TREE_CODE (itype) != INTEGER_TYPE
3139 || !type_has_mode_precision_p (itype))
3140 return NULL;
3141
3142 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3143 vectype = get_vectype_for_scalar_type (vinfo, itype);
3144 if (vectype == NULL_TREE)
3145 return NULL;
3146
3147 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3148 {
3149 /* If the target can handle vectorized division or modulo natively,
3150 don't attempt to optimize this, since native division is likely
3151 to give smaller code. */
3152 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3153 if (optab != unknown_optab)
3154 {
3155 machine_mode vec_mode = TYPE_MODE (vectype);
3156 int icode = (int) optab_handler (optab, vec_mode);
3157 if (icode != CODE_FOR_nothing)
3158 return NULL;
3159 }
3160 }
3161
3162 prec = TYPE_PRECISION (itype);
3163 if (integer_pow2p (oprnd1))
3164 {
3165 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3166 return NULL;
3167
3168 /* Pattern detected. */
3169 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3170
3171 *type_out = vectype;
3172
3173 /* Check if the target supports this internal function. */
3174 internal_fn ifn = IFN_DIV_POW2;
3175 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3176 {
3177 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3178
3179 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3180 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3181 gimple_call_set_lhs (div_stmt, var_div);
3182
3183 if (rhs_code == TRUNC_MOD_EXPR)
3184 {
3185 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
3186 def_stmt
3187 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3188 LSHIFT_EXPR, var_div, shift);
3189 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3190 pattern_stmt
3191 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3192 MINUS_EXPR, oprnd0,
3193 gimple_assign_lhs (def_stmt));
3194 }
3195 else
3196 pattern_stmt = div_stmt;
3197 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3198
3199 return pattern_stmt;
3200 }
3201
3202 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3203 build_int_cst (itype, 0));
3204 if (rhs_code == TRUNC_DIV_EXPR
3205 || rhs_code == EXACT_DIV_EXPR)
3206 {
3207 tree var = vect_recog_temp_ssa_var (itype, NULL);
3208 tree shift;
3209 def_stmt
3210 = gimple_build_assign (var, COND_EXPR, cond,
3211 fold_build2 (MINUS_EXPR, itype, oprnd1,
3212 build_int_cst (itype, 1)),
3213 build_int_cst (itype, 0));
3214 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3215 var = vect_recog_temp_ssa_var (itype, NULL);
3216 def_stmt
3217 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3218 gimple_assign_lhs (def_stmt));
3219 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3220
3221 shift = build_int_cst (itype, tree_log2 (oprnd1));
3222 pattern_stmt
3223 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3224 RSHIFT_EXPR, var, shift);
3225 }
3226 else
3227 {
3228 tree signmask;
3229 if (compare_tree_int (oprnd1, 2) == 0)
3230 {
3231 signmask = vect_recog_temp_ssa_var (itype, NULL);
3232 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3233 build_int_cst (itype, 1),
3234 build_int_cst (itype, 0));
3235 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3236 }
3237 else
3238 {
3239 tree utype
3240 = build_nonstandard_integer_type (prec, 1);
3241 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3242 tree shift
3243 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3244 - tree_log2 (oprnd1));
3245 tree var = vect_recog_temp_ssa_var (utype, NULL);
3246
3247 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3248 build_int_cst (utype, -1),
3249 build_int_cst (utype, 0));
3250 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3251 var = vect_recog_temp_ssa_var (utype, NULL);
3252 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3253 gimple_assign_lhs (def_stmt),
3254 shift);
3255 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3256 signmask = vect_recog_temp_ssa_var (itype, NULL);
3257 def_stmt
3258 = gimple_build_assign (signmask, NOP_EXPR, var);
3259 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3260 }
3261 def_stmt
3262 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3263 PLUS_EXPR, oprnd0, signmask);
3264 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3265 def_stmt
3266 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3267 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3268 fold_build2 (MINUS_EXPR, itype, oprnd1,
3269 build_int_cst (itype, 1)));
3270 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3271
3272 pattern_stmt
3273 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3274 MINUS_EXPR, gimple_assign_lhs (def_stmt),
3275 signmask);
3276 }
3277
3278 return pattern_stmt;
3279 }
3280
3281 if (prec > HOST_BITS_PER_WIDE_INT
3282 || integer_zerop (oprnd1))
3283 return NULL;
3284
3285 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3286 return NULL;
3287
3288 if (TYPE_UNSIGNED (itype))
3289 {
3290 unsigned HOST_WIDE_INT mh, ml;
3291 int pre_shift, post_shift;
3292 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3293 & GET_MODE_MASK (itype_mode));
3294 tree t1, t2, t3, t4;
3295
3296 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3297 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
3298 return NULL;
3299
3300 /* Find a suitable multiplier and right shift count
3301 instead of multiplying with D. */
3302 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3303
3304 /* If the suggested multiplier needs more than PREC bits, we can do better
3305 for even divisors, using an initial right shift. */
3306 if (mh != 0 && (d & 1) == 0)
3307 {
3308 pre_shift = ctz_or_zero (d);
3309 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3310 &ml, &post_shift, &dummy_int);
3311 gcc_assert (!mh);
3312 }
3313 else
3314 pre_shift = 0;
3315
3316 if (mh != 0)
3317 {
3318 if (post_shift - 1 >= prec)
3319 return NULL;
3320
3321 /* t1 = oprnd0 h* ml;
3322 t2 = oprnd0 - t1;
3323 t3 = t2 >> 1;
3324 t4 = t1 + t3;
3325 q = t4 >> (post_shift - 1); */
3326 t1 = vect_recog_temp_ssa_var (itype, NULL);
3327 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3328 build_int_cst (itype, ml));
3329 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3330
3331 t2 = vect_recog_temp_ssa_var (itype, NULL);
3332 def_stmt
3333 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3334 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3335
3336 t3 = vect_recog_temp_ssa_var (itype, NULL);
3337 def_stmt
3338 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3339 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3340
3341 t4 = vect_recog_temp_ssa_var (itype, NULL);
3342 def_stmt
3343 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3344
3345 if (post_shift != 1)
3346 {
3347 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3348
3349 q = vect_recog_temp_ssa_var (itype, NULL);
3350 pattern_stmt
3351 = gimple_build_assign (q, RSHIFT_EXPR, t4,
3352 build_int_cst (itype, post_shift - 1));
3353 }
3354 else
3355 {
3356 q = t4;
3357 pattern_stmt = def_stmt;
3358 }
3359 }
3360 else
3361 {
3362 if (pre_shift >= prec || post_shift >= prec)
3363 return NULL;
3364
3365 /* t1 = oprnd0 >> pre_shift;
3366 t2 = t1 h* ml;
3367 q = t2 >> post_shift; */
3368 if (pre_shift)
3369 {
3370 t1 = vect_recog_temp_ssa_var (itype, NULL);
3371 def_stmt
3372 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3373 build_int_cst (NULL, pre_shift));
3374 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3375 }
3376 else
3377 t1 = oprnd0;
3378
3379 t2 = vect_recog_temp_ssa_var (itype, NULL);
3380 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3381 build_int_cst (itype, ml));
3382
3383 if (post_shift)
3384 {
3385 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3386
3387 q = vect_recog_temp_ssa_var (itype, NULL);
3388 def_stmt
3389 = gimple_build_assign (q, RSHIFT_EXPR, t2,
3390 build_int_cst (itype, post_shift));
3391 }
3392 else
3393 q = t2;
3394
3395 pattern_stmt = def_stmt;
3396 }
3397 }
3398 else
3399 {
3400 unsigned HOST_WIDE_INT ml;
3401 int post_shift;
3402 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
3403 unsigned HOST_WIDE_INT abs_d;
3404 bool add = false;
3405 tree t1, t2, t3, t4;
3406
3407 /* Give up for -1. */
3408 if (d == -1)
3409 return NULL;
3410
3411 /* Since d might be INT_MIN, we have to cast to
3412 unsigned HOST_WIDE_INT before negating to avoid
3413 undefined signed overflow. */
3414 abs_d = (d >= 0
3415 ? (unsigned HOST_WIDE_INT) d
3416 : - (unsigned HOST_WIDE_INT) d);
3417
3418 /* n rem d = n rem -d */
3419 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3420 {
3421 d = abs_d;
3422 oprnd1 = build_int_cst (itype, abs_d);
3423 }
3424 if (HOST_BITS_PER_WIDE_INT >= prec
3425 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3426 /* This case is not handled correctly below. */
3427 return NULL;
3428
3429 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3430 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3431 {
3432 add = true;
3433 ml |= HOST_WIDE_INT_M1U << (prec - 1);
3434 }
3435 if (post_shift >= prec)
3436 return NULL;
3437
3438 /* t1 = oprnd0 h* ml; */
3439 t1 = vect_recog_temp_ssa_var (itype, NULL);
3440 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3441 build_int_cst (itype, ml));
3442
3443 if (add)
3444 {
3445 /* t2 = t1 + oprnd0; */
3446 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3447 t2 = vect_recog_temp_ssa_var (itype, NULL);
3448 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3449 }
3450 else
3451 t2 = t1;
3452
3453 if (post_shift)
3454 {
3455 /* t3 = t2 >> post_shift; */
3456 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3457 t3 = vect_recog_temp_ssa_var (itype, NULL);
3458 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3459 build_int_cst (itype, post_shift));
3460 }
3461 else
3462 t3 = t2;
3463
3464 wide_int oprnd0_min, oprnd0_max;
3465 int msb = 1;
3466 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
3467 {
3468 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
3469 msb = 0;
3470 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
3471 msb = -1;
3472 }
3473
3474 if (msb == 0 && d >= 0)
3475 {
3476 /* q = t3; */
3477 q = t3;
3478 pattern_stmt = def_stmt;
3479 }
3480 else
3481 {
3482 /* t4 = oprnd0 >> (prec - 1);
3483 or if we know from VRP that oprnd0 >= 0
3484 t4 = 0;
3485 or if we know from VRP that oprnd0 < 0
3486 t4 = -1; */
3487 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3488 t4 = vect_recog_temp_ssa_var (itype, NULL);
3489 if (msb != 1)
3490 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3491 build_int_cst (itype, msb));
3492 else
3493 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3494 build_int_cst (itype, prec - 1));
3495 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3496
3497 /* q = t3 - t4; or q = t4 - t3; */
3498 q = vect_recog_temp_ssa_var (itype, NULL);
3499 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3500 d < 0 ? t3 : t4);
3501 }
3502 }
3503
3504 if (rhs_code == TRUNC_MOD_EXPR)
3505 {
3506 tree r, t1;
3507
3508 /* We divided. Now finish by:
3509 t1 = q * oprnd1;
3510 r = oprnd0 - t1; */
3511 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
3512
3513 t1 = vect_recog_temp_ssa_var (itype, NULL);
3514 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3515 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3516
3517 r = vect_recog_temp_ssa_var (itype, NULL);
3518 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3519 }
3520
3521 /* Pattern detected. */
3522 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3523
3524 *type_out = vectype;
3525 return pattern_stmt;
3526 }
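
/* A standalone sketch (illustrative only, not part of the GCC sources)
   checking the scalar identities behind vect_recog_divmod_pattern above.
   The first two helpers mirror the signed power-of-two sequences S3/S2/S1'
   and S9..S4' for N = 8; the third mirrors the highpart-multiply path using
   the well-known multiplier for dividing a 32-bit unsigned value by 3
   (ml = 0xAAAAAAAB, post_shift = 1).  It assumes ">>" on a negative int is
   an arithmetic shift, which is implementation-defined in ISO C but what
   GCC provides on its targets.  Guarded by "#if 0" so it stays illustrative.  */
#if 0
#include <assert.h>
#include <limits.h>

static int
div_pow2 (int b)                /* b / 8 without a division.  */
{
  int y = b < 0 ? 8 - 1 : 0;    /* S3 */
  int x = b + y;                /* S2 */
  return x >> 3;                /* S1', log2 (8) == 3 */
}

static int
mod_pow2 (int b)                /* b % 8 without a division.  */
{
  unsigned z_T = b < 0 ? -1U : 0U;              /* S9 */
  z_T >>= sizeof (int) * CHAR_BIT - 3;          /* S8 */
  int z = (int) z_T;                            /* S7 */
  int w = b + z;                                /* S6 */
  int x = w & (8 - 1);                          /* S5 */
  return x - z;                                 /* S4' */
}

static unsigned
udiv3 (unsigned x)              /* x / 3 via a highpart multiply.  */
{
  unsigned t2 = (unsigned) (((unsigned long long) x * 0xAAAAAAABu) >> 32);
  return t2 >> 1;               /* q = t2 >> post_shift */
}

int
main (void)
{
  for (int b = -1000; b <= 1000; b++)
    {
      assert (div_pow2 (b) == b / 8);
      assert (mod_pow2 (b) == b % 8);
    }
  for (unsigned x = 0; x < 100000; x++)
    assert (udiv3 (x) == x / 3);
  return 0;
}
#endif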
3527
3528 /* Function vect_recog_mixed_size_cond_pattern
3529
3530 Try to find the following pattern:
3531
3532 type x_t, y_t;
3533 TYPE a_T, b_T, c_T;
3534 loop:
3535 S1 a_T = x_t CMP y_t ? b_T : c_T;
3536
3537 where type 'TYPE' is an integral type which has a different size
3538 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3539 than 'type', the constants need to fit into an integer type
3540 with the same width as 'type') or results of conversion from 'type'.
3541
3542 Input:
3543
3544 * STMT_VINFO: The stmt from which the pattern search begins.
3545
3546 Output:
3547
3548 * TYPE_OUT: The type of the output of this pattern.
3549
3550 * Return value: A new stmt that will be used to replace the pattern.
3551 Additionally a def_stmt is added.
3552
3553 a_it = x_t CMP y_t ? b_it : c_it;
3554 a_T = (TYPE) a_it; */
3555
3556 static gimple *
3557 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
3558 stmt_vec_info stmt_vinfo, tree *type_out)
3559 {
3560 gimple *last_stmt = stmt_vinfo->stmt;
3561 tree cond_expr, then_clause, else_clause;
3562 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3563 gimple *pattern_stmt, *def_stmt;
3564 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3565 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3566 bool promotion;
3567 tree comp_scalar_type;
3568
3569 if (!is_gimple_assign (last_stmt)
3570 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3571 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3572 return NULL;
3573
3574 cond_expr = gimple_assign_rhs1 (last_stmt);
3575 then_clause = gimple_assign_rhs2 (last_stmt);
3576 else_clause = gimple_assign_rhs3 (last_stmt);
3577
3578 if (!COMPARISON_CLASS_P (cond_expr))
3579 return NULL;
3580
3581 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3582 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
3583 if (comp_vectype == NULL_TREE)
3584 return NULL;
3585
3586 type = gimple_expr_type (last_stmt);
3587 if (types_compatible_p (type, comp_scalar_type)
3588 || ((TREE_CODE (then_clause) != INTEGER_CST
3589 || TREE_CODE (else_clause) != INTEGER_CST)
3590 && !INTEGRAL_TYPE_P (comp_scalar_type))
3591 || !INTEGRAL_TYPE_P (type))
3592 return NULL;
3593
3594 if ((TREE_CODE (then_clause) != INTEGER_CST
3595 && !type_conversion_p (vinfo, then_clause, false,
3596 &orig_type0, &def_stmt0, &promotion))
3597 || (TREE_CODE (else_clause) != INTEGER_CST
3598 && !type_conversion_p (vinfo, else_clause, false,
3599 &orig_type1, &def_stmt1, &promotion)))
3600 return NULL;
3601
3602 if (orig_type0 && orig_type1
3603 && !types_compatible_p (orig_type0, orig_type1))
3604 return NULL;
3605
3606 if (orig_type0)
3607 {
3608 if (!types_compatible_p (orig_type0, comp_scalar_type))
3609 return NULL;
3610 then_clause = gimple_assign_rhs1 (def_stmt0);
3611 itype = orig_type0;
3612 }
3613
3614 if (orig_type1)
3615 {
3616 if (!types_compatible_p (orig_type1, comp_scalar_type))
3617 return NULL;
3618 else_clause = gimple_assign_rhs1 (def_stmt1);
3619 itype = orig_type1;
3620 }
3621
3622
3623 HOST_WIDE_INT cmp_mode_size
3624 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3625
3626 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3627 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3628 return NULL;
3629
3630 vectype = get_vectype_for_scalar_type (vinfo, type);
3631 if (vectype == NULL_TREE)
3632 return NULL;
3633
3634 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3635 return NULL;
3636
3637 if (itype == NULL_TREE)
3638 itype = build_nonstandard_integer_type (cmp_mode_size,
3639 TYPE_UNSIGNED (type));
3640
3641 if (itype == NULL_TREE
3642 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3643 return NULL;
3644
3645 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3646 if (vecitype == NULL_TREE)
3647 return NULL;
3648
3649 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3650 return NULL;
3651
3652 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3653 {
3654 if ((TREE_CODE (then_clause) == INTEGER_CST
3655 && !int_fits_type_p (then_clause, itype))
3656 || (TREE_CODE (else_clause) == INTEGER_CST
3657 && !int_fits_type_p (else_clause, itype)))
3658 return NULL;
3659 }
3660
3661 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3662 COND_EXPR, unshare_expr (cond_expr),
3663 fold_convert (itype, then_clause),
3664 fold_convert (itype, else_clause));
3665 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3666 NOP_EXPR, gimple_assign_lhs (def_stmt));
3667
3668 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
3669 *type_out = vectype;
3670
3671 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3672
3673 return pattern_stmt;
3674 }
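
/* A standalone sketch (illustrative only, not part of the GCC sources) of
   the rewrite performed by vect_recog_mixed_size_cond_pattern, using
   TYPE = long long and type = int.  The function name and constants are
   assumptions chosen for illustration.  */
#if 0
long long
mixed_cond (int x_t, int y_t)
{
  /* S1: the comparison type (int) is narrower than the result (TYPE),
     and the constants fit an integer type of the comparison's width.  */
  long long a_T = x_t < y_t ? 5LL : 7LL;

  /* The pattern evaluates the COND_EXPR in an intermediate type of the
     comparison's width and widens afterwards:
       a_it = x_t CMP y_t ? b_it : c_it;  a_T = (TYPE) a_it;  */
  int a_it = x_t < y_t ? 5 : 7;
  long long a_T2 = (long long) a_it;

  return a_T - a_T2;   /* Always 0: the two forms are equivalent.  */
}
#endif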
3675
3676
3677 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3678 true if bool VAR can and should be optimized that way. Assume it shouldn't
3679 in case it's a result of a comparison which can be directly vectorized into
3680 a vector comparison. Fills in STMTS with all stmts visited during the
3681 walk. */
3682
3683 static bool
3684 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3685 {
3686 tree rhs1;
3687 enum tree_code rhs_code;
3688
3689 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3690 if (!def_stmt_info)
3691 return false;
3692
3693 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3694 if (!def_stmt)
3695 return false;
3696
3697 if (stmts.contains (def_stmt))
3698 return true;
3699
3700 rhs1 = gimple_assign_rhs1 (def_stmt);
3701 rhs_code = gimple_assign_rhs_code (def_stmt);
3702 switch (rhs_code)
3703 {
3704 case SSA_NAME:
3705 if (! check_bool_pattern (rhs1, vinfo, stmts))
3706 return false;
3707 break;
3708
3709 CASE_CONVERT:
3710 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3711 return false;
3712 if (! check_bool_pattern (rhs1, vinfo, stmts))
3713 return false;
3714 break;
3715
3716 case BIT_NOT_EXPR:
3717 if (! check_bool_pattern (rhs1, vinfo, stmts))
3718 return false;
3719 break;
3720
3721 case BIT_AND_EXPR:
3722 case BIT_IOR_EXPR:
3723 case BIT_XOR_EXPR:
3724 if (! check_bool_pattern (rhs1, vinfo, stmts)
3725 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3726 return false;
3727 break;
3728
3729 default:
3730 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3731 {
3732 tree vecitype, comp_vectype;
3733
3734 /* If the comparison can throw, then is_gimple_condexpr will be
3735 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3736 if (stmt_could_throw_p (cfun, def_stmt))
3737 return false;
3738
3739 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
3740 if (comp_vectype == NULL_TREE)
3741 return false;
3742
3743 tree mask_type = get_mask_type_for_scalar_type (vinfo,
3744 TREE_TYPE (rhs1));
3745 if (mask_type
3746 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3747 return false;
3748
3749 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3750 {
3751 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3752 tree itype
3753 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3754 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3755 if (vecitype == NULL_TREE)
3756 return false;
3757 }
3758 else
3759 vecitype = comp_vectype;
3760 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3761 return false;
3762 }
3763 else
3764 return false;
3765 break;
3766 }
3767
3768 bool res = stmts.add (def_stmt);
3769 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3770 gcc_assert (!res);
3771
3772 return true;
3773 }
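
/* A standalone sketch (illustrative only, not part of the GCC sources) of
   the kind of def chain the walk above accepts.  Starting from the returned
   bool it recurses through copies, casts from bool, BIT_NOT_EXPR and the
   binary bit operations down to vectorizable comparisons; a leaf that is
   not such a comparison (e.g. a call result) makes it return false.
   Names are illustrative.  */
#if 0
_Bool
acceptable_chain (int x, int y, int z)
{
  _Bool a = x < y;          /* tcc_comparison leaf */
  _Bool b = y != z;         /* tcc_comparison leaf */
  _Bool c = !a;             /* BIT_NOT_EXPR */
  return c & b;             /* BIT_AND_EXPR: both operands are checked */
}
#endif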
3774
3775
3776 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
3777 stmt's result (SSA_NAME_DEF_STMT of VAR), appending the cast stmt
3778 to STMT_INFO's pattern sequence. */
3779
3780 static tree
3781 adjust_bool_pattern_cast (vec_info *vinfo,
3782 tree type, tree var, stmt_vec_info stmt_info)
3783 {
3784 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3785 NOP_EXPR, var);
3786 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
3787 get_vectype_for_scalar_type (vinfo, type));
3788 return gimple_assign_lhs (cast_stmt);
3789 }
3790
3791 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3792 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3793 type, OUT_TYPE is the desired final integer type of the whole pattern.
3794 STMT_INFO is the info of the pattern root and is where pattern stmts should
3795 be associated with. DEFS is a map of pattern defs. */
3796
3797 static void
3798 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
3799 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3800 {
3801 gimple *stmt = SSA_NAME_DEF_STMT (var);
3802 enum tree_code rhs_code, def_rhs_code;
3803 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3804 location_t loc;
3805 gimple *pattern_stmt, *def_stmt;
3806 tree trueval = NULL_TREE;
3807
3808 rhs1 = gimple_assign_rhs1 (stmt);
3809 rhs2 = gimple_assign_rhs2 (stmt);
3810 rhs_code = gimple_assign_rhs_code (stmt);
3811 loc = gimple_location (stmt);
3812 switch (rhs_code)
3813 {
3814 case SSA_NAME:
3815 CASE_CONVERT:
3816 irhs1 = *defs.get (rhs1);
3817 itype = TREE_TYPE (irhs1);
3818 pattern_stmt
3819 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3820 SSA_NAME, irhs1);
3821 break;
3822
3823 case BIT_NOT_EXPR:
3824 irhs1 = *defs.get (rhs1);
3825 itype = TREE_TYPE (irhs1);
3826 pattern_stmt
3827 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3828 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3829 break;
3830
3831 case BIT_AND_EXPR:
3832 /* Try to optimize x = y & (a < b ? 1 : 0); into
3833 x = (a < b ? y : 0);
3834
3835 E.g. for:
3836 bool a_b, b_b, c_b;
3837 TYPE d_T;
3838
3839 S1 a_b = x1 CMP1 y1;
3840 S2 b_b = x2 CMP2 y2;
3841 S3 c_b = a_b & b_b;
3842 S4 d_T = (TYPE) c_b;
3843
3844 we would normally emit:
3845
3846 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3847 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3848 S3' c_T = a_T & b_T;
3849 S4' d_T = c_T;
3850
3851 but we can save one stmt by using the
3852 result of one of the COND_EXPRs in the other COND_EXPR and leave
3853 the BIT_AND_EXPR stmt out:
3854
3855 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3856 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3857 S4' d_T = c_T;
3858
3859 At least when VEC_COND_EXPR is implemented using masks,
3860 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3861 computes the comparison mask and ANDs it, in one case with an
3862 all-ones vector, in the other case with a vector register.
3863 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3864 often more expensive. */
3865 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3866 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3867 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3868 {
3869 irhs1 = *defs.get (rhs1);
3870 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3871 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3872 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3873 {
3874 rhs_code = def_rhs_code;
3875 rhs1 = def_rhs1;
3876 rhs2 = gimple_assign_rhs2 (def_stmt);
3877 trueval = irhs1;
3878 goto do_compare;
3879 }
3880 else
3881 irhs2 = *defs.get (rhs2);
3882 goto and_ior_xor;
3883 }
3884 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3885 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3886 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3887 {
3888 irhs2 = *defs.get (rhs2);
3889 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3890 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3891 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3892 {
3893 rhs_code = def_rhs_code;
3894 rhs1 = def_rhs1;
3895 rhs2 = gimple_assign_rhs2 (def_stmt);
3896 trueval = irhs2;
3897 goto do_compare;
3898 }
3899 else
3900 irhs1 = *defs.get (rhs1);
3901 goto and_ior_xor;
3902 }
3903 /* FALLTHRU */
3904 case BIT_IOR_EXPR:
3905 case BIT_XOR_EXPR:
3906 irhs1 = *defs.get (rhs1);
3907 irhs2 = *defs.get (rhs2);
3908 and_ior_xor:
3909 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3910 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3911 {
3912 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3913 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3914 int out_prec = TYPE_PRECISION (out_type);
3915 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3916 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
3917 stmt_info);
3918 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3919 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
3920 stmt_info);
3921 else
3922 {
3923 irhs1 = adjust_bool_pattern_cast (vinfo,
3924 out_type, irhs1, stmt_info);
3925 irhs2 = adjust_bool_pattern_cast (vinfo,
3926 out_type, irhs2, stmt_info);
3927 }
3928 }
3929 itype = TREE_TYPE (irhs1);
3930 pattern_stmt
3931 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3932 rhs_code, irhs1, irhs2);
3933 break;
3934
3935 default:
3936 do_compare:
3937 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3938 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3939 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3940 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3941 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3942 {
3943 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3944 itype
3945 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3946 }
3947 else
3948 itype = TREE_TYPE (rhs1);
3949 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3950 if (trueval == NULL_TREE)
3951 trueval = build_int_cst (itype, 1);
3952 else
3953 gcc_checking_assert (useless_type_conversion_p (itype,
3954 TREE_TYPE (trueval)));
3955 pattern_stmt
3956 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3957 COND_EXPR, cond_expr, trueval,
3958 build_int_cst (itype, 0));
3959 break;
3960 }
3961
3962 gimple_set_location (pattern_stmt, loc);
3963 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
3964 get_vectype_for_scalar_type (vinfo, itype));
3965 defs.put (var, gimple_assign_lhs (pattern_stmt));
3966 }
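
/* A standalone sketch (illustrative only, not part of the GCC sources)
   checking the identity behind the BIT_AND_EXPR shortcut above, namely
   that for a boolean y, y & (cond ? 1 : 0) equals cond ? y : 0, which is
   what lets the pattern fold the AND into one of the COND_EXPRs.  */
#if 0
#include <assert.h>

int
main (void)
{
  for (int a = -2; a <= 2; a++)
    for (int b = -2; b <= 2; b++)
      for (int y = 0; y <= 1; y++)
	assert ((y & (a < b ? 1 : 0)) == (a < b ? y : 0));
  return 0;
}
#endif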
3967
3968 /* Comparison function to qsort a vector of gimple stmts by UID. */
3969
3970 static int
3971 sort_after_uid (const void *p1, const void *p2)
3972 {
3973 const gimple *stmt1 = *(const gimple * const *)p1;
3974 const gimple *stmt2 = *(const gimple * const *)p2;
3975 return gimple_uid (stmt1) - gimple_uid (stmt2);
3976 }
3977
3978 /* Create pattern stmts for all stmts participating in the bool pattern
3979 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3980 OUT_TYPE. Return the def of the pattern root. */
3981
3982 static tree
3983 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
3984 tree out_type, stmt_vec_info stmt_info)
3985 {
3986 /* Gather original stmts in the bool pattern in their order of appearance
3987 in the IL. */
3988 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3989 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3990 i != bool_stmt_set.end (); ++i)
3991 bool_stmts.quick_push (*i);
3992 bool_stmts.qsort (sort_after_uid);
3993
3994 /* Now process them in that order, producing pattern stmts. */
3995 hash_map <tree, tree> defs;
3996 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3997 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
3998 out_type, stmt_info, defs);
3999
4000 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
4001 gimple *pattern_stmt
4002 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4003 return gimple_assign_lhs (pattern_stmt);
4004 }
4005
4006 /* Return the proper type for converting bool VAR into
4007 an integer value or NULL_TREE if no such type exists.
4008 The type is chosen so that the converted value has the
4009 same number of elements as VAR's vector type. */
4010
4011 static tree
4012 integer_type_for_mask (tree var, vec_info *vinfo)
4013 {
4014 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4015 return NULL_TREE;
4016
4017 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
4018 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
4019 return NULL_TREE;
4020
4021 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
4022 }
4023
4024 /* Function vect_recog_bool_pattern
4025
4026 Try to find pattern like following:
4027
4028 bool a_b, b_b, c_b, d_b, e_b;
4029 TYPE f_T;
4030 loop:
4031 S1 a_b = x1 CMP1 y1;
4032 S2 b_b = x2 CMP2 y2;
4033 S3 c_b = a_b & b_b;
4034 S4 d_b = x3 CMP3 y3;
4035 S5 e_b = c_b | d_b;
4036 S6 f_T = (TYPE) e_b;
4037
4038 where type 'TYPE' is an integral type. Or a similar pattern
4039 ending in
4040
4041 S6 f_Y = e_b ? r_Y : s_Y;
4042
4043 as results from if-conversion of a complex condition.
4044
4045 Input:
4046
4047 * STMT_VINFO: The stmt at the end from which the pattern
4048 search begins, i.e. cast of a bool to
4049 an integer type.
4050
4051 Output:
4052
4053 * TYPE_OUT: The type of the output of this pattern.
4054
4055 * Return value: A new stmt that will be used to replace the pattern.
4056
4057 Assuming the size of TYPE is the same as the size of all comparisons
4058 (otherwise some casts would be added where needed), for the above
4059 sequence we create related pattern stmts:
4060 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4061 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4062 S4' d_T = x3 CMP3 y3 ? 1 : 0;
4063 S5' e_T = c_T | d_T;
4064 S6' f_T = e_T;
4065
4066 Instead of the above S3' we could emit:
4067 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4068 S3' c_T = a_T | b_T;
4069 but the above is more efficient. */
4070
4071 static gimple *
4072 vect_recog_bool_pattern (vec_info *vinfo,
4073 stmt_vec_info stmt_vinfo, tree *type_out)
4074 {
4075 gimple *last_stmt = stmt_vinfo->stmt;
4076 enum tree_code rhs_code;
4077 tree var, lhs, rhs, vectype;
4078 gimple *pattern_stmt;
4079
4080 if (!is_gimple_assign (last_stmt))
4081 return NULL;
4082
4083 var = gimple_assign_rhs1 (last_stmt);
4084 lhs = gimple_assign_lhs (last_stmt);
4085 rhs_code = gimple_assign_rhs_code (last_stmt);
4086
4087 if (rhs_code == VIEW_CONVERT_EXPR)
4088 var = TREE_OPERAND (var, 0);
4089
4090 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4091 return NULL;
4092
4093 hash_set<gimple *> bool_stmts;
4094
4095 if (CONVERT_EXPR_CODE_P (rhs_code)
4096 || rhs_code == VIEW_CONVERT_EXPR)
4097 {
4098 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4099 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4100 return NULL;
4101 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4102 if (vectype == NULL_TREE)
4103 return NULL;
4104
4105 if (check_bool_pattern (var, vinfo, bool_stmts))
4106 {
4107 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4108 TREE_TYPE (lhs), stmt_vinfo);
4109 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4110 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4111 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4112 else
4113 pattern_stmt
4114 = gimple_build_assign (lhs, NOP_EXPR, rhs);
4115 }
4116 else
4117 {
4118 tree type = integer_type_for_mask (var, vinfo);
4119 tree cst0, cst1, tmp;
4120
4121 if (!type)
4122 return NULL;
4123
4124 /* We may directly use the cond with narrowed type, avoiding
4125 multiple cond exprs with subsequent result packing and
4126 performing a single cond with a packed mask instead. In case
4127 of widening it is better to do the cond first and then extract
4128 the results. */
4129 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4130 type = TREE_TYPE (lhs);
4131
4132 cst0 = build_int_cst (type, 0);
4133 cst1 = build_int_cst (type, 1);
4134 tmp = vect_recog_temp_ssa_var (type, NULL);
4135 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4136
4137 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4138 {
4139 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4140 append_pattern_def_seq (vinfo, stmt_vinfo,
4141 pattern_stmt, new_vectype);
4142
4143 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4144 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4145 }
4146 }
4147
4148 *type_out = vectype;
4149 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4150
4151 return pattern_stmt;
4152 }
4153 else if (rhs_code == COND_EXPR
4154 && TREE_CODE (var) == SSA_NAME)
4155 {
4156 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4157 if (vectype == NULL_TREE)
4158 return NULL;
4159
4160 /* Build a scalar type for the boolean result that when
4161 vectorized matches the vector type of the result in
4162 size and number of elements. */
4163 unsigned prec
4164 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4165 TYPE_VECTOR_SUBPARTS (vectype));
4166
4167 tree type
4168 = build_nonstandard_integer_type (prec,
4169 TYPE_UNSIGNED (TREE_TYPE (var)));
4170 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4171 return NULL;
4172
4173 if (!check_bool_pattern (var, vinfo, bool_stmts))
4174 return NULL;
4175
4176 rhs = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
4177
4178 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4179 pattern_stmt
4180 = gimple_build_assign (lhs, COND_EXPR,
4181 build2 (NE_EXPR, boolean_type_node,
4182 rhs, build_int_cst (type, 0)),
4183 gimple_assign_rhs2 (last_stmt),
4184 gimple_assign_rhs3 (last_stmt));
4185 *type_out = vectype;
4186 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4187
4188 return pattern_stmt;
4189 }
4190 else if (rhs_code == SSA_NAME
4191 && STMT_VINFO_DATA_REF (stmt_vinfo))
4192 {
4193 stmt_vec_info pattern_stmt_info;
4194 tree nunits_vectype;
4195 if (!vect_get_vector_types_for_stmt (vinfo, stmt_vinfo, &vectype,
4196 &nunits_vectype)
4197 || !VECTOR_MODE_P (TYPE_MODE (vectype)))
4198 return NULL;
4199
4200 if (check_bool_pattern (var, vinfo, bool_stmts))
4201 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4202 TREE_TYPE (vectype), stmt_vinfo);
4203 else
4204 {
4205 tree type = integer_type_for_mask (var, vinfo);
4206 tree cst0, cst1, new_vectype;
4207
4208 if (!type)
4209 return NULL;
4210
4211 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4212 type = TREE_TYPE (vectype);
4213
4214 cst0 = build_int_cst (type, 0);
4215 cst1 = build_int_cst (type, 1);
4216 new_vectype = get_vectype_for_scalar_type (vinfo, type);
4217
4218 rhs = vect_recog_temp_ssa_var (type, NULL);
4219 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4220 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
4221 }
4222
4223 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4224 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4225 {
4226 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4227 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4228 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
4229 rhs = rhs2;
4230 }
4231 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4232 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4233 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4234 *type_out = vectype;
4235 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4236
4237 return pattern_stmt;
4238 }
4239 else
4240 return NULL;
4241 }
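
/* A standalone sketch (illustrative only, not part of the GCC sources) of
   an if-converted loop of the shape vect_recog_bool_pattern targets; the
   chain of comparisons and bitwise ops on bools feeding the cast at S6 is
   what gets rewritten into integer COND_EXPRs.  Names are illustrative.  */
#if 0
void
bool_to_int (int *x1, int *x2, int *x3, int *f, int n)
{
  for (int i = 0; i < n; i++)
    {
      _Bool a_b = x1[i] > 0;           /* S1 */
      _Bool b_b = x2[i] > 0;           /* S2 */
      _Bool c_b = a_b & b_b;           /* S3 */
      _Bool e_b = c_b | (x3[i] > 0);   /* S4/S5 */
      f[i] = (int) e_b;                /* S6: cast of a bool to an integer */
    }
}
#endif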
4242
4243
4244 /* A helper for vect_recog_mask_conversion_pattern. Build
4245 conversion of MASK to a type suitable for masking VECTYPE.
4246 Built statement gets required vectype and is appended to
4247 a pattern sequence of STMT_VINFO.
4248
4249 Return converted mask. */
4250
4251 static tree
4252 build_mask_conversion (vec_info *vinfo,
4253 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4254 {
4255 gimple *stmt;
4256 tree masktype, tmp;
4257
4258 masktype = truth_type_for (vectype);
4259 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4260 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4261 append_pattern_def_seq (vinfo, stmt_vinfo,
4262 stmt, masktype, TREE_TYPE (vectype));
4263
4264 return tmp;
4265 }
4266
4267
4268 /* Function vect_recog_mask_conversion_pattern
4269
4270 Try to find statements which require boolean type
4271 conversion. Additional conversion statements are
4272 added to handle such cases. For example:
4273
4274 bool m_1, m_2, m_3;
4275 int i_4, i_5;
4276 double d_6, d_7;
4277 char c_1, c_2, c_3;
4278
4279 S1 m_1 = i_4 > i_5;
4280 S2 m_2 = d_6 < d_7;
4281 S3 m_3 = m_1 & m_2;
4282 S4 c_1 = m_3 ? c_2 : c_3;
4283
4284 Will be transformed into:
4285
4286 S1 m_1 = i_4 > i_5;
4287 S2 m_2 = d_6 < d_7;
4288 S3'' m_2' = (_Bool[bitsize=32])m_2;
4289 S3' m_3' = m_1 & m_2';
4290 S4'' m_3'' = (_Bool[bitsize=8])m_3';
4291 S4' c_1' = m_3'' ? c_2 : c_3; */
4292
4293 static gimple *
4294 vect_recog_mask_conversion_pattern (vec_info *vinfo,
4295 stmt_vec_info stmt_vinfo, tree *type_out)
4296 {
4297 gimple *last_stmt = stmt_vinfo->stmt;
4298 enum tree_code rhs_code;
4299 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4300 tree vectype1, vectype2;
4301 stmt_vec_info pattern_stmt_info;
4302 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
4303 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
4304
4305 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
4306 if (is_gimple_call (last_stmt)
4307 && gimple_call_internal_p (last_stmt))
4308 {
4309 gcall *pattern_stmt;
4310
4311 internal_fn ifn = gimple_call_internal_fn (last_stmt);
4312 int mask_argno = internal_fn_mask_index (ifn);
4313 if (mask_argno < 0)
4314 return NULL;
4315
4316 bool store_p = internal_store_fn_p (ifn);
4317 if (store_p)
4318 {
4319 int rhs_index = internal_fn_stored_value_index (ifn);
4320 tree rhs = gimple_call_arg (last_stmt, rhs_index);
4321 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4322 }
4323 else
4324 {
4325 lhs = gimple_call_lhs (last_stmt);
4326 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4327 }
4328
4329 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4330 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
4331 if (!mask_arg_type)
4332 return NULL;
4333 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4334
4335 if (!vectype1 || !vectype2
4336 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4337 TYPE_VECTOR_SUBPARTS (vectype2)))
4338 return NULL;
4339
4340 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
4341
4342 auto_vec<tree, 8> args;
4343 unsigned int nargs = gimple_call_num_args (last_stmt);
4344 args.safe_grow (nargs, true);
4345 for (unsigned int i = 0; i < nargs; ++i)
4346 args[i] = ((int) i == mask_argno
4347 ? tmp
4348 : gimple_call_arg (last_stmt, i));
4349 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4350
4351 if (!store_p)
4352 {
4353 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4354 gimple_call_set_lhs (pattern_stmt, lhs);
4355 }
4356 gimple_call_set_nothrow (pattern_stmt, true);
4357
4358 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4359 if (STMT_VINFO_DATA_REF (stmt_vinfo))
4360 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4361
4362 *type_out = vectype1;
4363 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4364
4365 return pattern_stmt;
4366 }
4367
4368 if (!is_gimple_assign (last_stmt))
4369 return NULL;
4370
4371 gimple *pattern_stmt;
4372 lhs = gimple_assign_lhs (last_stmt);
4373 rhs1 = gimple_assign_rhs1 (last_stmt);
4374 rhs_code = gimple_assign_rhs_code (last_stmt);
4375
4376 /* Check for cond expression requiring mask conversion. */
4377 if (rhs_code == COND_EXPR)
4378 {
4379 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4380
4381 if (TREE_CODE (rhs1) == SSA_NAME)
4382 {
4383 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4384 if (!rhs1_type)
4385 return NULL;
4386 }
4387 else if (COMPARISON_CLASS_P (rhs1))
4388 {
4389 /* Check whether we're comparing scalar booleans and (if so)
4390 whether a better mask type exists than the mask associated
4391 with boolean-sized elements. This avoids unnecessary packs
4392 and unpacks if the booleans are set from comparisons of
4393 wider types. E.g. in:
4394
4395 int x1, x2, x3, x4, y1, y2;
4396 ...
4397 bool b1 = (x1 == x2);
4398 bool b2 = (x3 == x4);
4399 ... = b1 == b2 ? y1 : y2;
4400
4401 it is better for b1 and b2 to use the mask type associated
4402 with int elements rather than bool (byte) elements. */
4403 rhs1_op0 = TREE_OPERAND (rhs1, 0);
4404 rhs1_op1 = TREE_OPERAND (rhs1, 1);
4405 if (!rhs1_op0 || !rhs1_op1)
4406 return NULL;
4407 rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
4408 rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
4409
4410 if (!rhs1_op0_type)
4411 rhs1_type = TREE_TYPE (rhs1_op0);
4412 else if (!rhs1_op1_type)
4413 rhs1_type = TREE_TYPE (rhs1_op1);
4414 else if (TYPE_PRECISION (rhs1_op0_type)
4415 != TYPE_PRECISION (rhs1_op1_type))
4416 {
4417 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
4418 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4419 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
4420 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4421 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
4422 {
4423 if (abs (tmp0) > abs (tmp1))
4424 rhs1_type = rhs1_op1_type;
4425 else
4426 rhs1_type = rhs1_op0_type;
4427 }
4428 else
4429 rhs1_type = build_nonstandard_integer_type
4430 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
4431 }
4432 else
4433 rhs1_type = rhs1_op0_type;
4434 }
4435 else
4436 return NULL;
4437
4438 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4439
4440 if (!vectype1 || !vectype2)
4441 return NULL;
4442
4443 /* Continue if a conversion is needed. Also continue if we have
4444 a comparison whose vector type would normally be different from
4445 VECTYPE2 when considered in isolation. In that case we'll
4446 replace the comparison with an SSA name (so that we can record
4447 its vector type) and behave as though the comparison was an SSA
4448 name from the outset. */
4449 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4450 TYPE_VECTOR_SUBPARTS (vectype2))
4451 && !rhs1_op0_type
4452 && !rhs1_op1_type)
4453 return NULL;
4454
4455 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4456 in place, we can handle it in vectorizable_condition. This avoids
4457 unnecessary promotion stmts and increased vectorization factor. */
4458 if (COMPARISON_CLASS_P (rhs1)
4459 && INTEGRAL_TYPE_P (rhs1_type)
4460 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4461 TYPE_VECTOR_SUBPARTS (vectype2)))
4462 {
4463 enum vect_def_type dt;
4464 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4465 && dt == vect_external_def
4466 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4467 && (dt == vect_external_def
4468 || dt == vect_constant_def))
4469 {
4470 tree wide_scalar_type = build_nonstandard_integer_type
4471 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
4472 tree vectype3 = get_vectype_for_scalar_type (vinfo,
4473 wide_scalar_type);
4474 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4475 return NULL;
4476 }
4477 }
4478
4479 /* If rhs1 is a comparison we need to move it into a
4480 separate statement. */
4481 if (TREE_CODE (rhs1) != SSA_NAME)
4482 {
4483 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4484 if (rhs1_op0_type
4485 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
4486 rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
4487 vectype2, stmt_vinfo);
4488 if (rhs1_op1_type
4489 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
4490 rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
4491 vectype2, stmt_vinfo);
4492 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
4493 rhs1_op0, rhs1_op1);
4494 rhs1 = tmp;
4495 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
4496 rhs1_type);
4497 }
4498
4499 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4500 TYPE_VECTOR_SUBPARTS (vectype2)))
4501 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4502 else
4503 tmp = rhs1;
4504
4505 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4506 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4507 gimple_assign_rhs2 (last_stmt),
4508 gimple_assign_rhs3 (last_stmt));
4509
4510 *type_out = vectype1;
4511 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4512
4513 return pattern_stmt;
4514 }
4515
4516 /* Now check for binary boolean operations requiring conversion for
4517 one of the operands. */
4518 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4519 return NULL;
4520
4521 if (rhs_code != BIT_IOR_EXPR
4522 && rhs_code != BIT_XOR_EXPR
4523 && rhs_code != BIT_AND_EXPR
4524 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4525 return NULL;
4526
4527 rhs2 = gimple_assign_rhs2 (last_stmt);
4528
4529 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4530 rhs2_type = integer_type_for_mask (rhs2, vinfo);
4531
4532 if (!rhs1_type || !rhs2_type
4533 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4534 return NULL;
4535
4536 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4537 {
4538 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4539 if (!vectype1)
4540 return NULL;
4541 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
4542 }
4543 else
4544 {
4545 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
4546 if (!vectype1)
4547 return NULL;
4548 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4549 }
4550
4551 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4552 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4553
4554 *type_out = vectype1;
4555 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4556
4557 return pattern_stmt;
4558 }
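
/* A standalone sketch (illustrative only, not part of the GCC sources) of
   a loop needing the conversions above.  The int comparison yields a mask
   with 4-byte elements and the double comparison one with 8-byte elements,
   so the two masks cannot be combined by "&" until one of them is
   converted.  Names mirror the S1..S4 example in the comment above.  */
#if 0
void
mixed_masks (int *i4, int *i5, double *d6, double *d7,
	     char *c1, char *c2, char *c3, int n)
{
  for (int x = 0; x < n; x++)
    c1[x] = ((i4[x] > i5[x]) & (d6[x] < d7[x])) ? c2[x] : c3[x];
}
#endif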
4559
4560 /* STMT_INFO is a load or store. If the load or store is conditional, return
4561 the boolean condition under which it occurs, otherwise return null. */
4562
4563 static tree
4564 vect_get_load_store_mask (stmt_vec_info stmt_info)
4565 {
4566 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4567 {
4568 gcc_assert (gimple_assign_single_p (def_assign));
4569 return NULL_TREE;
4570 }
4571
4572 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4573 {
4574 internal_fn ifn = gimple_call_internal_fn (def_call);
4575 int mask_index = internal_fn_mask_index (ifn);
4576 return gimple_call_arg (def_call, mask_index);
4577 }
4578
4579 gcc_unreachable ();
4580 }
4581
4582 /* Return MASK if MASK is suitable for masking an operation on vectors
4583 of type VECTYPE, otherwise convert it into such a form and return
4584 the result. Associate any conversion statements with STMT_INFO's
4585 pattern. */
4586
4587 static tree
4588 vect_convert_mask_for_vectype (tree mask, tree vectype,
4589 stmt_vec_info stmt_info, vec_info *vinfo)
4590 {
4591 tree mask_type = integer_type_for_mask (mask, vinfo);
4592 if (mask_type)
4593 {
4594 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
4595 if (mask_vectype
4596 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4597 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4598 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
4599 }
4600 return mask;
4601 }
4602
4603 /* Return the equivalent of:
4604
4605 fold_convert (TYPE, VALUE)
4606
4607 with the expectation that the operation will be vectorized.
4608 If new statements are needed, add them as pattern statements
4609 to STMT_INFO. */
4610
4611 static tree
4612 vect_add_conversion_to_pattern (vec_info *vinfo,
4613 tree type, tree value, stmt_vec_info stmt_info)
4614 {
4615 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4616 return value;
4617
4618 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4619 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4620 append_pattern_def_seq (vinfo, stmt_info, conversion,
4621 get_vectype_for_scalar_type (vinfo, type));
4622 return new_value;
4623 }
4624
4625 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4626 internal function. Return the final statement on success and set
4627 *TYPE_OUT to the vector type being loaded or stored.
4628
4629 This function only handles gathers and scatters that were recognized
4630 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
4631
4632 static gimple *
4633 vect_recog_gather_scatter_pattern (vec_info *vinfo,
4634 stmt_vec_info stmt_info, tree *type_out)
4635 {
4636 /* Currently we only support this for loop vectorization. */
4637 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4638 if (!loop_vinfo)
4639 return NULL;
4640
4641 /* Make sure that we're looking at a gather load or scatter store. */
4642 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4643 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4644 return NULL;
4645
4646 /* Get the boolean that controls whether the load or store happens.
4647 This is null if the operation is unconditional. */
4648 tree mask = vect_get_load_store_mask (stmt_info);
4649
4650 /* Make sure that the target supports an appropriate internal
4651 function for the gather/scatter operation. */
4652 gather_scatter_info gs_info;
4653 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4654 || gs_info.decl)
4655 return NULL;
4656
4657 /* Convert the mask to the right form. */
4658 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
4659 gs_info.element_type);
4660 if (mask)
4661 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4662 loop_vinfo);
4663
4664 /* Get the invariant base and non-invariant offset, converting the
4665 latter to the same width as the vector elements. */
4666 tree base = gs_info.base;
4667 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
4668 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
4669 gs_info.offset, stmt_info);
4670
4671 /* Build the new pattern statement. */
4672 tree scale = size_int (gs_info.scale);
4673 gcall *pattern_stmt;
4674 if (DR_IS_READ (dr))
4675 {
4676 tree zero = build_zero_cst (gs_info.element_type);
4677 if (mask != NULL)
4678 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
4679 offset, scale, zero, mask);
4680 else
4681 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4682 offset, scale, zero);
4683 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4684 gimple_call_set_lhs (pattern_stmt, load_lhs);
4685 }
4686 else
4687 {
4688 tree rhs = vect_get_store_rhs (stmt_info);
4689 if (mask != NULL)
4690 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4691 base, offset, scale, rhs,
4692 mask);
4693 else
4694 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4695 base, offset, scale, rhs);
4696 }
4697 gimple_call_set_nothrow (pattern_stmt, true);
4698
4699 /* Copy across relevant vectorization info and associate DR with the
4700 new pattern statement instead of the original statement. */
4701 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4702 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4703
4704 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4705 *type_out = vectype;
4706 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4707
4708 return pattern_stmt;
4709 }
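
/* A standalone sketch (illustrative only, not part of the GCC sources) of
   a conditional gather load of the shape this pattern converts.  After
   if-conversion the load becomes a masked gather, roughly
   IFN_MASK_GATHER_LOAD (base, offset, scale, zero, mask), matching the
   five-argument call built above.  Names are illustrative.  */
#if 0
void
gather (double *restrict dst, const double *restrict src,
	const int *restrict idx, int n)
{
  for (int i = 0; i < n; i++)
    if (idx[i] >= 0)
      dst[i] = src[idx[i]];   /* load at a non-linear, per-lane offset */
}
#endif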
4710
4711 /* Return true if TYPE is a non-boolean integer type. These are the types
4712 that we want to consider for narrowing. */
4713
4714 static bool
4715 vect_narrowable_type_p (tree type)
4716 {
4717 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4718 }
4719
4720 /* Return true if the operation given by CODE can be truncated to N bits
4721 when only N bits of the output are needed. This is only true if bit N+1
4722 of the inputs has no effect on the low N bits of the result. */
4723
4724 static bool
4725 vect_truncatable_operation_p (tree_code code)
4726 {
4727 switch (code)
4728 {
4729 case PLUS_EXPR:
4730 case MINUS_EXPR:
4731 case MULT_EXPR:
4732 case BIT_AND_EXPR:
4733 case BIT_IOR_EXPR:
4734 case BIT_XOR_EXPR:
4735 case COND_EXPR:
4736 return true;
4737
4738 default:
4739 return false;
4740 }
4741 }
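
/* A standalone sketch (illustrative only, not part of the GCC sources)
   checking what "truncatable" means above.  For PLUS_EXPR the low 8 bits
   of the result depend only on the low 8 bits of the inputs; for a right
   shift (absent from the list) higher input bits leak into the low result
   bits.  */
#if 0
#include <assert.h>

int
main (void)
{
  unsigned a = 0x1234, b = 0x5678;
  /* Truncate-then-add agrees with add-then-truncate.  */
  assert (((a + b) & 0xff) == (((a & 0xff) + (b & 0xff)) & 0xff));
  /* Not so for a right shift.  */
  assert (((a >> 4) & 0xff) != (((a & 0xff) >> 4) & 0xff));
  return 0;
}
#endif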
4742
4743 /* Record that STMT_INFO could be changed from operating on TYPE to
4744 operating on a type with the precision and sign given by PRECISION
4745 and SIGN respectively. PRECISION is an arbitrary bit precision;
4746 it might not be a whole number of bytes. */
4747
4748 static void
4749 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4750 unsigned int precision, signop sign)
4751 {
4752 /* Round the precision up to a whole number of bytes. */
4753 precision = vect_element_precision (precision);
4754 if (precision < TYPE_PRECISION (type)
4755 && (!stmt_info->operation_precision
4756 || stmt_info->operation_precision > precision))
4757 {
4758 stmt_info->operation_precision = precision;
4759 stmt_info->operation_sign = sign;
4760 }
4761 }
4762
4763 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4764 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4765 is an arbitrary bit precision; it might not be a whole number of bytes. */
4766
4767 static void
4768 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4769 unsigned int min_input_precision)
4770 {
4771 /* This operation in isolation only requires the inputs to have
4772 MIN_INPUT_PRECISION of precision. However, that doesn't mean
4773 that MIN_INPUT_PRECISION is a natural precision for the chain
4774 as a whole. E.g. consider something like:
4775
4776 unsigned short *x, *y;
4777 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4778
4779 The right shift can be done on unsigned chars, and only requires the
4780 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4781 approach would mean turning a natural chain of single-vector unsigned
4782 short operations into one that truncates "*x" and then extends
4783 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4784 operation and one vector for each unsigned char operation.
4785 This would be a significant pessimization.
4786
4787 Instead only propagate the maximum of this precision and the precision
4788 required by the users of the result. This means that we don't pessimize
4789 the case above but continue to optimize things like:
4790
4791 unsigned char *y;
4792 unsigned short *x;
4793 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4794
4795 Here we would truncate two vectors of *x to a single vector of
4796 unsigned chars and use single-vector unsigned char operations for
4797 everything else, rather than doing two unsigned short copies of
4798 "(*x & 0xf0) >> 4" and then truncating the result. */
4799 min_input_precision = MAX (min_input_precision,
4800 stmt_info->min_output_precision);
4801
4802 if (min_input_precision < TYPE_PRECISION (type)
4803 && (!stmt_info->min_input_precision
4804 || stmt_info->min_input_precision > min_input_precision))
4805 stmt_info->min_input_precision = min_input_precision;
4806 }
4807
4808 /* Subroutine of vect_determine_min_output_precision. Return true if
4809 we can calculate a reduced number of output bits for STMT_INFO,
4810 whose result is LHS. */
4811
4812 static bool
4813 vect_determine_min_output_precision_1 (vec_info *vinfo,
4814 stmt_vec_info stmt_info, tree lhs)
4815 {
4816 /* Take the maximum precision required by users of the result. */
4817 unsigned int precision = 0;
4818 imm_use_iterator iter;
4819 use_operand_p use;
4820 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4821 {
4822 gimple *use_stmt = USE_STMT (use);
4823 if (is_gimple_debug (use_stmt))
4824 continue;
4825 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4826 if (!use_stmt_info || !use_stmt_info->min_input_precision)
4827 return false;
4828 /* The input precision recorded for COND_EXPRs applies only to the
4829 "then" and "else" values. */
4830 gassign *assign = dyn_cast <gassign *> (use_stmt);
4831 if (assign
4832 && gimple_assign_rhs_code (assign) == COND_EXPR
4833 && use->use != gimple_assign_rhs2_ptr (assign)
4834 && use->use != gimple_assign_rhs3_ptr (assign))
4835 return false;
4836 precision = MAX (precision, use_stmt_info->min_input_precision);
4837 }
4838
4839 if (dump_enabled_p ())
4840 dump_printf_loc (MSG_NOTE, vect_location,
4841 "only the low %d bits of %T are significant\n",
4842 precision, lhs);
4843 stmt_info->min_output_precision = precision;
4844 return true;
4845 }
4846
4847 /* Calculate min_output_precision for STMT_INFO. */
4848
4849 static void
4850 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
4851 {
4852 /* We're only interested in statements with a narrowable result. */
4853 tree lhs = gimple_get_lhs (stmt_info->stmt);
4854 if (!lhs
4855 || TREE_CODE (lhs) != SSA_NAME
4856 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4857 return;
4858
4859 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
4860 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4861 }
4862
4863 /* Use range information to decide whether STMT (described by STMT_INFO)
4864 could be done in a narrower type. This is effectively a forward
4865 propagation, since it uses context-independent information that applies
4866 to all users of an SSA name. */
4867
4868 static void
4869 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4870 {
4871 tree lhs = gimple_assign_lhs (stmt);
4872 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4873 return;
4874
4875 tree type = TREE_TYPE (lhs);
4876 if (!vect_narrowable_type_p (type))
4877 return;
4878
4879 /* First see whether we have any useful range information for the result. */
4880 unsigned int precision = TYPE_PRECISION (type);
4881 signop sign = TYPE_SIGN (type);
4882 wide_int min_value, max_value;
4883 if (!vect_get_range_info (lhs, &min_value, &max_value))
4884 return;
4885
4886 tree_code code = gimple_assign_rhs_code (stmt);
4887 unsigned int nops = gimple_num_ops (stmt);
4888
4889 if (!vect_truncatable_operation_p (code))
4890 /* Check that all relevant input operands are compatible, and update
4891 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4892 for (unsigned int i = 1; i < nops; ++i)
4893 {
4894 tree op = gimple_op (stmt, i);
4895 if (TREE_CODE (op) == INTEGER_CST)
4896 {
4897 /* Don't require the integer to have RHS_TYPE (which it might
4898 not for things like shift amounts, etc.), but do require it
4899 to fit the type. */
4900 if (!int_fits_type_p (op, type))
4901 return;
4902
4903 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4904 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4905 }
4906 else if (TREE_CODE (op) == SSA_NAME)
4907 {
4908 /* Ignore codes that don't take uniform arguments. */
4909 if (!types_compatible_p (TREE_TYPE (op), type))
4910 return;
4911
4912 wide_int op_min_value, op_max_value;
4913 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4914 return;
4915
4916 min_value = wi::min (min_value, op_min_value, sign);
4917 max_value = wi::max (max_value, op_max_value, sign);
4918 }
4919 else
4920 return;
4921 }
4922
4923 /* Try to switch signed types for unsigned types if we can.
4924 This is better for two reasons. First, unsigned ops tend
4925 to be cheaper than signed ops. Second, it means that we can
4926 handle things like:
4927
4928 signed char c;
4929 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4930
4931 as:
4932
4933 signed char c;
4934 unsigned short res_1 = (unsigned short) c & 0xff00;
4935 int res = (int) res_1;
4936
4937 where the intermediate result res_1 has unsigned rather than
4938 signed type. */
4939 if (sign == SIGNED && !wi::neg_p (min_value))
4940 sign = UNSIGNED;
4941
4942 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4943 unsigned int precision1 = wi::min_precision (min_value, sign);
4944 unsigned int precision2 = wi::min_precision (max_value, sign);
4945 unsigned int value_precision = MAX (precision1, precision2);
4946 if (value_precision >= precision)
4947 return;
4948
4949 if (dump_enabled_p ())
4950 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4951 " without loss of precision: %G",
4952 sign == SIGNED ? "signed" : "unsigned",
4953 value_precision, stmt);
4954
4955 vect_set_operation_type (stmt_info, type, value_precision, sign);
4956 vect_set_min_input_precision (stmt_info, type, value_precision);
4957 }
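
/* A worked illustration of the arithmetic above (hypothetical values):
   for a 32-bit unsigned LHS whose recorded range is [0, 1000],
   wi::min_precision of the maximum under UNSIGNED is 10 (1000 needs
   ten bits) and that of the minimum is no larger, so:

     value_precision = MAX (precision1, precision2);   // == 10 < 32

   and the statement is recorded as doable in an unsigned type of at
   least 10 bits; vect_set_operation_type later rounds that up to a
   whole element precision, 16 bits in this case.  */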

/* Use information about the users of STMT's result to decide whether
   STMT (described by STMT_INFO) could be done in a narrower type.
   This is effectively a backward propagation.  */

static void
vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
{
  tree_code code = gimple_assign_rhs_code (stmt);
  unsigned int opno = (code == COND_EXPR ? 2 : 1);
  tree type = TREE_TYPE (gimple_op (stmt, opno));
  if (!vect_narrowable_type_p (type))
    return;

  unsigned int precision = TYPE_PRECISION (type);
  unsigned int operation_precision, min_input_precision;
  switch (code)
    {
    CASE_CONVERT:
      /* Only the bits that contribute to the output matter.  Don't change
         the precision of the operation itself.  */
      operation_precision = precision;
      min_input_precision = stmt_info->min_output_precision;
      break;

    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
      {
        tree shift = gimple_assign_rhs2 (stmt);
        if (TREE_CODE (shift) != INTEGER_CST
            || !wi::ltu_p (wi::to_widest (shift), precision))
          return;
        unsigned int const_shift = TREE_INT_CST_LOW (shift);
        if (code == LSHIFT_EXPR)
          {
            /* Avoid creating an undefined shift.

               ??? We could instead use min_output_precision as-is and
               optimize out-of-range shifts to zero.  However, only
               degenerate testcases shift away all their useful input data,
               and it isn't natural to drop input operations in the middle
               of vectorization.  This sort of thing should really be
               handled before vectorization.  */
            operation_precision = MAX (stmt_info->min_output_precision,
                                       const_shift + 1);
            /* We need CONST_SHIFT fewer bits of the input.  */
            min_input_precision = (MAX (operation_precision, const_shift)
                                   - const_shift);
          }
        else
          {
            /* We need CONST_SHIFT extra bits to do the operation.  */
            operation_precision = (stmt_info->min_output_precision
                                   + const_shift);
            min_input_precision = operation_precision;
          }
        break;
      }

    default:
      if (vect_truncatable_operation_p (code))
        {
          /* Input bit N has no effect on output bits N-1 and lower.  */
          operation_precision = stmt_info->min_output_precision;
          min_input_precision = operation_precision;
          break;
        }
      return;
    }

  if (operation_precision < precision)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
                         " without affecting users: %G",
                         TYPE_UNSIGNED (type) ? "unsigned" : "signed",
                         operation_precision, stmt);
      vect_set_operation_type (stmt_info, type, operation_precision,
                               TYPE_SIGN (type));
    }
  vect_set_min_input_precision (stmt_info, type, min_input_precision);
}
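
/* An example of the shift bookkeeping above (hypothetical numbers):
   for "res = x << 3" with min_output_precision == 8, the LSHIFT_EXPR
   case computes:

     operation_precision = MAX (8, 3 + 1);        // == 8
     min_input_precision = MAX (8, 3) - 3;        // == 5

   i.e. the operation must be done in at least 8 bits but only the low
   5 bits of X can reach them.  An RSHIFT_EXPR by 3 instead needs
   8 + 3 == 11 bits for both the operation and the input.  */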

/* Return true if the statement described by STMT_INFO sets a boolean
   SSA_NAME and if we know how to vectorize this kind of statement using
   vector mask types.  */

static bool
possible_vector_mask_operation_p (stmt_vec_info stmt_info)
{
  tree lhs = gimple_get_lhs (stmt_info->stmt);
  if (!lhs
      || TREE_CODE (lhs) != SSA_NAME
      || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    return false;

  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      tree_code rhs_code = gimple_assign_rhs_code (assign);
      switch (rhs_code)
        {
        CASE_CONVERT:
        case SSA_NAME:
        case BIT_NOT_EXPR:
        case BIT_IOR_EXPR:
        case BIT_XOR_EXPR:
        case BIT_AND_EXPR:
          return true;

        default:
          return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
        }
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    return true;
  return false;
}

/* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
   a vector mask type instead of a normal vector type.  Record the
   result in STMT_INFO->mask_precision.  */

static void
vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
{
  if (!possible_vector_mask_operation_p (stmt_info))
    return;

  /* If at least one boolean input uses a vector mask type,
     pick the mask type with the narrowest elements.

     ??? This is the traditional behavior.  It should always produce
     the smallest number of operations, but isn't necessarily the
     optimal choice.  For example, if we have:

       a = b & c

     where:

       - the user of a wants it to have a mask type for 16-bit elements (M16)
       - b also uses M16
       - c uses a mask type for 8-bit elements (M8)

     then picking M8 gives:

       - 1 M16->M8 pack for b
       - 1 M8 AND for a
       - 2 M8->M16 unpacks for the user of a

     whereas picking M16 would have given:

       - 2 M8->M16 unpacks for c
       - 2 M16 ANDs for a
     The number of operations is equal, but M16 would have given
     a shorter dependency chain and allowed more ILP.  */
  unsigned int precision = ~0U;
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      unsigned int nops = gimple_num_ops (assign);
      for (unsigned int i = 1; i < nops; ++i)
        {
          tree rhs = gimple_op (assign, i);
          if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
            continue;

          stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
          if (!def_stmt_info)
            /* Don't let external or constant operands influence the choice.
               We can convert them to whichever vector type we pick.  */
            continue;

          if (def_stmt_info->mask_precision)
            {
              if (precision > def_stmt_info->mask_precision)
                precision = def_stmt_info->mask_precision;
            }
        }

      /* If the statement compares two values that shouldn't use vector masks,
         try comparing the values as normal scalars instead.  */
      tree_code rhs_code = gimple_assign_rhs_code (assign);
      if (precision == ~0U
          && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
        {
          tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
          scalar_mode mode;
          tree vectype, mask_type;
          if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
              && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
              && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
              && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
            precision = GET_MODE_BITSIZE (mode);
        }
    }
  else
    {
      gphi *phi = as_a <gphi *> (stmt_info->stmt);
      for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
        {
          tree rhs = gimple_phi_arg_def (phi, i);

          stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
          if (!def_stmt_info)
            /* Don't let external or constant operands influence the choice.
               We can convert them to whichever vector type we pick.  */
            continue;

          if (def_stmt_info->mask_precision)
            {
              if (precision > def_stmt_info->mask_precision)
                precision = def_stmt_info->mask_precision;
            }
        }
    }

  if (dump_enabled_p ())
    {
      if (precision == ~0U)
        dump_printf_loc (MSG_NOTE, vect_location,
                         "using normal nonmask vectors for %G",
                         stmt_info->stmt);
      else
        dump_printf_loc (MSG_NOTE, vect_location,
                         "using boolean precision %d for %G",
                         precision, stmt_info->stmt);
    }

  stmt_info->mask_precision = precision;
}
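
/* For instance (a sketch): if none of the boolean inputs constrains
   the choice and the statement is "b = x < y" with X and Y of type
   "int", then, provided the target can compare two vectors of int and
   produce the corresponding mask type, the fallback above records
   GET_MODE_BITSIZE (SImode), typically 32, as the mask precision.  */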

/* Handle vect_determine_precisions for STMT_INFO, given that we
   have already done so for the users of its result.  */

void
vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
{
  vect_determine_min_output_precision (vinfo, stmt_info);
  if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      vect_determine_precisions_from_range (stmt_info, stmt);
      vect_determine_precisions_from_users (stmt_info, stmt);
    }
}

/* Walk backwards through the vectorizable region to determine the
   values of these fields:

   - min_output_precision
   - min_input_precision
   - operation_precision
   - operation_sign.  */

void
vect_determine_precisions (vec_info *vinfo)
{
  DUMP_VECT_SCOPE ("vect_determine_precisions");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
      unsigned int nbbs = loop->num_nodes;

      for (unsigned int i = 0; i < nbbs; i++)
        {
          basic_block bb = bbs[i];
          for (auto gsi = gsi_start_phis (bb);
               !gsi_end_p (gsi); gsi_next (&gsi))
            {
              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
              if (stmt_info)
                vect_determine_mask_precision (vinfo, stmt_info);
            }
          for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            if (!is_gimple_debug (gsi_stmt (si)))
              vect_determine_mask_precision
                (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
        }
      for (unsigned int i = 0; i < nbbs; i++)
        {
          basic_block bb = bbs[nbbs - i - 1];
          for (gimple_stmt_iterator si = gsi_last_bb (bb);
               !gsi_end_p (si); gsi_prev (&si))
            if (!is_gimple_debug (gsi_stmt (si)))
              vect_determine_stmt_precisions
                (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
          for (auto gsi = gsi_start_phis (bb);
               !gsi_end_p (gsi); gsi_next (&gsi))
            {
              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
              if (stmt_info)
                vect_determine_stmt_precisions (vinfo, stmt_info);
            }
        }
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
        {
          basic_block bb = bb_vinfo->bbs[i];
          for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
              if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
                vect_determine_mask_precision (vinfo, stmt_info);
            }
          for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
              if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
                vect_determine_mask_precision (vinfo, stmt_info);
            }
        }
      for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
        {
          for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
               !gsi_end_p (gsi); gsi_prev (&gsi))
            {
              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
              if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
                vect_determine_stmt_precisions (vinfo, stmt_info);
            }
          for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
               !gsi_end_p (gsi); gsi_next (&gsi))
            {
              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
              if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
                vect_determine_stmt_precisions (vinfo, stmt_info);
            }
        }
    }
}
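
/* Note that the two walks above deliberately run in opposite directions:
   vect_determine_mask_precision looks at the definitions of a statement's
   operands, so it is applied in forward order, while
   vect_determine_stmt_precisions looks at the users of a statement's
   result, so it is applied in backward order.  Each propagation therefore
   completes in a single sweep.  */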

typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);

struct vect_recog_func
{
  vect_recog_func_ptr fn;
  const char *name;
};

/* Note that ordering matters: the first pattern that matches a stmt is
   the one taken, so a more complex pattern usually needs to precede the
   less complex ones (widen_sum only after dot_prod or sad, for example).  */
static vect_recog_func vect_vect_recog_func_ptrs[] = {
  { vect_recog_over_widening_pattern, "over_widening" },
  /* Must come after over_widening, which narrows the shift as much as
     possible beforehand.  */
  { vect_recog_average_pattern, "average" },
  { vect_recog_mulhs_pattern, "mult_high" },
  { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
  { vect_recog_widen_mult_pattern, "widen_mult" },
  { vect_recog_dot_prod_pattern, "dot_prod" },
  { vect_recog_sad_pattern, "sad" },
  { vect_recog_widen_sum_pattern, "widen_sum" },
  { vect_recog_pow_pattern, "pow" },
  { vect_recog_widen_shift_pattern, "widen_shift" },
  { vect_recog_rotate_pattern, "rotate" },
  { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
  { vect_recog_divmod_pattern, "divmod" },
  { vect_recog_mult_pattern, "mult" },
  { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
  { vect_recog_bool_pattern, "bool" },
  /* This must come before mask conversion, and includes the parts
     of mask conversion that are needed for gather and scatter
     internal functions.  */
  { vect_recog_gather_scatter_pattern, "gather_scatter" },
  { vect_recog_mask_conversion_pattern, "mask_conversion" },
  { vect_recog_widen_plus_pattern, "widen_plus" },
  { vect_recog_widen_minus_pattern, "widen_minus" },
};

const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
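
/* A minimal sketch of what a table entry must provide (hypothetical
   recognizer, for illustration only; it is not part of the table):
   each function matches the vect_recog_func_ptr signature, returning
   NULL to decline or the final pattern statement on success, in which
   case it must also set *TYPE_OUT to the vector type of that statement:

     static gimple *
     vect_recog_noop_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
                              tree *type_out)
     {
       return NULL;   // decline: this recognizer never matches
     }

   A real recognizer would also append any helper statements it builds
   to the pattern def sequence and check target support before
   returning, as the existing vect_recog_* functions do.  */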

/* Mark statements that are involved in a pattern.  */

void
vect_mark_pattern_stmts (vec_info *vinfo,
                         stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
                         tree pattern_vectype)
{
  stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
  gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);

  gimple *orig_pattern_stmt = NULL;
  if (is_pattern_stmt_p (orig_stmt_info))
    {
      /* We're replacing a statement in an existing pattern definition
         sequence.  */
      orig_pattern_stmt = orig_stmt_info->stmt;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "replacing earlier pattern %G", orig_pattern_stmt);

      /* To keep the book-keeping simple, just swap the lhs of the
         old and new statements, so that the old one has a valid but
         unused lhs.  */
      tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
      gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
      gimple_set_lhs (pattern_stmt, old_lhs);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);

      /* Switch to the statement that ORIG replaces.  */
      orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);

      /* We shouldn't be replacing the main pattern statement.  */
      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
                  != orig_pattern_stmt);
    }

  if (def_seq)
    for (gimple_stmt_iterator si = gsi_start (def_seq);
         !gsi_end_p (si); gsi_next (&si))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "extra pattern stmt: %G", gsi_stmt (si));
        stmt_vec_info pattern_stmt_info
          = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
                                    orig_stmt_info, pattern_vectype);
        /* Stmts in the def sequence are not vectorizable cycle or
           induction defs, instead they should all be vect_internal_def
           feeding the main pattern stmt which retains this def type.  */
        STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
      }

  if (orig_pattern_stmt)
    {
      vect_init_pattern_stmt (vinfo, pattern_stmt,
                              orig_stmt_info, pattern_vectype);

      /* Insert all the new pattern statements before the original one.  */
      gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
      gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
                                               orig_def_seq);
      gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
      gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);

      /* Remove the pattern statement that this new pattern replaces.  */
      gsi_remove (&gsi, false);
    }
  else
    vect_set_pattern_stmt (vinfo,
                           pattern_stmt, orig_stmt_info, pattern_vectype);

  /* Transfer reduction path info to the pattern.  */
  if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
    {
      tree lookfor = gimple_op (orig_stmt_info_saved->stmt,
                                1 + STMT_VINFO_REDUC_IDX (orig_stmt_info));
      /* Search the pattern def sequence and the main pattern stmt.  Note
         we may have inserted all into a containing pattern def sequence
         so the following is a bit awkward.  */
      gimple_stmt_iterator si;
      gimple *s;
      if (def_seq)
        {
          si = gsi_start (def_seq);
          s = gsi_stmt (si);
          gsi_next (&si);
        }
      else
        {
          si = gsi_none ();
          s = pattern_stmt;
        }
      do
        {
          bool found = false;
          for (unsigned i = 1; i < gimple_num_ops (s); ++i)
            if (gimple_op (s, i) == lookfor)
              {
                STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i - 1;
                lookfor = gimple_get_lhs (s);
                found = true;
                break;
              }
          if (s == pattern_stmt)
            {
              if (!found && dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "failed to update reduction index.\n");
              break;
            }
          if (gsi_end_p (si))
            s = pattern_stmt;
          else
            {
              s = gsi_stmt (si);
              if (s == pattern_stmt)
                /* Found the end inside a bigger pattern def seq.  */
                si = gsi_none ();
              else
                gsi_next (&si);
            }
        }
      while (1);
    }
}
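
/* An illustration of the reduction bookkeeping above (hypothetical SSA
   names): if the original statement "sum_1 = a_2 + sum_0" had
   STMT_VINFO_REDUC_IDX == 1 (pointing at sum_0), the walk looks for
   sum_0 first in the pattern def sequence and then in the main pattern
   statement, and after each match continues with that statement's lhs,
   so the reduction chain stays threaded through the new statements.  */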

/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
        computation pattern.
   STMT_INFO: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
   a sequence of statements that has the same functionality and can be
   used to replace STMT_INFO.  It returns the last statement in the sequence
   and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
   PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
   statement, having first checked that the target supports the new operation
   in that type.

   This function also does some bookkeeping, as explained in the documentation
   for vect_pattern_recog.  */

static void
vect_pattern_recog_1 (vec_info *vinfo,
                      vect_recog_func *recog_func, stmt_vec_info stmt_info)
{
  gimple *pattern_stmt;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;

  /* If this statement has already been replaced with pattern statements,
     leave the original statement alone, since the first match wins.
     Instead try to match against the definition statements that feed
     the main pattern statement.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
           !gsi_end_p (gsi); gsi_next (&gsi))
        vect_pattern_recog_1 (vinfo, recog_func,
                              vinfo->lookup_stmt (gsi_stmt (gsi)));
      return;
    }

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
  pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
  if (!pattern_stmt)
    {
      /* Clear any half-formed pattern definition sequence.  */
      STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
      return;
    }

  loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  gcc_assert (pattern_vectype);

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "%s pattern recognized: %G",
                     recog_func->name, pattern_stmt);

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    {
      unsigned ix, ix2;
      stmt_vec_info *elem_ptr;
      VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
                             elem_ptr, *elem_ptr == stmt_info);
    }
}
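
/* For example (illustration only): if an earlier recognizer has already
   replaced statement S with a pattern, calling vect_pattern_recog_1 on S
   again does not touch S itself; the code above instead redirects the
   search to the statements in S's pattern definition sequence, so later
   recognizers can still match the helper statements that the first
   pattern introduced.  */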


/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
        computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
        that provides the same functionality and that can be vectorized.  We
        also record some information in the stmt_vec_info of the relevant
        stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

         stmt                      in_pattern_p  related_stmt   vec_stmt
         S1: a_i = ....                 -             -              -
         S2: a_2 = ..use(a_i)..         -             -              -
         S3: a_1 = ..use(a_2)..         -             -              -
         S4: a_0 = ..use(a_1)..         -             -              -
         S5: ... = ..use(a_0)..         -             -              -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

                                   in_pattern_p  related_stmt   vec_stmt
         S1: a_i = ....                 -             -              -
         S2: a_2 = ..use(a_i)..         -             -              -
         S3: a_1 = ..use(a_2)..         -             -              -
         S4: a_0 = ..use(a_1)..         true          S6             -
           '---> S6: a_new = ....       -             S4             -
         S5: ... = ..use(a_0)..         -             -              -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

                                   in_pattern_p  related_stmt   vec_stmt
         S1: a_i = ....                 -             -              -
         S2: a_2 = ..use(a_i)..         -             -              -
         S3: a_1 = ..use(a_2)..         -             -              -
       > VS6: va_new = ....             -             -              -
         S4: a_0 = ..use(a_1)..         true          S6             VS6
           '---> S6: a_new = ....       -             S4             VS6
       > VS5: ... = ..vuse(va_new)..    -             -              -
         S5: ... = ..use(a_0)..         -             -              -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

         VS6: va_new = ....
         VS5: ... = ..vuse(va_new)..

   In the case of more than one pattern statement, e.g., widen-mult with an
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
       '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
       '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */

void
vect_pattern_recog (vec_info *vinfo)
{
  class loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;

  vect_determine_precisions (vinfo);

  DUMP_VECT_SCOPE ("vect_pattern_recog");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;

      /* Scan through the loop stmts, applying the pattern recognition
         functions starting at each stmt visited:  */
      for (i = 0; i < nbbs; i++)
        {
          basic_block bb = bbs[i];
          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            {
              if (is_gimple_debug (gsi_stmt (si)))
                continue;
              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
              /* Scan over all generic vect_recog_xxx_pattern functions.  */
              for (j = 0; j < NUM_PATTERNS; j++)
                vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
                                      stmt_info);
            }
        }
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
        for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
             !gsi_end_p (gsi); gsi_next (&gsi))
          {
            stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
            if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
              continue;

            /* Scan over all generic vect_recog_xxx_pattern functions.  */
            for (j = 0; j < NUM_PATTERNS; j++)
              vect_pattern_recog_1 (vinfo,
                                    &vect_vect_recog_func_ptrs[j], stmt_info);
          }
    }

  /* After this no more add_stmt calls are allowed.  */
  vinfo->stmt_vec_info_ro = true;
}