/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2021 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "fold-const-call.h"
#include "attribs.h"
#include "cgraph.h"
#include "omp-simd-clone.h"
#include "predict.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"

/* Return true if we have a useful VR_RANGE range for VAR, storing it
   in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */

static bool
vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
{
  value_range_kind vr_type = get_range_info (var, min_value, max_value);
  wide_int nonzero = get_nonzero_bits (var);
  signop sgn = TYPE_SIGN (TREE_TYPE (var));
  if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
                                         nonzero, sgn) == VR_RANGE)
    {
      if (dump_enabled_p ())
        {
          dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
          dump_printf (MSG_NOTE, " has range [");
          dump_hex (MSG_NOTE, *min_value);
          dump_printf (MSG_NOTE, ", ");
          dump_hex (MSG_NOTE, *max_value);
          dump_printf (MSG_NOTE, "]\n");
        }
      return true;
    }
  else
    {
      if (dump_enabled_p ())
        {
          dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
          dump_printf (MSG_NOTE, " has no range info\n");
        }
      return false;
    }
}

/* Report that we've found an instance of pattern NAME in
   statement STMT.  */

static void
vect_pattern_detected (const char *name, gimple *stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
}

/* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
   return the pattern statement's stmt_vec_info.  Set its vector type to
   VECTYPE if it doesn't have one already.  */

static stmt_vec_info
vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
                        stmt_vec_info orig_stmt_info, tree vectype)
{
  stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));

  pattern_stmt_info->pattern_stmt_p = true;
  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
    {
      gcc_assert (VECTOR_BOOLEAN_TYPE_P (vectype)
                  == vect_use_mask_type_p (orig_stmt_info));
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
      pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
    }
  return pattern_stmt_info;
}

/* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
   Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
   have one already.  */

static void
vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
                       stmt_vec_info orig_stmt_info, tree vectype)
{
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info)
    = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
}

/* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
   is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
   be different from the vector type of the final pattern statement.
   If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
   from which it was derived.  */

static inline void
append_pattern_def_seq (vec_info *vinfo,
                        stmt_vec_info stmt_info, gimple *new_stmt,
                        tree vectype = NULL_TREE,
                        tree scalar_type_for_mask = NULL_TREE)
{
  gcc_assert (!scalar_type_for_mask
              == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
  if (vectype)
    {
      stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
      if (scalar_type_for_mask)
        new_stmt_info->mask_precision
          = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
    }
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
                                      new_stmt);
}
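
/* As a usage sketch (CAST_VAR, NARROW_OP, WIDE_TYPE and WIDE_VECTYPE are
   illustrative placeholders, not identifiers used elsewhere in this file),
   a pattern that needs an intermediate conversion might do:

     tree cast_var = vect_recog_temp_ssa_var (wide_type, NULL);
     gassign *cast = gimple_build_assign (cast_var, NOP_EXPR, narrow_op);
     append_pattern_def_seq (vinfo, stmt_info, cast, wide_vectype);

   after which CAST_VAR can feed the final pattern statement.  */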

/* The caller wants to perform new operations on vect_external variable
   VAR, so that the result of the operations would also be vect_external.
   Return the edge on which the operations can be performed, if one exists.
   Return null if the operations should instead be treated as part of
   the pattern that needs them.  */

static edge
vect_get_external_def_edge (vec_info *vinfo, tree var)
{
  edge e = NULL;
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      e = loop_preheader_edge (loop_vinfo->loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (var))
        {
          basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
          if (bb == NULL
              || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
            e = NULL;
        }
    }
  return e;
}

/* Return true if the target supports a vector version of CODE,
   where CODE is known to map to a direct optab.  ITYPE specifies
   the type of (some of) the scalar inputs and OTYPE specifies the
   type of the scalar result.

   If CODE allows the inputs and outputs to have different types
   (such as for WIDEN_SUM_EXPR), it is the input mode rather
   than the output mode that determines the appropriate target pattern.
   Operand 0 of the target pattern then specifies the mode that the output
   must have.

   When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
   Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
   is nonnull.  */

static bool
vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
                                 tree itype, tree *vecotype_out,
                                 tree *vecitype_out = NULL)
{
  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
  if (!vecitype)
    return false;

  tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
  if (!vecotype)
    return false;

  optab optab = optab_for_tree_code (code, vecitype, optab_default);
  if (!optab)
    return false;

  insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
  if (icode == CODE_FOR_nothing
      || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
    return false;

  *vecotype_out = vecotype;
  if (vecitype_out)
    *vecitype_out = vecitype;
  return true;
}

/* Round bit precision PRECISION up to a full element.  */

static unsigned int
vect_element_precision (unsigned int precision)
{
  precision = 1 << ceil_log2 (precision);
  return MAX (precision, BITS_PER_UNIT);
}
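
/* For example, assuming BITS_PER_UNIT == 8, the function above gives:

     vect_element_precision (17) == 32
     vect_element_precision (9) == 16
     vect_element_precision (3) == 8

   i.e. the result is the next power of two, but never less than a byte.  */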

/* If OP is defined by a statement that's being considered for vectorization,
   return information about that statement, otherwise return NULL.  */

static stmt_vec_info
vect_get_internal_def (vec_info *vinfo, tree op)
{
  stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
  if (def_stmt_info
      && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
    return def_stmt_info;
  return NULL;
}

/* Check whether NAME, an ssa-name used in STMT_VINFO,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
                   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vec_info def_stmt_info;
  if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, vinfo, &dt))
    return false;

  return true;
}
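
/* For instance, given

     short s0 = ...;
     int n = (int) s0;

   calling type_conversion_p on "n" succeeds with *ORIG_TYPE == short and
   *PROMOTION == true, since the precision at least doubles.  A cast from
   short to unsigned short would instead set *PROMOTION to false (and be
   rejected entirely if CHECK_SIGN is true).  */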

/* Holds information about an input operand after some sign changes
   and type promotions have been peeled away.  */
class vect_unpromoted_value {
public:
  vect_unpromoted_value ();

  void set_op (tree, vect_def_type, stmt_vec_info = NULL);

  /* The value obtained after peeling away zero or more casts.  */
  tree op;

  /* The type of OP.  */
  tree type;

  /* The definition type of OP.  */
  vect_def_type dt;

  /* If OP is the result of peeling at least one cast, and if the cast
     of OP itself is a vectorizable statement, CASTER identifies that
     statement, otherwise it is null.  */
  stmt_vec_info caster;
};

inline vect_unpromoted_value::vect_unpromoted_value ()
  : op (NULL_TREE),
    type (NULL_TREE),
    dt (vect_uninitialized_def),
    caster (NULL)
{
}

/* Set the operand to OP_IN, its definition type to DT_IN, and the
   statement that casts it to CASTER_IN.  */

inline void
vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
                               stmt_vec_info caster_in)
{
  op = op_in;
  type = TREE_TYPE (op);
  dt = dt_in;
  caster = caster_in;
}

/* If OP is a vectorizable SSA name, strip a sequence of integer conversions
   to reach some vectorizable inner operand OP', continuing as long as it
   is possible to convert OP' back to OP using a possible sign change
   followed by a possible promotion P.  Return this OP', or null if OP is
   not a vectorizable SSA name.  If there is a promotion P, describe its
   input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
   is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
   have more than one user.

   A successful return means that it is possible to go from OP' to OP
   via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
   whereas the cast from UNPROM to OP might be a promotion, a sign
   change, or a nop.

   E.g. say we have:

       signed short *ptr = ...;
       signed short C = *ptr;
       unsigned short B = (unsigned short) C;    // sign change
       signed int A = (signed int) B;            // unsigned promotion
       ...possible other uses of A...
       unsigned int OP = (unsigned int) A;       // sign change

   In this case it's possible to go directly from C to OP using:

       OP = (unsigned int) (unsigned short) C;
            +------------+ +--------------+
               promotion      sign change

   so OP' would be C.  The input to the promotion is B, so UNPROM
   would describe B.  */

static tree
vect_look_through_possible_promotion (vec_info *vinfo, tree op,
                                      vect_unpromoted_value *unprom,
                                      bool *single_use_p = NULL)
{
  tree res = NULL_TREE;
  tree op_type = TREE_TYPE (op);
  unsigned int orig_precision = TYPE_PRECISION (op_type);
  unsigned int min_precision = orig_precision;
  stmt_vec_info caster = NULL;
  while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
    {
      /* See whether OP is simple enough to vectorize.  */
      stmt_vec_info def_stmt_info;
      gimple *def_stmt;
      vect_def_type dt;
      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
        break;

      /* If OP is the input of a demotion, skip over it to see whether
         OP is itself the result of a promotion.  If so, the combined
         effect of the promotion and the demotion might fit the required
         pattern, otherwise neither operation fits.

         This copes with cases such as the result of an arithmetic
         operation being truncated before being stored, and where that
         arithmetic operation has been recognized as an over-widened one.  */
      if (TYPE_PRECISION (op_type) <= min_precision)
        {
          /* Use OP as the UNPROM described above if we haven't yet
             found a promotion, or if using the new input preserves the
             sign of the previous promotion.  */
          if (!res
              || TYPE_PRECISION (unprom->type) == orig_precision
              || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
            {
              unprom->set_op (op, dt, caster);
              min_precision = TYPE_PRECISION (op_type);
            }
          /* Stop if we've already seen a promotion and if this
             conversion does more than change the sign.  */
          else if (TYPE_PRECISION (op_type)
                   != TYPE_PRECISION (unprom->type))
            break;

          /* The sequence now extends to OP.  */
          res = op;
        }

      /* See whether OP is defined by a cast.  Record it as CASTER if
         the cast is potentially vectorizable.  */
      if (!def_stmt)
        break;
      caster = def_stmt_info;

      /* Ignore pattern statements, since we don't link uses for them.  */
      if (caster
          && single_use_p
          && !STMT_VINFO_RELATED_STMT (caster)
          && !has_single_use (res))
        *single_use_p = false;

      gassign *assign = dyn_cast <gassign *> (def_stmt);
      if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
        break;

      /* Continue with the input to the cast.  */
      op = gimple_assign_rhs1 (def_stmt);
      op_type = TREE_TYPE (op);
    }
  return res;
}

/* OP is an integer operand to an operation that returns TYPE, and we
   want to treat the operation as a widening one.  So far we can treat
   it as widening from *COMMON_TYPE.

   Return true if OP is suitable for such a widening operation,
   either widening from *COMMON_TYPE or from some supertype of it.
   Update *COMMON_TYPE to the supertype in the latter case.

   SHIFT_P is true if OP is a shift amount.  */

static bool
vect_joust_widened_integer (tree type, bool shift_p, tree op,
                            tree *common_type)
{
  /* Calculate the minimum precision required by OP, without changing
     the sign of either operand.  */
  unsigned int precision;
  if (shift_p)
    {
      if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
        return false;
      precision = TREE_INT_CST_LOW (op);
    }
  else
    {
      precision = wi::min_precision (wi::to_widest (op),
                                     TYPE_SIGN (*common_type));
      if (precision * 2 > TYPE_PRECISION (type))
        return false;
    }

  /* If OP requires a wider type, switch to that type.  The checks
     above ensure that this is still narrower than the result.  */
  precision = vect_element_precision (precision);
  if (TYPE_PRECISION (*common_type) < precision)
    *common_type = build_nonstandard_integer_type
      (precision, TYPE_UNSIGNED (*common_type));
  return true;
}

/* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
   is narrower than TYPE, storing the supertype in *COMMON_TYPE if so.  */

static bool
vect_joust_widened_type (tree type, tree new_type, tree *common_type)
{
  if (types_compatible_p (*common_type, new_type))
    return true;

  /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
  if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
      && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
    return true;

  /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
  if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
      && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
    {
      *common_type = new_type;
      return true;
    }

  /* We have mismatched signs, with the signed type being
     no wider than the unsigned type.  In this case we need
     a wider signed type.  */
  unsigned int precision = MAX (TYPE_PRECISION (*common_type),
                                TYPE_PRECISION (new_type));
  precision *= 2;
  if (precision * 2 > TYPE_PRECISION (type))
    return false;

  *common_type = build_nonstandard_integer_type (precision, false);
  return true;
}
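
/* For example, if *COMMON_TYPE is "unsigned char" and NEW_TYPE is
   "signed char", neither type can hold all values of the other, so the
   function above doubles the 8-bit precision and switches *COMMON_TYPE
   to a signed 16-bit type -- but only if TYPE has at least 32 bits,
   since the supertype must remain no more than half the width of the
   result.  */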

/* Check whether STMT_INFO can be viewed as a tree of integer operations
   in which each node either performs CODE or WIDENED_CODE, and where
   each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
   specifies the maximum number of leaf operands.  SHIFT_P says whether
   CODE and WIDENED_CODE are some sort of shift.

   If STMT_INFO is such a tree, return the number of leaf operands
   and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
   to a type that (a) is narrower than the result of STMT_INFO and
   (b) can hold all leaf operand values.

   Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
   exists.  */

static unsigned int
vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
                      tree_code widened_code, bool shift_p,
                      unsigned int max_nops,
                      vect_unpromoted_value *unprom, tree *common_type)
{
  /* Check for an integer operation with the right code.  */
  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign)
    return 0;

  tree_code rhs_code = gimple_assign_rhs_code (assign);
  if (rhs_code != code && rhs_code != widened_code)
    return 0;

  tree type = gimple_expr_type (assign);
  if (!INTEGRAL_TYPE_P (type))
    return 0;

  /* Assume that both operands will be leaf operands.  */
  max_nops -= 2;

  /* Check the operands.  */
  unsigned int next_op = 0;
  for (unsigned int i = 0; i < 2; ++i)
    {
      vect_unpromoted_value *this_unprom = &unprom[next_op];
      unsigned int nops = 1;
      tree op = gimple_op (assign, i + 1);
      if (i == 1 && TREE_CODE (op) == INTEGER_CST)
        {
          /* We already have a common type from earlier operands.
             Update it to account for OP.  */
          this_unprom->set_op (op, vect_constant_def);
          if (!vect_joust_widened_integer (type, shift_p, op, common_type))
            return 0;
        }
      else
        {
          /* Only allow shifts by constants.  */
          if (shift_p && i == 1)
            return 0;

          if (!vect_look_through_possible_promotion (vinfo, op, this_unprom))
            return 0;

          if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
            {
              /* The operand isn't widened.  If STMT_INFO has the code
                 for an unwidened operation, recursively check whether
                 this operand is a node of the tree.  */
              if (rhs_code != code
                  || max_nops == 0
                  || this_unprom->dt != vect_internal_def)
                return 0;

              /* Give back the leaf slot allocated above now that we're
                 not treating this as a leaf operand.  */
              max_nops += 1;

              /* Recursively process the definition of the operand.  */
              stmt_vec_info def_stmt_info
                = vinfo->lookup_def (this_unprom->op);
              nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
                                           widened_code, shift_p, max_nops,
                                           this_unprom, common_type);
              if (nops == 0)
                return 0;

              max_nops -= nops;
            }
          else
            {
              /* Make sure that the operand is narrower than the result.  */
              if (TYPE_PRECISION (this_unprom->type) * 2
                  > TYPE_PRECISION (type))
                return 0;

              /* Update COMMON_TYPE for the new operand.  */
              if (i == 0)
                *common_type = this_unprom->type;
              else if (!vect_joust_widened_type (type, this_unprom->type,
                                                 common_type))
                return 0;
            }
        }
      next_op += nops;
    }
  return next_op;
}
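
/* As an illustration, for:

     short a, b, c, d;
     int t1 = (int) a + (int) b;
     int t2 = (int) c + (int) d;
     int res = t1 + t2;

   a call on RES's statement with CODE == PLUS_EXPR and MAX_NOPS == 4
   can return 4, describing A, B, C and D in UNPROM[0..3] and setting
   *COMMON_TYPE to "short" (assuming T1 and T2 are single-use internal
   definitions).  */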

/* Helper to return a new temporary SSA name of TYPE for a pattern, with
   STMT as its defining statement.  If STMT is NULL, the caller must set
   SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* STMT2_INFO describes a type conversion that could be split into STMT1
   followed by a version of STMT2_INFO that takes NEW_RHS as its first
   input.  Try to do this using pattern statements, returning true on
   success.  */

static bool
vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
                      gimple *stmt1, tree vectype)
{
  if (is_pattern_stmt_p (stmt2_info))
    {
      /* STMT2_INFO is part of a pattern.  Get the statement to which
         the pattern is attached.  */
      stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
      vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Splitting pattern statement: %G", stmt2_info->stmt);

      /* Since STMT2_INFO is a pattern statement, we can change it
         in-situ without worrying about changing the code for the
         containing block.  */
      gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
          dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
                           stmt2_info->stmt);
        }

      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
      if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
        /* STMT2_INFO is the actual pattern statement.  Add STMT1
           to the end of the definition sequence.  */
        gimple_seq_add_stmt_without_update (def_seq, stmt1);
      else
        {
          /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
             before it.  */
          gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
          gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
        }
      return true;
    }
  else
    {
      /* STMT2_INFO doesn't yet have a pattern.  Try to create a
         two-statement pattern now.  */
      gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
      tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
      tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
      if (!lhs_vectype)
        return false;

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Splitting statement: %G", stmt2_info->stmt);

      /* Add STMT1 as a singleton pattern definition sequence.  */
      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
      vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
      gimple_seq_add_stmt_without_update (def_seq, stmt1);

      /* Build the second of the two pattern statements.  */
      tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
      gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
      vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "into pattern statements: %G", stmt1);
          dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
        }

      return true;
    }
}

/* Convert UNPROM to TYPE and return the result, adding new statements
   to STMT_INFO's pattern definition statements if no better way is
   available.  VECTYPE is the vector form of TYPE.  */

static tree
vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
                    vect_unpromoted_value *unprom, tree vectype)
{
  /* Check for a no-op conversion.  */
  if (types_compatible_p (type, TREE_TYPE (unprom->op)))
    return unprom->op;

  /* Allow the caller to create constant vect_unpromoted_values.  */
  if (TREE_CODE (unprom->op) == INTEGER_CST)
    return wide_int_to_tree (type, wi::to_widest (unprom->op));

  tree input = unprom->op;
  if (unprom->caster)
    {
      tree lhs = gimple_get_lhs (unprom->caster->stmt);
      tree lhs_type = TREE_TYPE (lhs);

      /* If the result of the existing cast is the right width, use it
         instead of the source of the cast.  */
      if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
        input = lhs;
      /* If the precision we want is between the source and result
         precisions of the existing cast, try splitting the cast into
         two and tapping into a mid-way point.  */
      else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
               && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
        {
          /* In order to preserve the semantics of the original cast,
             give the mid-way point the same signedness as the input value.

             It would be possible to use a signed type here instead if
             TYPE is signed and UNPROM->TYPE is unsigned, but that would
             make the sign of the midtype sensitive to the order in
             which we process the statements, since the signedness of
             TYPE is the signedness required by just one of possibly
             many users.  Also, unsigned promotions are usually as cheap
             as or cheaper than signed ones, so it's better to keep an
             unsigned promotion.  */
          tree midtype = build_nonstandard_integer_type
            (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
          tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
          if (vec_midtype)
            {
              input = vect_recog_temp_ssa_var (midtype, NULL);
              gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
                                                       unprom->op);
              if (!vect_split_statement (vinfo, unprom->caster, input,
                                         new_stmt, vec_midtype))
                append_pattern_def_seq (vinfo, stmt_info,
                                        new_stmt, vec_midtype);
            }
        }

      /* See if we can reuse an existing result.  */
      if (types_compatible_p (type, TREE_TYPE (input)))
        return input;
    }

  /* We need a new conversion statement.  */
  tree new_op = vect_recog_temp_ssa_var (type, NULL);
  gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);

  /* If OP is an external value, see if we can insert the new statement
     on an incoming edge.  */
  if (input == unprom->op && unprom->dt == vect_external_def)
    if (edge e = vect_get_external_def_edge (vinfo, input))
      {
        basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
        gcc_assert (!new_bb);
        return new_op;
      }

  /* As a (common) last resort, add the statement to the pattern itself.  */
  append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
  return new_op;
}

/* Invoke vect_convert_input for N elements of UNPROM and store the
   result in the corresponding elements of RESULT.  */

static void
vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
                     tree *result, tree type, vect_unpromoted_value *unprom,
                     tree vectype)
{
  for (unsigned int i = 0; i < n; ++i)
    {
      unsigned int j;
      for (j = 0; j < i; ++j)
        if (unprom[j].op == unprom[i].op)
          break;
      if (j < i)
        result[i] = result[j];
      else
        result[i] = vect_convert_input (vinfo, stmt_info,
                                        type, &unprom[i], vectype);
    }
}

/* The caller has created a (possibly empty) sequence of pattern definition
   statements followed by a single statement PATTERN_STMT.  Cast the result
   of this final statement to TYPE.  If a new statement is needed, add
   PATTERN_STMT to the end of STMT_INFO's pattern definition statements
   and return the new statement, otherwise return PATTERN_STMT as-is.
   VECITYPE is the vector form of PATTERN_STMT's result type.  */

static gimple *
vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
                     gimple *pattern_stmt, tree vecitype)
{
  tree lhs = gimple_get_lhs (pattern_stmt);
  if (!types_compatible_p (type, TREE_TYPE (lhs)))
    {
      append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
      tree cast_var = vect_recog_temp_ssa_var (type, NULL);
      pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
    }
  return pattern_stmt;
}

/* Return true if STMT_VINFO describes a reduction for which reassociation
   is allowed.  If STMT_INFO is part of a group, assume that it's part of
   a reduction chain and optimistically assume that all statements
   except the last allow reassociation.
   Also require it to have code CODE and to be a reduction
   in the outermost loop.  When returning true, store the operands in
   *OP0_OUT and *OP1_OUT.  */

static bool
vect_reassociating_reduction_p (vec_info *vinfo,
                                stmt_vec_info stmt_info, tree_code code,
                                tree *op0_out, tree *op1_out)
{
  loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
  if (!loop_info)
    return false;

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || gimple_assign_rhs_code (assign) != code)
    return false;

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  class loop *loop = LOOP_VINFO_LOOP (loop_info);
  if (loop && nested_in_vect_loop_p (loop, stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
    {
      if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
                                       code))
        return false;
    }
  else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
    return false;

  *op0_out = gimple_assign_rhs1 (assign);
  *op1_out = gimple_assign_rhs2 (assign);
  if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
    std::swap (*op0_out, *op1_out);
  return true;
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_dot_prod_pattern (vec_info *vinfo,
                             stmt_vec_info stmt_vinfo, tree *type_out)
{
  tree oprnd0, oprnd1;
  gimple *last_stmt = stmt_vinfo->stmt;
  tree type, half_type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
                                       &oprnd0, &oprnd1))
    return NULL;

  type = gimple_expr_type (last_stmt);

  vect_unpromoted_value unprom_mult;
  oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (!oprnd0)
    return NULL;

  stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
  if (!mult_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom0[2];
  if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
                             false, 2, unprom0, &half_type))
    return NULL;

  /* If there are two widening operations, make sure they agree on
     the sign of the extension.  */
  if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
      && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
                                        type_out, &half_vectype))
    return NULL;

  /* Get the inputs in the appropriate types.  */
  tree mult_oprnd[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
                       unprom0, half_vectype);

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
                                      mult_oprnd[0], mult_oprnd[1], oprnd1);

  return pattern_stmt;
}
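
/* A source loop that this pattern typically matches (a sketch, not a
   specific testcase):

     signed short a[N], b[N];
     int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += (int) a[i] * (int) b[i];

   Here 'type' is "signed short", TYPE1 and TYPE2 are "int", half_type is
   "signed short", and the reduction becomes
   DOT_PROD_EXPR <a[i], b[i], sum_0> when the target provides a
   dot-product optab for those modes.  */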


/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
   */

static gimple *
vect_recog_sad_pattern (vec_info *vinfo,
                        stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree half_type;

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          [DAD = (TYPE2) DAD;  #optional]
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  tree plus_oprnd0, plus_oprnd1;
  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
                                       &plus_oprnd0, &plus_oprnd1))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Any non-truncating sequence of conversions is OK here, since
     with a successful match, the result of the ABS(U) is known to fit
     within the nonnegative range of the result type.  (It cannot be the
     negative of the minimum signed value due to the range of the widening
     MINUS_EXPR.)  */
  vect_unpromoted_value unprom_abs;
  plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
                                                      &unprom_abs);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in
     the loop body.
     Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (!plus_oprnd0)
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
  if (!abs_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
  if (!abs_stmt
      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
          && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  tree abs_type = TREE_TYPE (abs_oprnd);
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* Peel off conversions from the ABS input.  This can involve sign
     changes (e.g. from an unsigned subtraction to a signed ABS input)
     or signed promotion, but it can't include unsigned promotion.
     (Note that ABS of an unsigned promotion should have been folded
     away before now anyway.)  */
  vect_unpromoted_value unprom_diff;
  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
                                                    &unprom_diff);
  if (!abs_oprnd)
    return NULL;
  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
      && TYPE_UNSIGNED (unprom_diff.type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
  if (!diff_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom[2];
  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
                             WIDEN_MINUS_EXPR, false, 2, unprom, &half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
                                        type_out, &half_vectype))
    return NULL;

  /* Get the inputs to the SAD_EXPR in the appropriate types.  */
  tree sad_oprnd[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
                       unprom, half_vectype);

  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
                                              sad_oprnd[1], plus_oprnd1);

  return pattern_stmt;
}
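
/* For instance, the classic SAD kernel

     unsigned char a[N], b[N];
     int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += __builtin_abs (a[i] - b[i]);

   matches the pattern above: the subtraction and ABS happen in "int" and
   the result is accumulated into "int", so half_type is "unsigned char"
   and the loop can use SAD_EXPR (e.g. PSADBW on x86) if the target
   supports it.  */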

/* Recognize an operation that performs ORIG_CODE on widened inputs,
   so that it can be treated as though it had the form:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
    | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
    | RES_TYPE res = a_extend ORIG_CODE b_extend;

   Try to replace the pattern with:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
    | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op

   where EXT_TYPE is wider than HALF_TYPE but has the same signedness.

   SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
   name of the pattern being matched, for dump purposes.  */

static gimple *
vect_recog_widen_op_pattern (vec_info *vinfo,
                             stmt_vec_info last_stmt_info, tree *type_out,
                             tree_code orig_code, tree_code wide_code,
                             bool shift_p, const char *name)
{
  gimple *last_stmt = last_stmt_info->stmt;

  vect_unpromoted_value unprom[2];
  tree half_type;
  if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
                             shift_p, 2, unprom, &half_type))
    return NULL;

  /* Pattern detected.  */
  vect_pattern_detected (name, last_stmt);

  tree type = gimple_expr_type (last_stmt);
  tree itype = type;
  if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
      || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
                                            TYPE_UNSIGNED (half_type));

  /* Check target support.  */
  tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
  enum tree_code dummy_code;
  int dummy_int;
  auto_vec<tree> dummy_vec;
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
                                          vecitype, vectype,
                                          &dummy_code, &dummy_code,
                                          &dummy_int, &dummy_vec))
    return NULL;

  *type_out = get_vectype_for_scalar_type (vinfo, type);
  if (!*type_out)
    return NULL;

  tree oprnd[2];
  vect_convert_inputs (vinfo, last_stmt_info,
                       2, oprnd, half_type, unprom, vectype);

  tree var = vect_recog_temp_ssa_var (itype, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, wide_code,
                                              oprnd[0], oprnd[1]);

  return vect_convert_output (vinfo, last_stmt_info,
                              type, pattern_stmt, vecitype);
}

/* Try to detect multiplication on widened inputs, converting MULT_EXPR
   to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
                               tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
                                      MULT_EXPR, WIDEN_MULT_EXPR, false,
                                      "vect_recog_widen_mult_pattern");
}
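
/* For example, the multiplication in

     short a, b;
     int prod = (int) a * (int) b;

   can be rewritten as WIDEN_MULT_EXPR <a, b>, provided the target has a
   vector widening-multiply instruction for the modes involved.  */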

/* Try to detect addition on widened inputs, converting PLUS_EXPR
   to WIDEN_PLUS_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
                               tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
                                      PLUS_EXPR, WIDEN_PLUS_EXPR, false,
                                      "vect_recog_widen_plus_pattern");
}

/* Try to detect subtraction on widened inputs, converting MINUS_EXPR
   to WIDEN_MINUS_EXPR.  See vect_recog_widen_op_pattern for details.  */
static gimple *
vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
                                tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
                                      MINUS_EXPR, WIDEN_MINUS_EXPR, false,
                                      "vect_recog_widen_minus_pattern");
}

/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi or powif, and N being
   either 2 or 0.5.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)
   */

static gimple *
vect_recog_pow_pattern (vec_info *vinfo,
                        stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree base, exp;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  switch (gimple_call_combined_fn (last_stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      break;

    default:
      return NULL;
    }

  base = gimple_call_arg (last_stmt, 0);
  exp = gimple_call_arg (last_stmt, 1);
  if (TREE_CODE (exp) != REAL_CST
      && TREE_CODE (exp) != INTEGER_CST)
    {
      if (flag_unsafe_math_optimizations
          && TREE_CODE (base) == REAL_CST
          && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
        {
          combined_fn log_cfn;
          built_in_function exp_bfn;
          switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
            {
            case BUILT_IN_POW:
              log_cfn = CFN_BUILT_IN_LOG;
              exp_bfn = BUILT_IN_EXP;
              break;
            case BUILT_IN_POWF:
              log_cfn = CFN_BUILT_IN_LOGF;
              exp_bfn = BUILT_IN_EXPF;
              break;
            case BUILT_IN_POWL:
              log_cfn = CFN_BUILT_IN_LOGL;
              exp_bfn = BUILT_IN_EXPL;
              break;
            default:
              return NULL;
            }
          tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
          tree exp_decl = builtin_decl_implicit (exp_bfn);
          /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
             does that, but if C is a power of 2, we want to use
             exp2 (log2 (C) * x) in the non-vectorized version, but for
             vectorization we don't have vectorized exp2.  */
          if (logc
              && TREE_CODE (logc) == REAL_CST
              && exp_decl
              && lookup_attribute ("omp declare simd",
                                   DECL_ATTRIBUTES (exp_decl)))
            {
              cgraph_node *node = cgraph_node::get_create (exp_decl);
              if (node->simd_clones == NULL)
                {
                  if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
                      || node->definition)
                    return NULL;
                  expand_simd_clones (node);
                  if (node->simd_clones == NULL)
                    return NULL;
                }
              *type_out = get_vectype_for_scalar_type (vinfo,
                                                       TREE_TYPE (base));
              if (!*type_out)
                return NULL;
              tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
              gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
              append_pattern_def_seq (vinfo, stmt_vinfo, g);
              tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
              g = gimple_build_call (exp_decl, 1, def);
              gimple_call_set_lhs (g, res);
              return g;
            }
        }

      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base),
                                            MULT_EXPR, TREE_TYPE (base),
                                            type_out))
        return NULL;

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
      if (*type_out
          && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
                                             OPTIMIZE_FOR_SPEED))
        {
          gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
          var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
          gimple_call_set_lhs (stmt, var);
          gimple_call_set_nothrow (stmt, true);
          return stmt;
        }
    }

  return NULL;
}
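
/* Examples of calls that the checks above accept:

     y = pow (x, 2.0);   // becomes y = x * x
     y = pow (x, 0.5);   // becomes y = sqrt (x), if IFN_SQRT is
                         // supported on the chosen vector type

   A constant base such as pow (2.5, x) is instead handled through
   exp (log (2.5) * x), using a SIMD clone of exp when one is
   available.  */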


/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S3, the pattern {S2,S3} will be
   detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_widen_sum_pattern (vec_info *vinfo,
                              stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1;
  tree type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
                                       &oprnd0, &oprnd1))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  vect_unpromoted_value unprom0;
  if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
      || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
    return NULL;

  vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);

  if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
                                        unprom0.type, type_out))
    return NULL;

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);

  return pattern_stmt;
}
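
/* For instance:

     unsigned short a[N];
     unsigned int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += a[i];

   becomes WIDEN_SUM <a[i], sum_0> when the target provides a
   widening-sum optab for summing unsigned shorts into an unsigned int
   accumulator.  */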

/* Recognize cases in which an operation is performed in one type WTYPE
   but could be done more efficiently in a narrower type NTYPE.  For example,
   if we have:

     ATYPE a;  // narrower than NTYPE
     BTYPE b;  // narrower than NTYPE
     WTYPE aw = (WTYPE) a;
     WTYPE bw = (WTYPE) b;
     WTYPE res = aw + bw;  // only uses of aw and bw

   then it would be more efficient to do:

     NTYPE an = (NTYPE) a;
     NTYPE bn = (NTYPE) b;
     NTYPE resn = an + bn;
     WTYPE res = (WTYPE) resn;

   Other situations include things like:

     ATYPE a;  // NTYPE or narrower
     WTYPE aw = (WTYPE) a;
     WTYPE res = aw + b;

   when only "(NTYPE) res" is significant.  In that case it's more efficient
   to truncate "b" and do the operation on NTYPE instead:

     NTYPE an = (NTYPE) a;
     NTYPE bn = (NTYPE) b;  // truncation
     NTYPE resn = an + bn;
     WTYPE res = (WTYPE) resn;

   All users of "res" should then use "resn" instead, making the final
   statement dead (not marked as relevant).  The final statement is still
   needed to maintain the type correctness of the IR.

   vect_determine_precisions has already determined the minimum
   precision of the operation and the minimum precision required
   by users of the result.  */

1564 static gimple *
vect_recog_over_widening_pattern(vec_info * vinfo,stmt_vec_info last_stmt_info,tree * type_out)1565 vect_recog_over_widening_pattern (vec_info *vinfo,
1566 stmt_vec_info last_stmt_info, tree *type_out)
1567 {
1568 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1569 if (!last_stmt)
1570 return NULL;
1571
1572 /* See whether we have found that this operation can be done on a
1573 narrower type without changing its semantics. */
1574 unsigned int new_precision = last_stmt_info->operation_precision;
1575 if (!new_precision)
1576 return NULL;
1577
1578 tree lhs = gimple_assign_lhs (last_stmt);
1579 tree type = TREE_TYPE (lhs);
1580 tree_code code = gimple_assign_rhs_code (last_stmt);
1581
1582 /* Punt for reductions where we don't handle the type conversions. */
1583 if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
1584 return NULL;
1585
1586 /* Keep the first operand of a COND_EXPR as-is: only the other two
1587 operands are interesting. */
1588 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1589
1590 /* Check the operands. */
1591 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1592 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1593 unprom.quick_grow (nops);
1594 unsigned int min_precision = 0;
1595 bool single_use_p = false;
1596 for (unsigned int i = 0; i < nops; ++i)
1597 {
1598 tree op = gimple_op (last_stmt, first_op + i);
1599 if (TREE_CODE (op) == INTEGER_CST)
1600 unprom[i].set_op (op, vect_constant_def);
1601 else if (TREE_CODE (op) == SSA_NAME)
1602 {
1603 bool op_single_use_p = true;
1604 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1605 &op_single_use_p))
1606 return NULL;
1607 /* If:
1608
1609 (1) N bits of the result are needed;
1610 (2) all inputs are widened from M<N bits; and
1611 (3) one operand OP is a single-use SSA name
1612
1613 we can shift the M->N widening from OP to the output
1614 without changing the number or type of extensions involved.
1615 This then reduces the number of copies of STMT_INFO.
1616
1617 If instead of (3) more than one operand is a single-use SSA name,
1618 shifting the extension to the output is even more of a win.
1619
1620 If instead:
1621
1622 (1) N bits of the result are needed;
1623 (2) one operand OP2 is widened from M2<N bits;
1624 (3) another operand OP1 is widened from M1<M2 bits; and
1625 (4) both OP1 and OP2 are single-use
1626
1627 the choice is between:
1628
1629 (a) truncating OP2 to M1, doing the operation on M1,
1630 and then widening the result to N
1631
1632 (b) widening OP1 to M2, doing the operation on M2, and then
1633 widening the result to N
1634
1635 Both shift the M2->N widening of the inputs to the output.
1636 (a) additionally shifts the M1->M2 widening to the output;
1637 it requires fewer copies of STMT_INFO but requires an extra
1638 M2->M1 truncation.
1639
1640 Which is better will depend on the complexity and cost of
1641 STMT_INFO, which is hard to predict at this stage. However,
1642 a clear tie-breaker in favor of (b) is the fact that the
1643 truncation in (a) increases the length of the operation chain.
1644
1645 If instead of (4) only one of OP1 or OP2 is single-use,
1646 (b) is still a win over doing the operation in N bits:
1647 it still shifts the M2->N widening on the single-use operand
1648 to the output and reduces the number of STMT_INFO copies.
1649
1650 If neither operand is single-use then operating on fewer than
1651 N bits might lead to more extensions overall. Whether it does
1652 or not depends on global information about the vectorization
1653 region, and whether that's a good trade-off would again
1654 depend on the complexity and cost of the statements involved,
1655 as well as things like register pressure that are not normally
1656 modelled at this stage. We therefore ignore these cases
1657 and just optimize the clear single-use wins above.
1658
1659 Thus we take the maximum precision of the unpromoted operands
1660 and record whether any operand is single-use. */
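	     /* Purely as an illustration of the above: with inputs
		extended from uint8_t (M = 8) to int (N = 32), a
		single-use operand lets us do the arithmetic on the
		narrow type and extend one result instead of several
		inputs; if every operand is multiply-used, those
		extensions must stay live anyway and narrowing saves
		nothing by itself.  */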
1661 if (unprom[i].dt == vect_internal_def)
1662 {
1663 min_precision = MAX (min_precision,
1664 TYPE_PRECISION (unprom[i].type));
1665 single_use_p |= op_single_use_p;
1666 }
1667 }
1668 else
1669 return NULL;
1670 }
1671
1672 /* Although the operation could be done in operation_precision, we have
1673 to balance that against introducing extra truncations or extensions.
1674 Calculate the minimum precision that can be handled efficiently.
1675
1676 The loop above determined that the operation could be handled
1677 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1678 extension from the inputs to the output without introducing more
1679 instructions, and would reduce the number of instructions required
1680 for STMT_INFO itself.
1681
1682 vect_determine_precisions has also determined that the result only
1683 needs min_output_precision bits. Truncating by a factor of N times
1684 requires a tree of N - 1 instructions, so if TYPE is N times wider
1685 than min_output_precision, doing the operation in TYPE and truncating
1686 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1687 In contrast:
1688
1689 - truncating the input to a unary operation and doing the operation
1690 in the new type requires at most N - 1 + 1 = N instructions per
1691 output vector
1692
1693 - doing the same for a binary operation requires at most
1694 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1695
1696 Both unary and binary operations require fewer instructions than
1697 this if the operands were extended from a suitable truncated form.
1698 Thus there is usually nothing to lose by doing operations in
1699 min_output_precision bits, but there can be something to gain. */
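     /* Checking the count for N = 2 (a worked case, added for
	illustration): operating in TYPE costs 2 statement copies
	plus 1 truncation = 3 per output vector; a truncated unary
	operation costs at most 1 + 1 = 2; a truncated binary
	operation costs at most 2 + 1 = 3.  */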
1700 if (!single_use_p)
1701 min_precision = last_stmt_info->min_output_precision;
1702 else
1703 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1704
1705 /* Apply the minimum efficient precision we just calculated. */
1706 if (new_precision < min_precision)
1707 new_precision = min_precision;
1708 new_precision = vect_element_precision (new_precision);
1709 if (new_precision >= TYPE_PRECISION (type))
1710 return NULL;
1711
1712 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1713
1714 *type_out = get_vectype_for_scalar_type (vinfo, type);
1715 if (!*type_out)
1716 return NULL;
1717
1718 /* We've found a viable pattern. Get the new type of the operation. */
1719 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1720 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1721
1722 /* If we're truncating an operation, we need to make sure that we
1723 don't introduce new undefined overflow. The codes tested here are
1724 a subset of those accepted by vect_truncatable_operation_p. */
1725 tree op_type = new_type;
1726 if (TYPE_OVERFLOW_UNDEFINED (new_type)
1727 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
1728 op_type = build_nonstandard_integer_type (new_precision, true);
1729
1730 /* We specifically don't check here whether the target supports the
1731 new operation, since it might be something that a later pattern
1732 wants to rewrite anyway. If targets have a minimum element size
1733 for some optabs, we should pattern-match smaller ops to larger ops
1734 where beneficial. */
1735 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1736 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
1737 if (!new_vectype || !op_vectype)
1738 return NULL;
1739
1740 if (dump_enabled_p ())
1741 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1742 type, new_type);
1743
1744 /* Calculate the rhs operands for an operation on OP_TYPE. */
1745 tree ops[3] = {};
1746 for (unsigned int i = 1; i < first_op; ++i)
1747 ops[i - 1] = gimple_op (last_stmt, i);
1748 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
1749 op_type, &unprom[0], op_vectype);
1750
1751 /* Use the operation to produce a result of type OP_TYPE. */
1752 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
1753 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1754 ops[0], ops[1], ops[2]);
1755 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1756
1757 if (dump_enabled_p ())
1758 dump_printf_loc (MSG_NOTE, vect_location,
1759 "created pattern stmt: %G", pattern_stmt);
1760
1761 /* Convert back to the original signedness, if OP_TYPE is different
1762 from NEW_TYPE. */
1763 if (op_type != new_type)
1764 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
1765 pattern_stmt, op_vectype);
1766
1767 /* Promote the result to the original type. */
1768 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
1769 pattern_stmt, new_vectype);
1770
1771 return pattern_stmt;
1772 }
1773
1774 /* Recognize the following patterns:
1775
1776 ATYPE a; // narrower than TYPE
1777 BTYPE b; // narrower than TYPE
1778
1779 1) Multiply high with scaling
1780 TYPE res = ((TYPE) a * (TYPE) b) >> c;
1781 2) ... or also with rounding
1782 TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
1783
1784 where only the bottom half of res is used. */
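/* For instance (illustrative only), with 16-bit a and b widened to
   32 bits:

     int res = ((int) a * (int) b) >> 15;

   where only the low 16 bits of res are used matches form 1 and maps
   to IFN_MULHS on 16-bit elements; shifting by 14, adding 1 and then
   shifting by 1 would match form 2 and map to IFN_MULHRS.  */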
1785
1786 static gimple *
1787 vect_recog_mulhs_pattern (vec_info *vinfo,
1788 stmt_vec_info last_stmt_info, tree *type_out)
1789 {
1790 /* Check for a right shift. */
1791 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1792 if (!last_stmt
1793 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
1794 return NULL;
1795
1796 /* Check that the shift result is wider than the users of the
1797 result need (i.e. that narrowing would be a natural choice). */
1798 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1799 unsigned int target_precision
1800 = vect_element_precision (last_stmt_info->min_output_precision);
1801 if (!INTEGRAL_TYPE_P (lhs_type)
1802 || target_precision >= TYPE_PRECISION (lhs_type))
1803 return NULL;
1804
1805 /* Look through any change in sign on the outer shift input. */
1806 vect_unpromoted_value unprom_rshift_input;
1807 tree rshift_input = vect_look_through_possible_promotion
1808 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
1809 if (!rshift_input
1810 || TYPE_PRECISION (TREE_TYPE (rshift_input))
1811 != TYPE_PRECISION (lhs_type))
1812 return NULL;
1813
1814 /* Get the definition of the shift input. */
1815 stmt_vec_info rshift_input_stmt_info
1816 = vect_get_internal_def (vinfo, rshift_input);
1817 if (!rshift_input_stmt_info)
1818 return NULL;
1819 gassign *rshift_input_stmt
1820 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
1821 if (!rshift_input_stmt)
1822 return NULL;
1823
1824 stmt_vec_info mulh_stmt_info;
1825 tree scale_term;
1826 internal_fn ifn;
1827 unsigned int expect_offset;
1828
1829 /* Check for the presence of the rounding term. */
1830 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
1831 {
1832 /* Check that the outer shift was by 1. */
1833 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
1834 return NULL;
1835
1836 /* Check that the second operand of the PLUS_EXPR is 1. */
1837 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
1838 return NULL;
1839
1840 /* Look through any change in sign on the addition input. */
1841 vect_unpromoted_value unprom_plus_input;
1842 tree plus_input = vect_look_through_possible_promotion
1843 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
1844 if (!plus_input
1845 || TYPE_PRECISION (TREE_TYPE (plus_input))
1846 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
1847 return NULL;
1848
1849 /* Get the definition of the multiply-high-scale part. */
1850 stmt_vec_info plus_input_stmt_info
1851 = vect_get_internal_def (vinfo, plus_input);
1852 if (!plus_input_stmt_info)
1853 return NULL;
1854 gassign *plus_input_stmt
1855 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
1856 if (!plus_input_stmt
1857 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
1858 return NULL;
1859
1860 /* Look through any change in sign on the scaling input. */
1861 vect_unpromoted_value unprom_scale_input;
1862 tree scale_input = vect_look_through_possible_promotion
1863 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
1864 if (!scale_input
1865 || TYPE_PRECISION (TREE_TYPE (scale_input))
1866 != TYPE_PRECISION (TREE_TYPE (plus_input)))
1867 return NULL;
1868
1869 /* Get the definition of the multiply-high part. */
1870 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
1871 if (!mulh_stmt_info)
1872 return NULL;
1873
1874 /* Get the scaling term. */
1875 scale_term = gimple_assign_rhs2 (plus_input_stmt);
1876
1877 expect_offset = target_precision + 2;
1878 ifn = IFN_MULHRS;
1879 }
1880 else
1881 {
1882 mulh_stmt_info = rshift_input_stmt_info;
1883 scale_term = gimple_assign_rhs2 (last_stmt);
1884
1885 expect_offset = target_precision + 1;
1886 ifn = IFN_MULHS;
1887 }
1888
1889 /* Check that the scaling factor is correct. */
1890 if (TREE_CODE (scale_term) != INTEGER_CST
1891 || wi::to_widest (scale_term) + expect_offset
1892 != TYPE_PRECISION (lhs_type))
1893 return NULL;
1894
1895 /* Check whether the scaling input term can be seen as two widened
1896 inputs multiplied together. */
1897 vect_unpromoted_value unprom_mult[2];
1898 tree new_type;
1899 unsigned int nops
1900 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
1901 false, 2, unprom_mult, &new_type);
1902 if (nops != 2)
1903 return NULL;
1904
1905 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
1906
1907 /* Adjust output precision. */
1908 if (TYPE_PRECISION (new_type) < target_precision)
1909 new_type = build_nonstandard_integer_type
1910 (target_precision, TYPE_UNSIGNED (new_type));
1911
1912 /* Check for target support. */
1913 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1914 if (!new_vectype
1915 || !direct_internal_fn_supported_p
1916 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
1917 return NULL;
1918
1919 /* The IR requires a valid vector type for the cast result, even though
1920 it's likely to be discarded. */
1921 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
1922 if (!*type_out)
1923 return NULL;
1924
1925 /* Generate the IFN_MULHRS call. */
1926 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1927 tree new_ops[2];
1928 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
1929 unprom_mult, new_vectype);
1930 gcall *mulhrs_stmt
1931 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
1932 gimple_call_set_lhs (mulhrs_stmt, new_var);
1933 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
1934
1935 if (dump_enabled_p ())
1936 dump_printf_loc (MSG_NOTE, vect_location,
1937 "created pattern stmt: %G", mulhrs_stmt);
1938
1939 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
1940 mulhrs_stmt, new_vectype);
1941 }
1942
1943 /* Recognize the patterns:
1944
1945 ATYPE a; // narrower than TYPE
1946 BTYPE b; // narrower than TYPE
1947 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1948 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1949
1950 where only the bottom half of avg is used. Try to transform them into:
1951
1952 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1953 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1954
1955 followed by:
1956
1957 TYPE avg = (TYPE) avg';
1958
1959 where NTYPE is no wider than half of TYPE. Since only the bottom half
1960 of avg is used, all or part of the cast of avg' should become redundant.
1961
1962 If there is no target support available, generate code to distribute rshift
1963 over plus and add a carry. */
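/* A small worked example (illustrative): for unsigned char a = 250
   and b = 251, form (1) yields 250 and form (2) yields 251, while the
   intermediate sum 501 would already have overflowed the narrow type;
   hence the value of the single-instruction .AVG_FLOOR/.AVG_CEIL
   forms.  */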
1964
1965 static gimple *
1966 vect_recog_average_pattern (vec_info *vinfo,
1967 stmt_vec_info last_stmt_info, tree *type_out)
1968 {
1969 /* Check for a shift right by one bit. */
1970 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1971 if (!last_stmt
1972 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1973 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1974 return NULL;
1975
1976 /* Check that the shift result is wider than the users of the
1977 result need (i.e. that narrowing would be a natural choice). */
1978 tree lhs = gimple_assign_lhs (last_stmt);
1979 tree type = TREE_TYPE (lhs);
1980 unsigned int target_precision
1981 = vect_element_precision (last_stmt_info->min_output_precision);
1982 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1983 return NULL;
1984
1985 /* Look through any change in sign on the shift input. */
1986 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1987 vect_unpromoted_value unprom_plus;
1988 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
1989 &unprom_plus);
1990 if (!rshift_rhs
1991 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
1992 return NULL;
1993
1994 /* Get the definition of the shift input. */
1995 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1996 if (!plus_stmt_info)
1997 return NULL;
1998
1999 /* Check whether the shift input can be seen as a tree of additions on
2000 2 or 3 widened inputs.
2001
2002 Note that the pattern should be a win even if the result of one or
2003 more additions is reused elsewhere: if the pattern matches, we'd be
2004 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
2005 internal_fn ifn = IFN_AVG_FLOOR;
2006 vect_unpromoted_value unprom[3];
2007 tree new_type;
2008 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
2009 WIDEN_PLUS_EXPR, false, 3,
2010 unprom, &new_type);
2011 if (nops == 0)
2012 return NULL;
2013 if (nops == 3)
2014 {
2015 /* Check that one operand is 1. */
2016 unsigned int i;
2017 for (i = 0; i < 3; ++i)
2018 if (integer_onep (unprom[i].op))
2019 break;
2020 if (i == 3)
2021 return NULL;
2022 /* Throw away the 1 operand and keep the other two. */
2023 if (i < 2)
2024 unprom[i] = unprom[2];
2025 ifn = IFN_AVG_CEIL;
2026 }
2027
2028 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
2029
2030 /* We know that:
2031
2032 (a) the operation can be viewed as:
2033
2034 TYPE widened0 = (TYPE) UNPROM[0];
2035 TYPE widened1 = (TYPE) UNPROM[1];
2036 TYPE tmp1 = widened0 + widened1 {+ 1};
2037 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
2038
2039 (b) the first two statements are equivalent to:
2040
2041 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2042 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2043
2044 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2045 where sensible;
2046
2047 (d) all the operations can be performed correctly at twice the width of
2048 NEW_TYPE, due to the nature of the average operation; and
2049
2050 (e) users of the result of the right shift need only TARGET_PRECISION
2051 bits, where TARGET_PRECISION is no more than half of TYPE's
2052 precision.
2053
2054 Under these circumstances, the only situation in which NEW_TYPE
2055 could be narrower than TARGET_PRECISION is if widened0, widened1
2056 and an addition result are all used more than once. Thus we can
2057 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2058 as "free", whereas widening the result of the average instruction
2059 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
2060 therefore better not to go narrower than TARGET_PRECISION. */
2061 if (TYPE_PRECISION (new_type) < target_precision)
2062 new_type = build_nonstandard_integer_type (target_precision,
2063 TYPE_UNSIGNED (new_type));
2064
2065 /* Check for target support. */
2066 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2067 if (!new_vectype)
2068 return NULL;
2069
2070 bool fallback_p = false;
2071
2072 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2073 ;
2074 else if (TYPE_UNSIGNED (new_type)
2075 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2076 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2077 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2078 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2079 fallback_p = true;
2080 else
2081 return NULL;
2082
2083 /* The IR requires a valid vector type for the cast result, even though
2084 it's likely to be discarded. */
2085 *type_out = get_vectype_for_scalar_type (vinfo, type);
2086 if (!*type_out)
2087 return NULL;
2088
2089 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2090 tree new_ops[2];
2091 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2092 unprom, new_vectype);
2093
2094 if (fallback_p)
2095 {
2096 /* As a fallback, generate code for the following sequence:
2097
2098 shifted_op0 = new_ops[0] >> 1;
2099 shifted_op1 = new_ops[1] >> 1;
2100 sum_of_shifted = shifted_op0 + shifted_op1;
2101 unmasked_carry = new_ops[0] and/or new_ops[1];
2102 carry = unmasked_carry & 1;
2103 new_var = sum_of_shifted + carry;
2104 */
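      /* The fallback relies on the identities (for unsigned values;
	 stated here as an explanatory note):

	   (a + b) >> 1     == (a >> 1) + (b >> 1) + (a & b & 1)
	   (a + b + 1) >> 1 == (a >> 1) + (b >> 1) + ((a | b) & 1)

	 so the carry uses BIT_AND_EXPR for .AVG_FLOOR and
	 BIT_IOR_EXPR for .AVG_CEIL, as chosen below.  */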
2105
2106 tree one_cst = build_one_cst (new_type);
2107 gassign *g;
2108
2109 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2110 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2111 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2112
2113 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2114 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2115 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2116
2117 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2118 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2119 shifted_op0, shifted_op1);
2120 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2121
2122 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2123 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2124 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2125 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2126
2127 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2128 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2129 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2130
2131 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2132 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
2133 }
2134
2135 /* Generate the IFN_AVG* call. */
2136 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2137 new_ops[1]);
2138 gimple_call_set_lhs (average_stmt, new_var);
2139 gimple_set_location (average_stmt, gimple_location (last_stmt));
2140
2141 if (dump_enabled_p ())
2142 dump_printf_loc (MSG_NOTE, vect_location,
2143 "created pattern stmt: %G", average_stmt);
2144
2145 return vect_convert_output (vinfo, last_stmt_info,
2146 type, average_stmt, new_vectype);
2147 }
2148
2149 /* Recognize cases in which the input to a cast is wider than its
2150 output, and the input is fed by a widening operation. Fold this
2151 by removing the unnecessary intermediate widening. E.g.:
2152
2153 unsigned char a;
2154 unsigned int b = (unsigned int) a;
2155 unsigned short c = (unsigned short) b;
2156
2157 -->
2158
2159 unsigned short c = (unsigned short) a;
2160
2161 Although this is rare in input IR, it is an expected side-effect
2162 of the over-widening pattern above.
2163
2164 This is beneficial also for integer-to-float conversions, if the
2165 widened integer has more bits than the float, and if the unwidened
2166 input doesn't. */
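/* A hypothetical instance of the float case:

     unsigned char a;
     unsigned long long b = (unsigned long long) a;
     float f = (float) b;

   can become f = (float) a: the 64-bit integer is wider than float
   while unsigned char is not, so the converted value is the same.  */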
2167
2168 static gimple *
2169 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
2170 stmt_vec_info last_stmt_info, tree *type_out)
2171 {
2172 /* Check for a cast, including an integer-to-float conversion. */
2173 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2174 if (!last_stmt)
2175 return NULL;
2176 tree_code code = gimple_assign_rhs_code (last_stmt);
2177 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2178 return NULL;
2179
2180 /* Make sure that the rhs is a scalar with a natural bitsize. */
2181 tree lhs = gimple_assign_lhs (last_stmt);
2182 if (!lhs)
2183 return NULL;
2184 tree lhs_type = TREE_TYPE (lhs);
2185 scalar_mode lhs_mode;
2186 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2187 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2188 return NULL;
2189
2190 /* Check for a narrowing operation (from a vector point of view). */
2191 tree rhs = gimple_assign_rhs1 (last_stmt);
2192 tree rhs_type = TREE_TYPE (rhs);
2193 if (!INTEGRAL_TYPE_P (rhs_type)
2194 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2195 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2196 return NULL;
2197
2198 /* Try to find an unpromoted input. */
2199 vect_unpromoted_value unprom;
2200 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2201 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2202 return NULL;
2203
2204 /* If the bits above RHS_TYPE matter, make sure that they're the
2205 same when extending from UNPROM as they are when extending from RHS. */
2206 if (!INTEGRAL_TYPE_P (lhs_type)
2207 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2208 return NULL;
2209
2210 /* We can get the same result by casting UNPROM directly, to avoid
2211 the unnecessary widening and narrowing. */
2212 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2213
2214 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2215 if (!*type_out)
2216 return NULL;
2217
2218 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2219 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2220 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2221
2222 return pattern_stmt;
2223 }
2224
2225 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2226 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
2227
2228 static gimple *
2229 vect_recog_widen_shift_pattern (vec_info *vinfo,
2230 stmt_vec_info last_stmt_info, tree *type_out)
2231 {
2232 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
2233 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
2234 "vect_recog_widen_shift_pattern");
2235 }
2236
2237 /* Detect a rotate pattern that wouldn't be otherwise vectorized:
2238
2239 type a_t, b_t, c_t;
2240
2241 S0 a_t = b_t r<< c_t;
2242
2243 Input/Output:
2244
2245 * STMT_VINFO: The stmt from which the pattern search begins,
2246 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
2247 with a sequence:
2248
2249 S1 d_t = -c_t;
2250 S2 e_t = d_t & (B - 1);
2251 S3 f_t = b_t << c_t;
2252 S4 g_t = b_t >> e_t;
2253 S0 a_t = f_t | g_t;
2254
2255 where B is element bitsize of type.
2256
2257 Output:
2258
2259 * TYPE_OUT: The type of the output of this pattern.
2260
2261 * Return value: A new stmt that will be used to replace the rotate
2262 S0 stmt. */
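/* As a worked illustration (not part of the original comment), for a
   32-bit unsigned b_t rotated left, the sequence computes

     a_t = (b_t << c_t) | (b_t >> ((-c_t) & 31));

   The mask keeps the right-shift amount in [0, 31]; for c_t == 0 both
   shifts are by zero and the IOR reproduces b_t, so no out-of-range
   shift by 32 is ever emitted.  */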
2263
2264 static gimple *
2265 vect_recog_rotate_pattern (vec_info *vinfo,
2266 stmt_vec_info stmt_vinfo, tree *type_out)
2267 {
2268 gimple *last_stmt = stmt_vinfo->stmt;
2269 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2270 gimple *pattern_stmt, *def_stmt;
2271 enum tree_code rhs_code;
2272 enum vect_def_type dt;
2273 optab optab1, optab2;
2274 edge ext_def = NULL;
2275 bool bswap16_p = false;
2276
2277 if (is_gimple_assign (last_stmt))
2278 {
2279 rhs_code = gimple_assign_rhs_code (last_stmt);
2280 switch (rhs_code)
2281 {
2282 case LROTATE_EXPR:
2283 case RROTATE_EXPR:
2284 break;
2285 default:
2286 return NULL;
2287 }
2288
2289 lhs = gimple_assign_lhs (last_stmt);
2290 oprnd0 = gimple_assign_rhs1 (last_stmt);
2291 type = TREE_TYPE (oprnd0);
2292 oprnd1 = gimple_assign_rhs2 (last_stmt);
2293 }
2294 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2295 {
2296 /* __builtin_bswap16 (x) is another form of x r>> 8.
2297 The vectorizer has bswap support, but only if the argument isn't
2298 promoted. */
2299 lhs = gimple_call_lhs (last_stmt);
2300 oprnd0 = gimple_call_arg (last_stmt, 0);
2301 type = TREE_TYPE (oprnd0);
2302 if (!lhs
2303 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2304 || TYPE_PRECISION (type) <= 16
2305 || TREE_CODE (oprnd0) != SSA_NAME
2306 || BITS_PER_UNIT != 8
2307 || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
2308 return NULL;
2309
2310 stmt_vec_info def_stmt_info;
2311 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2312 return NULL;
2313
2314 if (dt != vect_internal_def)
2315 return NULL;
2316
2317 if (gimple_assign_cast_p (def_stmt))
2318 {
2319 def = gimple_assign_rhs1 (def_stmt);
2320 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2321 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2322 oprnd0 = def;
2323 }
2324
2325 type = TREE_TYPE (lhs);
2326 vectype = get_vectype_for_scalar_type (vinfo, type);
2327 if (vectype == NULL_TREE)
2328 return NULL;
2329
2330 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2331 {
2332 /* The encoding uses one stepped pattern for each byte in the
2333 16-bit word. */
2334 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2335 for (unsigned i = 0; i < 3; ++i)
2336 for (unsigned j = 0; j < 2; ++j)
2337 elts.quick_push ((i + 1) * 2 - j - 1);
2338
2339 vec_perm_indices indices (elts, 1,
2340 TYPE_VECTOR_SUBPARTS (char_vectype));
2341 if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2342 {
2343 /* vectorizable_bswap can handle the __builtin_bswap16 if we
2344 undo the argument promotion. */
2345 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2346 {
2347 def = vect_recog_temp_ssa_var (type, NULL);
2348 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2349 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2350 oprnd0 = def;
2351 }
2352
2353 /* Pattern detected. */
2354 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2355
2356 *type_out = vectype;
2357
2358 /* Pattern supported. Create a stmt to be used to replace the
2359 pattern, with the unpromoted argument. */
2360 var = vect_recog_temp_ssa_var (type, NULL);
2361 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
2362 1, oprnd0);
2363 gimple_call_set_lhs (pattern_stmt, var);
2364 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
2365 gimple_call_fntype (last_stmt));
2366 return pattern_stmt;
2367 }
2368 }
2369
2370 oprnd1 = build_int_cst (integer_type_node, 8);
2371 rhs_code = LROTATE_EXPR;
2372 bswap16_p = true;
2373 }
2374 else
2375 return NULL;
2376
2377 if (TREE_CODE (oprnd0) != SSA_NAME
2378 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2379 || !INTEGRAL_TYPE_P (type)
2380 || !TYPE_UNSIGNED (type))
2381 return NULL;
2382
2383 stmt_vec_info def_stmt_info;
2384 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2385 return NULL;
2386
2387 if (dt != vect_internal_def
2388 && dt != vect_constant_def
2389 && dt != vect_external_def)
2390 return NULL;
2391
2392 vectype = get_vectype_for_scalar_type (vinfo, type);
2393 if (vectype == NULL_TREE)
2394 return NULL;
2395
2396 /* If vector/vector or vector/scalar rotate is supported by the target,
2397 don't do anything here. */
2398 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2399 if (optab1
2400 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2401 {
2402 use_rotate:
2403 if (bswap16_p)
2404 {
2405 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2406 {
2407 def = vect_recog_temp_ssa_var (type, NULL);
2408 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2409 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2410 oprnd0 = def;
2411 }
2412
2413 /* Pattern detected. */
2414 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2415
2416 *type_out = vectype;
2417
2418 /* Pattern supported. Create a stmt to be used to replace the
2419 pattern. */
2420 var = vect_recog_temp_ssa_var (type, NULL);
2421 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
2422 oprnd1);
2423 return pattern_stmt;
2424 }
2425 return NULL;
2426 }
2427
2428 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2429 {
2430 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2431 if (optab2
2432 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2433 goto use_rotate;
2434 }
2435
2436 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2437 don't do anything here either. */
2438 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2439 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2440 if (!optab1
2441 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2442 || !optab2
2443 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2444 {
2445 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2446 return NULL;
2447 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2448 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2449 if (!optab1
2450 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2451 || !optab2
2452 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2453 return NULL;
2454 }
2455
2456 *type_out = vectype;
2457
2458 if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2459 {
2460 def = vect_recog_temp_ssa_var (type, NULL);
2461 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2462 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2463 oprnd0 = def;
2464 }
2465
2466 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
2467 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2468
2469 def = NULL_TREE;
2470 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2471 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2472 def = oprnd1;
2473 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2474 {
2475 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2476 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2477 && TYPE_PRECISION (TREE_TYPE (rhs1))
2478 == TYPE_PRECISION (type))
2479 def = rhs1;
2480 }
2481
2482 if (def == NULL_TREE)
2483 {
2484 def = vect_recog_temp_ssa_var (type, NULL);
2485 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2486 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2487 }
2488 stype = TREE_TYPE (def);
2489
2490 if (TREE_CODE (def) == INTEGER_CST)
2491 {
2492 if (!tree_fits_uhwi_p (def)
2493 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2494 || integer_zerop (def))
2495 return NULL;
2496 def2 = build_int_cst (stype,
2497 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2498 }
2499 else
2500 {
2501 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
2502
2503 if (vecstype == NULL_TREE)
2504 return NULL;
2505 def2 = vect_recog_temp_ssa_var (stype, NULL);
2506 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2507 if (ext_def)
2508 {
2509 basic_block new_bb
2510 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2511 gcc_assert (!new_bb);
2512 }
2513 else
2514 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2515
2516 def2 = vect_recog_temp_ssa_var (stype, NULL);
2517 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
2518 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2519 gimple_assign_lhs (def_stmt), mask);
2520 if (ext_def)
2521 {
2522 basic_block new_bb
2523 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2524 gcc_assert (!new_bb);
2525 }
2526 else
2527 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2528 }
2529
2530 var1 = vect_recog_temp_ssa_var (type, NULL);
2531 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2532 ? LSHIFT_EXPR : RSHIFT_EXPR,
2533 oprnd0, def);
2534 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2535
2536 var2 = vect_recog_temp_ssa_var (type, NULL);
2537 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2538 ? RSHIFT_EXPR : LSHIFT_EXPR,
2539 oprnd0, def2);
2540 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2541
2542 /* Pattern detected. */
2543 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2544
2545 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2546 var = vect_recog_temp_ssa_var (type, NULL);
2547 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2548
2549 return pattern_stmt;
2550 }
2551
2552 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2553 vectorized:
2554
2555 type a_t;
2556 TYPE b_T, res_T;
2557
2558 S1 a_t = ;
2559 S2 b_T = ;
2560 S3 res_T = b_T op a_t;
2561
2562 where type 'TYPE' is a type with different size than 'type',
2563 and op is <<, >> or rotate.
2564
2565 Also detect cases:
2566
2567 type a_t;
2568 TYPE b_T, c_T, res_T;
2569
2570 S0 c_T = ;
2571 S1 a_t = (type) c_T;
2572 S2 b_T = ;
2573 S3 res_T = b_T op a_t;
2574
2575 Input/Output:
2576
2577 * STMT_VINFO: The stmt from which the pattern search begins,
2578 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2579 with a shift/rotate which has same type on both operands, in the
2580 second case just b_T op c_T, in the first case with added cast
2581 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2582
2583 Output:
2584
2585 * TYPE_OUT: The type of the output of this pattern.
2586
2587 * Return value: A new stmt that will be used to replace the shift/rotate
2588 S3 stmt. */
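/* A hypothetical instance of the first form:

     int n;
     unsigned long long x, res;
     res = x << n;   // 'n' is narrower than 'x'

   The pattern rewrites this with 'n' cast (or masked) to the type of
   'x', so both vector operands share one element size.  */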
2589
2590 static gimple *
2591 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
2592 stmt_vec_info stmt_vinfo,
2593 tree *type_out)
2594 {
2595 gimple *last_stmt = stmt_vinfo->stmt;
2596 tree oprnd0, oprnd1, lhs, var;
2597 gimple *pattern_stmt;
2598 enum tree_code rhs_code;
2599
2600 if (!is_gimple_assign (last_stmt))
2601 return NULL;
2602
2603 rhs_code = gimple_assign_rhs_code (last_stmt);
2604 switch (rhs_code)
2605 {
2606 case LSHIFT_EXPR:
2607 case RSHIFT_EXPR:
2608 case LROTATE_EXPR:
2609 case RROTATE_EXPR:
2610 break;
2611 default:
2612 return NULL;
2613 }
2614
2615 lhs = gimple_assign_lhs (last_stmt);
2616 oprnd0 = gimple_assign_rhs1 (last_stmt);
2617 oprnd1 = gimple_assign_rhs2 (last_stmt);
2618 if (TREE_CODE (oprnd0) != SSA_NAME
2619 || TREE_CODE (oprnd1) != SSA_NAME
2620 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2621 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2622 || TYPE_PRECISION (TREE_TYPE (lhs))
2623 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2624 return NULL;
2625
2626 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2627 if (!def_vinfo)
2628 return NULL;
2629
2630 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
2631 if (*type_out == NULL_TREE)
2632 return NULL;
2633
2634 tree def = NULL_TREE;
2635 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2636 if (def_stmt && gimple_assign_cast_p (def_stmt))
2637 {
2638 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2639 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2640 && TYPE_PRECISION (TREE_TYPE (rhs1))
2641 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2642 {
2643 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2644 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2645 def = rhs1;
2646 else
2647 {
2648 tree mask
2649 = build_low_bits_mask (TREE_TYPE (rhs1),
2650 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2651 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2652 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2653 tree vecstype = get_vectype_for_scalar_type (vinfo,
2654 TREE_TYPE (rhs1));
2655 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2656 }
2657 }
2658 }
2659
2660 if (def == NULL_TREE)
2661 {
2662 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2663 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2664 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2665 }
2666
2667 /* Pattern detected. */
2668 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2669
2670 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2671 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2672 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2673
2674 return pattern_stmt;
2675 }
2676
2677 /* Return true iff the target has a vector optab implementing the operation
2678 CODE on type VECTYPE. */
2679
2680 static bool
2681 target_has_vecop_for_code (tree_code code, tree vectype)
2682 {
2683 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2684 return voptab
2685 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2686 }
2687
2688 /* Verify that the target has optabs of VECTYPE to perform all the steps
2689 needed by the multiplication-by-immediate synthesis algorithm described by
2690 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2691 present. Return true iff the target supports all the steps. */
2692
2693 static bool
2694 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2695 tree vectype, bool synth_shift_p)
2696 {
2697 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2698 return false;
2699
2700 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2701 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2702
2703 if (var == negate_variant
2704 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2705 return false;
2706
2707 /* If we must synthesize shifts with additions make sure that vector
2708 addition is available. */
2709 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2710 return false;
2711
2712 for (int i = 1; i < alg->ops; i++)
2713 {
2714 switch (alg->op[i])
2715 {
2716 case alg_shift:
2717 break;
2718 case alg_add_t_m2:
2719 case alg_add_t2_m:
2720 case alg_add_factor:
2721 if (!supports_vplus)
2722 return false;
2723 break;
2724 case alg_sub_t_m2:
2725 case alg_sub_t2_m:
2726 case alg_sub_factor:
2727 if (!supports_vminus)
2728 return false;
2729 break;
2730 case alg_unknown:
2731 case alg_m:
2732 case alg_zero:
2733 case alg_impossible:
2734 return false;
2735 default:
2736 gcc_unreachable ();
2737 }
2738 }
2739
2740 return true;
2741 }
2742
2743 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2744 putting the final result in DEST. Append all statements but the last into
2745 VINFO. Return the last statement. */
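/* E.g. a shift by 3 becomes three successive doublings (shown here
   purely as an illustration):

     t1 = op + op;
     t2 = t1 + t1;
     dest = t2 + t2;  */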
2746
2747 static gimple *
2748 synth_lshift_by_additions (vec_info *vinfo,
2749 tree dest, tree op, HOST_WIDE_INT amnt,
2750 stmt_vec_info stmt_info)
2751 {
2752 HOST_WIDE_INT i;
2753 tree itype = TREE_TYPE (op);
2754 tree prev_res = op;
2755 gcc_assert (amnt >= 0);
2756 for (i = 0; i < amnt; i++)
2757 {
2758 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2759 : dest;
2760 gimple *stmt
2761 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2762 prev_res = tmp_var;
2763 if (i < amnt - 1)
2764 append_pattern_def_seq (vinfo, stmt_info, stmt);
2765 else
2766 return stmt;
2767 }
2768 gcc_unreachable ();
2769 return NULL;
2770 }
2771
2772 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2773 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2774 the process if necessary. Append the resulting assignment statements
2775 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2776 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2777 left shifts using additions. */
2778
2779 static tree
2780 apply_binop_and_append_stmt (vec_info *vinfo,
2781 tree_code code, tree op1, tree op2,
2782 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2783 {
2784 if (integer_zerop (op2)
2785 && (code == LSHIFT_EXPR
2786 || code == PLUS_EXPR))
2787 {
2788 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2789 return op1;
2790 }
2791
2792 gimple *stmt;
2793 tree itype = TREE_TYPE (op1);
2794 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2795
2796 if (code == LSHIFT_EXPR
2797 && synth_shift_p)
2798 {
2799 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
2800 TREE_INT_CST_LOW (op2), stmt_vinfo);
2801 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2802 return tmp_var;
2803 }
2804
2805 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2806 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2807 return tmp_var;
2808 }
2809
2810 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2811 and simple arithmetic operations to be vectorized. Record the statements
2812 produced in STMT_VINFO and return the last statement in the sequence or
2813 NULL if it's not possible to synthesize such a multiplication.
2814 This function mirrors the behavior of expand_mult_const in expmed.c but
2815 works on tree-ssa form. */
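/* As an illustrative sketch, a multiplication by 5 would typically be
   synthesized via an alg_add_t_m2 step as

     t1 = op << 2;
     res = t1 + op;

   with the arithmetic done in an unsigned copy of the type when
   signed overflow is undefined, as explained below.  */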
2816
2817 static gimple *
2818 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
2819 stmt_vec_info stmt_vinfo)
2820 {
2821 tree itype = TREE_TYPE (op);
2822 machine_mode mode = TYPE_MODE (itype);
2823 struct algorithm alg;
2824 mult_variant variant;
2825 if (!tree_fits_shwi_p (val))
2826 return NULL;
2827
2828 /* Multiplication synthesis by shifts, adds and subs can introduce
2829 signed overflow where the original operation didn't. Perform the
2830 operations on an unsigned type and cast back to avoid this.
2831 In the future we may want to relax this for synthesis algorithms
2832 that we can prove do not cause unexpected overflow. */
2833 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2834
2835 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2836
2837 /* Targets that don't support vector shifts but support vector additions
2838 can synthesize shifts that way. */
2839 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
2840
2841 HOST_WIDE_INT hwval = tree_to_shwi (val);
2842 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2843 The vectorizer's benefit analysis will decide whether it's beneficial
2844 to do this. */
2845 bool possible = choose_mult_variant (mode, hwval, &alg,
2846 &variant, MAX_COST);
2847 if (!possible)
2848 return NULL;
2849
2850 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
2851
2852 if (!vectype
2853 || !target_supports_mult_synth_alg (&alg, variant,
2854 vectype, synth_shift_p))
2855 return NULL;
2856
2857 tree accumulator;
2858
2859 /* Clear out the sequence of statements so we can populate it below. */
2860 gimple *stmt = NULL;
2861
2862 if (cast_to_unsigned_p)
2863 {
2864 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2865 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2866 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2867 op = tmp_op;
2868 }
2869
2870 if (alg.op[0] == alg_zero)
2871 accumulator = build_int_cst (multtype, 0);
2872 else
2873 accumulator = op;
2874
2875 bool needs_fixup = (variant == negate_variant)
2876 || (variant == add_variant);
2877
2878 for (int i = 1; i < alg.ops; i++)
2879 {
2880 tree shft_log = build_int_cst (multtype, alg.log[i]);
2881 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2882 tree tmp_var = NULL_TREE;
2883
2884 switch (alg.op[i])
2885 {
2886 case alg_shift:
2887 if (synth_shift_p)
2888 stmt
2889 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
2890 alg.log[i], stmt_vinfo);
2891 else
2892 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2893 shft_log);
2894 break;
2895 case alg_add_t_m2:
2896 tmp_var
2897 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
2898 stmt_vinfo, synth_shift_p);
2899 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2900 tmp_var);
2901 break;
2902 case alg_sub_t_m2:
2903 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
2904 shft_log, stmt_vinfo,
2905 synth_shift_p);
2906 /* In some algorithms the first step involves zeroing the
2907 accumulator. If subtracting from such an accumulator
2908 just emit the negation directly. */
2909 if (integer_zerop (accumulator))
2910 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2911 else
2912 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2913 tmp_var);
2914 break;
2915 case alg_add_t2_m:
2916 tmp_var
2917 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2918 shft_log, stmt_vinfo, synth_shift_p);
2919 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2920 break;
2921 case alg_sub_t2_m:
2922 tmp_var
2923 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2924 shft_log, stmt_vinfo, synth_shift_p);
2925 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2926 break;
2927 case alg_add_factor:
2928 tmp_var
2929 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2930 shft_log, stmt_vinfo, synth_shift_p);
2931 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2932 tmp_var);
2933 break;
2934 case alg_sub_factor:
2935 tmp_var
2936 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2937 shft_log, stmt_vinfo, synth_shift_p);
2938 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2939 accumulator);
2940 break;
2941 default:
2942 gcc_unreachable ();
2943 }
2944 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2945 but rather return it directly. */
2946
2947 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2948 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2949 accumulator = accum_tmp;
2950 }
2951 if (variant == negate_variant)
2952 {
2953 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2954 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2955 accumulator = accum_tmp;
2956 if (cast_to_unsigned_p)
2957 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2958 }
2959 else if (variant == add_variant)
2960 {
2961 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2962 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2963 accumulator = accum_tmp;
2964 if (cast_to_unsigned_p)
2965 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2966 }
2967 /* Move back to a signed type if needed. */
2968 if (cast_to_unsigned_p)
2969 {
2970 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2971 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2972 }
2973
2974 return stmt;
2975 }
2976
2977 /* Detect multiplication by constant and convert it into a sequence of
2978 shifts, additions, subtractions and negations. We reuse the
2979 choose_mult_variant algorithms from expmed.c.
2980
2981 Input/Output:
2982
2983 STMT_VINFO: The stmt from which the pattern search begins,
2984 i.e. the mult stmt.
2985
2986 Output:
2987
2988 * TYPE_OUT: The type of the output of this pattern.
2989
2990 * Return value: A new stmt that will be used to replace
2991 the multiplication. */
2992
2993 static gimple *
2994 vect_recog_mult_pattern (vec_info *vinfo,
2995 stmt_vec_info stmt_vinfo, tree *type_out)
2996 {
2997 gimple *last_stmt = stmt_vinfo->stmt;
2998 tree oprnd0, oprnd1, vectype, itype;
2999 gimple *pattern_stmt;
3000
3001 if (!is_gimple_assign (last_stmt))
3002 return NULL;
3003
3004 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
3005 return NULL;
3006
3007 oprnd0 = gimple_assign_rhs1 (last_stmt);
3008 oprnd1 = gimple_assign_rhs2 (last_stmt);
3009 itype = TREE_TYPE (oprnd0);
3010
3011 if (TREE_CODE (oprnd0) != SSA_NAME
3012 || TREE_CODE (oprnd1) != INTEGER_CST
3013 || !INTEGRAL_TYPE_P (itype)
3014 || !type_has_mode_precision_p (itype))
3015 return NULL;
3016
3017 vectype = get_vectype_for_scalar_type (vinfo, itype);
3018 if (vectype == NULL_TREE)
3019 return NULL;
3020
3021 /* If the target can handle vectorized multiplication natively,
3022 don't attempt to optimize this. */
3023 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
3024 if (mul_optab != unknown_optab)
3025 {
3026 machine_mode vec_mode = TYPE_MODE (vectype);
3027 int icode = (int) optab_handler (mul_optab, vec_mode);
3028 if (icode != CODE_FOR_nothing)
3029 return NULL;
3030 }
3031
3032 pattern_stmt = vect_synth_mult_by_constant (vinfo,
3033 oprnd0, oprnd1, stmt_vinfo);
3034 if (!pattern_stmt)
3035 return NULL;
3036
3037 /* Pattern detected. */
3038 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3039
3040 *type_out = vectype;
3041
3042 return pattern_stmt;
3043 }
3044
3045 /* Detect a signed division by a constant that wouldn't be
3046 otherwise vectorized:
3047
3048 type a_t, b_t;
3049
3050 S1 a_t = b_t / N;
3051
3052 where type 'type' is an integral type and N is a constant.
3053
3054 Similarly handle modulo by a constant:
3055
3056 S4 a_t = b_t % N;
3057
3058 Input/Output:
3059
3060 * STMT_VINFO: The stmt from which the pattern search begins,
3061 i.e. the division stmt. If N is a power of two constant and
3062 type is signed, S1 is replaced by:
3063 S3 y_t = b_t < 0 ? N - 1 : 0;
3064 S2 x_t = b_t + y_t;
3065 S1' a_t = x_t >> log2 (N);
3066
3067 Likewise, if N is a power of two constant and type is signed,
3068 S4 is replaced by (where *_T temporaries have unsigned type):
3069 S9 y_T = b_t < 0 ? -1U : 0U;
3070 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3071 S7 z_t = (type) z_T;
3072 S6 w_t = b_t + z_t;
3073 S5 x_t = w_t & (N - 1);
3074 S4' a_t = x_t - z_t;
3075
3076 Output:
3077
3078 * TYPE_OUT: The type of the output of this pattern.
3079
3080 * Return value: A new stmt that will be used to replace the division
3081 S1 or modulo S4 stmt. */
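/* A worked instance of the division sequence (illustrative), with
   b_t = -7 and N = 4:

     S3  y_t = -7 < 0 ? 3 : 0;   // 3
     S2  x_t = -7 + 3;           // -4
     S1' a_t = -4 >> 2;          // -1 == -7 / 4 in C

   The bias converts the arithmetic shift's rounding toward negative
   infinity into the truncation toward zero that division requires.  */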
3082
3083 static gimple *
3084 vect_recog_divmod_pattern (vec_info *vinfo,
3085 stmt_vec_info stmt_vinfo, tree *type_out)
3086 {
3087 gimple *last_stmt = stmt_vinfo->stmt;
3088 tree oprnd0, oprnd1, vectype, itype, cond;
3089 gimple *pattern_stmt, *def_stmt;
3090 enum tree_code rhs_code;
3091 optab optab;
3092 tree q;
3093 int dummy_int, prec;
3094
3095 if (!is_gimple_assign (last_stmt))
3096 return NULL;
3097
3098 rhs_code = gimple_assign_rhs_code (last_stmt);
3099 switch (rhs_code)
3100 {
3101 case TRUNC_DIV_EXPR:
3102 case EXACT_DIV_EXPR:
3103 case TRUNC_MOD_EXPR:
3104 break;
3105 default:
3106 return NULL;
3107 }
3108
3109 oprnd0 = gimple_assign_rhs1 (last_stmt);
3110 oprnd1 = gimple_assign_rhs2 (last_stmt);
3111 itype = TREE_TYPE (oprnd0);
3112 if (TREE_CODE (oprnd0) != SSA_NAME
3113 || TREE_CODE (oprnd1) != INTEGER_CST
3114 || TREE_CODE (itype) != INTEGER_TYPE
3115 || !type_has_mode_precision_p (itype))
3116 return NULL;
3117
3118 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3119 vectype = get_vectype_for_scalar_type (vinfo, itype);
3120 if (vectype == NULL_TREE)
3121 return NULL;
3122
3123 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3124 {
3125 /* If the target can handle vectorized division or modulo natively,
3126 don't attempt to optimize this, since native division is likely
3127 to give smaller code. */
3128 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3129 if (optab != unknown_optab)
3130 {
3131 machine_mode vec_mode = TYPE_MODE (vectype);
3132 int icode = (int) optab_handler (optab, vec_mode);
3133 if (icode != CODE_FOR_nothing)
3134 return NULL;
3135 }
3136 }
3137
3138 prec = TYPE_PRECISION (itype);
3139 if (integer_pow2p (oprnd1))
3140 {
3141 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3142 return NULL;
3143
3144 /* Pattern detected. */
3145 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3146
3147 *type_out = vectype;
3148
3149 /* Check if the target supports this internal function. */
3150 internal_fn ifn = IFN_DIV_POW2;
3151 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3152 {
3153 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3154
3155 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3156 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3157 gimple_call_set_lhs (div_stmt, var_div);
3158
3159 if (rhs_code == TRUNC_MOD_EXPR)
3160 {
3161 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
3162 def_stmt
3163 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3164 LSHIFT_EXPR, var_div, shift);
3165 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3166 pattern_stmt
3167 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3168 MINUS_EXPR, oprnd0,
3169 gimple_assign_lhs (def_stmt));
3170 }
3171 else
3172 pattern_stmt = div_stmt;
3173 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3174
3175 return pattern_stmt;
3176 }
3177
3178 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3179 build_int_cst (itype, 0));
3180 if (rhs_code == TRUNC_DIV_EXPR
3181 || rhs_code == EXACT_DIV_EXPR)
3182 {
3183 tree var = vect_recog_temp_ssa_var (itype, NULL);
3184 tree shift;
3185 def_stmt
3186 = gimple_build_assign (var, COND_EXPR, cond,
3187 fold_build2 (MINUS_EXPR, itype, oprnd1,
3188 build_int_cst (itype, 1)),
3189 build_int_cst (itype, 0));
3190 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3191 var = vect_recog_temp_ssa_var (itype, NULL);
3192 def_stmt
3193 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3194 gimple_assign_lhs (def_stmt));
3195 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3196
3197 shift = build_int_cst (itype, tree_log2 (oprnd1));
3198 pattern_stmt
3199 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3200 RSHIFT_EXPR, var, shift);
3201 }
3202 else
3203 {
3204 tree signmask;
3205 if (compare_tree_int (oprnd1, 2) == 0)
3206 {
3207 signmask = vect_recog_temp_ssa_var (itype, NULL);
3208 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3209 build_int_cst (itype, 1),
3210 build_int_cst (itype, 0));
3211 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3212 }
3213 else
3214 {
3215 tree utype
3216 = build_nonstandard_integer_type (prec, 1);
3217 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3218 tree shift
3219 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3220 - tree_log2 (oprnd1));
3221 tree var = vect_recog_temp_ssa_var (utype, NULL);
3222
3223 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3224 build_int_cst (utype, -1),
3225 build_int_cst (utype, 0));
3226 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3227 var = vect_recog_temp_ssa_var (utype, NULL);
3228 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3229 gimple_assign_lhs (def_stmt),
3230 shift);
3231 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3232 signmask = vect_recog_temp_ssa_var (itype, NULL);
3233 def_stmt
3234 = gimple_build_assign (signmask, NOP_EXPR, var);
3235 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3236 }
3237 def_stmt
3238 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3239 PLUS_EXPR, oprnd0, signmask);
3240 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3241 def_stmt
3242 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3243 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3244 fold_build2 (MINUS_EXPR, itype, oprnd1,
3245 build_int_cst (itype, 1)));
3246 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3247
3248 pattern_stmt
3249 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3250 MINUS_EXPR, gimple_assign_lhs (def_stmt),
3251 signmask);
3252 }
3253
3254 return pattern_stmt;
3255 }
3256
3257 if (prec > HOST_BITS_PER_WIDE_INT
3258 || integer_zerop (oprnd1))
3259 return NULL;
3260
3261 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3262 return NULL;
3263
3264 if (TYPE_UNSIGNED (itype))
3265 {
3266 unsigned HOST_WIDE_INT mh, ml;
3267 int pre_shift, post_shift;
3268 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3269 & GET_MODE_MASK (itype_mode));
3270 tree t1, t2, t3, t4;
3271
3272 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3273 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
3274 return NULL;
3275
3276 /* Find a suitable multiplier and right shift count
3277 instead of multiplying with D. */
3278 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
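  /* For instance (a worked sketch following the usual magic-number
     derivation): for unsigned 32-bit division by 5 this yields
     mh = 0, ml = 0xCCCCCCCD and post_shift = 2, so that
     q = (oprnd0 h* 0xCCCCCCCD) >> 2.  */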
3279
3280 /* If the suggested multiplier needs more than PREC bits (MH nonzero),
3281 we can do better for even divisors, using an initial right shift. */
3282 if (mh != 0 && (d & 1) == 0)
3283 {
3284 pre_shift = ctz_or_zero (d);
3285 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3286 &ml, &post_shift, &dummy_int);
3287 gcc_assert (!mh);
3288 }
3289 else
3290 pre_shift = 0;
3291
3292 if (mh != 0)
3293 {
3294 if (post_shift - 1 >= prec)
3295 return NULL;
3296
3297 /* t1 = oprnd0 h* ml;
3298 t2 = oprnd0 - t1;
3299 t3 = t2 >> 1;
3300 t4 = t1 + t3;
3301 q = t4 >> (post_shift - 1); */
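	 /* E.g. (a worked sketch): for unsigned 32-bit division by 7,
	    ml = 0x24924925 and post_shift = 3, so the sequence computes

	      t1 = oprnd0 h* 0x24924925;
	      q = (t1 + ((oprnd0 - t1) >> 1)) >> 2;  */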
3302 t1 = vect_recog_temp_ssa_var (itype, NULL);
3303 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3304 build_int_cst (itype, ml));
3305 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3306
3307 t2 = vect_recog_temp_ssa_var (itype, NULL);
3308 def_stmt
3309 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3310 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3311
3312 t3 = vect_recog_temp_ssa_var (itype, NULL);
3313 def_stmt
3314 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3315 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3316
3317 t4 = vect_recog_temp_ssa_var (itype, NULL);
3318 def_stmt
3319 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3320
3321 if (post_shift != 1)
3322 {
3323 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3324
3325 q = vect_recog_temp_ssa_var (itype, NULL);
3326 pattern_stmt
3327 = gimple_build_assign (q, RSHIFT_EXPR, t4,
3328 build_int_cst (itype, post_shift - 1));
3329 }
3330 else
3331 {
3332 q = t4;
3333 pattern_stmt = def_stmt;
3334 }
3335 }
3336 else
3337 {
3338 if (pre_shift >= prec || post_shift >= prec)
3339 return NULL;
3340
3341 /* t1 = oprnd0 >> pre_shift;
3342 t2 = t1 h* ml;
3343 q = t2 >> post_shift; */
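	 /* For instance (a worked sketch): for unsigned 32-bit division
	    by 14, the multiplier for 14 itself needs 33 bits, but after
	    pre_shift = ctz (14) = 1 the divisor 7 only needs 31 bits of
	    precision, giving ml = 0x92492493 and post_shift = 2:

	      t1 = oprnd0 >> 1;
	      q = (t1 h* 0x92492493) >> 2;  */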
3344 if (pre_shift)
3345 {
3346 t1 = vect_recog_temp_ssa_var (itype, NULL);
3347 def_stmt
3348 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3349 build_int_cst (NULL, pre_shift));
3350 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3351 }
3352 else
3353 t1 = oprnd0;
3354
3355 t2 = vect_recog_temp_ssa_var (itype, NULL);
3356 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3357 build_int_cst (itype, ml));
3358
3359 if (post_shift)
3360 {
3361 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3362
3363 q = vect_recog_temp_ssa_var (itype, NULL);
3364 def_stmt
3365 = gimple_build_assign (q, RSHIFT_EXPR, t2,
3366 build_int_cst (itype, post_shift));
3367 }
3368 else
3369 q = t2;
3370
3371 pattern_stmt = def_stmt;
3372 }
3373 }
3374 else
3375 {
3376 unsigned HOST_WIDE_INT ml;
3377 int post_shift;
3378 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
3379 unsigned HOST_WIDE_INT abs_d;
3380 bool add = false;
3381 tree t1, t2, t3, t4;
3382
3383 /* Give up for -1. */
3384 if (d == -1)
3385 return NULL;
3386
3387 /* Since d might be INT_MIN, we have to cast to
3388 unsigned HOST_WIDE_INT before negating to avoid
3389 undefined signed overflow. */
3390 abs_d = (d >= 0
3391 ? (unsigned HOST_WIDE_INT) d
3392 : - (unsigned HOST_WIDE_INT) d);
3393
3394 /* n rem d = n rem -d */
3395 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3396 {
3397 d = abs_d;
3398 oprnd1 = build_int_cst (itype, abs_d);
3399 }
3400 if (HOST_BITS_PER_WIDE_INT >= prec
3401 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3402 /* This case is not handled correctly below. */
3403 return NULL;
3404
3405 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3406 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3407 {
3408 add = true;
3409 ml |= HOST_WIDE_INT_M1U << (prec - 1);
3410 }
3411 if (post_shift >= prec)
3412 return NULL;
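      /* E.g. (a worked sketch, following the standard signed
	 magic-number derivation): for signed 32-bit division by 7,
	 ml = 0x92492493, ADD is true and post_shift = 2, so we emit

	   t1 = oprnd0 h* 0x92492493;
	   t2 = t1 + oprnd0;
	   t3 = t2 >> 2;
	   q = t3 - (oprnd0 >> 31);  */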
3413
3414 /* t1 = oprnd0 h* ml; */
3415 t1 = vect_recog_temp_ssa_var (itype, NULL);
3416 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3417 build_int_cst (itype, ml));
3418
3419 if (add)
3420 {
3421 /* t2 = t1 + oprnd0; */
3422 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3423 t2 = vect_recog_temp_ssa_var (itype, NULL);
3424 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3425 }
3426 else
3427 t2 = t1;
3428
3429 if (post_shift)
3430 {
3431 /* t3 = t2 >> post_shift; */
3432 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3433 t3 = vect_recog_temp_ssa_var (itype, NULL);
3434 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3435 build_int_cst (itype, post_shift));
3436 }
3437 else
3438 t3 = t2;
3439
3440 wide_int oprnd0_min, oprnd0_max;
3441 int msb = 1;
3442 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
3443 {
3444 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
3445 msb = 0;
3446 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
3447 msb = -1;
3448 }
3449
3450 if (msb == 0 && d >= 0)
3451 {
3452 /* q = t3; */
3453 q = t3;
3454 pattern_stmt = def_stmt;
3455 }
3456 else
3457 {
3458 /* t4 = oprnd0 >> (prec - 1);
3459 or if we know from VRP that oprnd0 >= 0
3460 t4 = 0;
3461 or if we know from VRP that oprnd0 < 0
3462 t4 = -1; */
3463 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3464 t4 = vect_recog_temp_ssa_var (itype, NULL);
3465 if (msb != 1)
3466 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3467 build_int_cst (itype, msb));
3468 else
3469 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3470 build_int_cst (itype, prec - 1));
3471 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3472
3473 /* q = t3 - t4; or q = t4 - t3; */
3474 q = vect_recog_temp_ssa_var (itype, NULL);
3475 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3476 d < 0 ? t3 : t4);
3477 }
3478 }
3479
3480 if (rhs_code == TRUNC_MOD_EXPR)
3481 {
3482 tree r, t1;
3483
3484 /* We divided. Now finish by:
3485 t1 = q * oprnd1;
3486 r = oprnd0 - t1; */
3487 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
3488
3489 t1 = vect_recog_temp_ssa_var (itype, NULL);
3490 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3491 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3492
3493 r = vect_recog_temp_ssa_var (itype, NULL);
3494 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3495 }
3496
3497 /* Pattern detected. */
3498 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3499
3500 *type_out = vectype;
3501 return pattern_stmt;
3502 }
3503
3504 /* Function vect_recog_mixed_size_cond_pattern
3505
3506 Try to find the following pattern:
3507
3508 type x_t, y_t;
3509 TYPE a_T, b_T, c_T;
3510 loop:
3511 S1 a_T = x_t CMP y_t ? b_T : c_T;
3512
3513 where type 'TYPE' is an integral type which has different size
3514 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3515 than 'type', the constants need to fit into an integer type
3516 with the same width as 'type') or results of conversion from 'type'.
3517
3518 Input:
3519
3520 * STMT_VINFO: The stmt from which the pattern search begins.
3521
3522 Output:
3523
3524 * TYPE_OUT: The type of the output of this pattern.
3525
3526 * Return value: A new stmt that will be used to replace the pattern.
3527 Additionally a def_stmt is added.
3528
3529 a_it = x_t CMP y_t ? b_it : c_it;
3530 a_T = (TYPE) a_it; */
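/* E.g. (illustrative only), for

     short x;
     int a;
     ...
     a = x < 0 ? 16 : 32;

   the pattern produces

     short tmp = x < 0 ? 16 : 32;
     a = (int) tmp;

   so that the comparison and the selection are done on vector
   elements of the same size.  */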
3531
3532 static gimple *
3533 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
3534 stmt_vec_info stmt_vinfo, tree *type_out)
3535 {
3536 gimple *last_stmt = stmt_vinfo->stmt;
3537 tree cond_expr, then_clause, else_clause;
3538 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3539 gimple *pattern_stmt, *def_stmt;
3540 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3541 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3542 bool promotion;
3543 tree comp_scalar_type;
3544
3545 if (!is_gimple_assign (last_stmt)
3546 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3547 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3548 return NULL;
3549
3550 cond_expr = gimple_assign_rhs1 (last_stmt);
3551 then_clause = gimple_assign_rhs2 (last_stmt);
3552 else_clause = gimple_assign_rhs3 (last_stmt);
3553
3554 if (!COMPARISON_CLASS_P (cond_expr))
3555 return NULL;
3556
3557 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3558 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
3559 if (comp_vectype == NULL_TREE)
3560 return NULL;
3561
3562 type = gimple_expr_type (last_stmt);
3563 if (types_compatible_p (type, comp_scalar_type)
3564 || ((TREE_CODE (then_clause) != INTEGER_CST
3565 || TREE_CODE (else_clause) != INTEGER_CST)
3566 && !INTEGRAL_TYPE_P (comp_scalar_type))
3567 || !INTEGRAL_TYPE_P (type))
3568 return NULL;
3569
3570 if ((TREE_CODE (then_clause) != INTEGER_CST
3571 && !type_conversion_p (vinfo, then_clause, false,
3572 &orig_type0, &def_stmt0, &promotion))
3573 || (TREE_CODE (else_clause) != INTEGER_CST
3574 && !type_conversion_p (vinfo, else_clause, false,
3575 &orig_type1, &def_stmt1, &promotion)))
3576 return NULL;
3577
3578 if (orig_type0 && orig_type1
3579 && !types_compatible_p (orig_type0, orig_type1))
3580 return NULL;
3581
3582 if (orig_type0)
3583 {
3584 if (!types_compatible_p (orig_type0, comp_scalar_type))
3585 return NULL;
3586 then_clause = gimple_assign_rhs1 (def_stmt0);
3587 itype = orig_type0;
3588 }
3589
3590 if (orig_type1)
3591 {
3592 if (!types_compatible_p (orig_type1, comp_scalar_type))
3593 return NULL;
3594 else_clause = gimple_assign_rhs1 (def_stmt1);
3595 itype = orig_type1;
3596 }
3597
3598
3599 HOST_WIDE_INT cmp_mode_size
3600 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3601
3602 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3603 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3604 return NULL;
3605
3606 vectype = get_vectype_for_scalar_type (vinfo, type);
3607 if (vectype == NULL_TREE)
3608 return NULL;
3609
3610 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3611 return NULL;
3612
3613 if (itype == NULL_TREE)
3614 itype = build_nonstandard_integer_type (cmp_mode_size,
3615 TYPE_UNSIGNED (type));
3616
3617 if (itype == NULL_TREE
3618 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3619 return NULL;
3620
3621 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3622 if (vecitype == NULL_TREE)
3623 return NULL;
3624
3625 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3626 return NULL;
3627
3628 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3629 {
3630 if ((TREE_CODE (then_clause) == INTEGER_CST
3631 && !int_fits_type_p (then_clause, itype))
3632 || (TREE_CODE (else_clause) == INTEGER_CST
3633 && !int_fits_type_p (else_clause, itype)))
3634 return NULL;
3635 }
3636
3637 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3638 COND_EXPR, unshare_expr (cond_expr),
3639 fold_convert (itype, then_clause),
3640 fold_convert (itype, else_clause));
3641 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3642 NOP_EXPR, gimple_assign_lhs (def_stmt));
3643
3644 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
3645 *type_out = vectype;
3646
3647 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3648
3649 return pattern_stmt;
3650 }
3651
3652
3653 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3654 true if bool VAR can and should be optimized that way. Assume it shouldn't
3655 in case it's a result of a comparison which can be directly vectorized into
3656 a vector comparison. Fills in STMTS with all stmts visited during the
3657 walk. */
3658
3659 static bool
3660 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3661 {
3662 tree rhs1;
3663 enum tree_code rhs_code;
3664
3665 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3666 if (!def_stmt_info)
3667 return false;
3668
3669 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3670 if (!def_stmt)
3671 return false;
3672
3673 if (stmts.contains (def_stmt))
3674 return true;
3675
3676 rhs1 = gimple_assign_rhs1 (def_stmt);
3677 rhs_code = gimple_assign_rhs_code (def_stmt);
3678 switch (rhs_code)
3679 {
3680 case SSA_NAME:
3681 if (! check_bool_pattern (rhs1, vinfo, stmts))
3682 return false;
3683 break;
3684
3685 CASE_CONVERT:
3686 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3687 return false;
3688 if (! check_bool_pattern (rhs1, vinfo, stmts))
3689 return false;
3690 break;
3691
3692 case BIT_NOT_EXPR:
3693 if (! check_bool_pattern (rhs1, vinfo, stmts))
3694 return false;
3695 break;
3696
3697 case BIT_AND_EXPR:
3698 case BIT_IOR_EXPR:
3699 case BIT_XOR_EXPR:
3700 if (! check_bool_pattern (rhs1, vinfo, stmts)
3701 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3702 return false;
3703 break;
3704
3705 default:
3706 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3707 {
3708 tree vecitype, comp_vectype;
3709
3710 /* If the comparison can throw, then is_gimple_condexpr will be
3711 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3712 if (stmt_could_throw_p (cfun, def_stmt))
3713 return false;
3714
3715 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
3716 if (comp_vectype == NULL_TREE)
3717 return false;
3718
3719 tree mask_type = get_mask_type_for_scalar_type (vinfo,
3720 TREE_TYPE (rhs1));
3721 if (mask_type
3722 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3723 return false;
3724
3725 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3726 {
3727 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3728 tree itype
3729 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3730 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3731 if (vecitype == NULL_TREE)
3732 return false;
3733 }
3734 else
3735 vecitype = comp_vectype;
3736 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3737 return false;
3738 }
3739 else
3740 return false;
3741 break;
3742 }
3743
3744 bool res = stmts.add (def_stmt);
3745 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3746 gcc_assert (!res);
3747
3748 return true;
3749 }
3750
3751
3752 /* Helper function of adjust_bool_pattern. Cast VAR, the result of
3753 a previous stmt (SSA_NAME_DEF_STMT of VAR), to TYPE, appending
3754 the cast to STMT_INFO's pattern sequence. */
3755
3756 static tree
3757 adjust_bool_pattern_cast (vec_info *vinfo,
3758 tree type, tree var, stmt_vec_info stmt_info)
3759 {
3760 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3761 NOP_EXPR, var);
3762 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
3763 get_vectype_for_scalar_type (vinfo, type));
3764 return gimple_assign_lhs (cast_stmt);
3765 }
3766
3767 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3768 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3769 type, OUT_TYPE is the desired final integer type of the whole pattern.
3770 STMT_INFO is the info of the pattern root and is where pattern stmts should
3771 be associated with. DEFS is a map of pattern defs. */
3772
3773 static void
3774 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
3775 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3776 {
3777 gimple *stmt = SSA_NAME_DEF_STMT (var);
3778 enum tree_code rhs_code, def_rhs_code;
3779 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3780 location_t loc;
3781 gimple *pattern_stmt, *def_stmt;
3782 tree trueval = NULL_TREE;
3783
3784 rhs1 = gimple_assign_rhs1 (stmt);
3785 rhs2 = gimple_assign_rhs2 (stmt);
3786 rhs_code = gimple_assign_rhs_code (stmt);
3787 loc = gimple_location (stmt);
3788 switch (rhs_code)
3789 {
3790 case SSA_NAME:
3791 CASE_CONVERT:
3792 irhs1 = *defs.get (rhs1);
3793 itype = TREE_TYPE (irhs1);
3794 pattern_stmt
3795 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3796 SSA_NAME, irhs1);
3797 break;
3798
3799 case BIT_NOT_EXPR:
3800 irhs1 = *defs.get (rhs1);
3801 itype = TREE_TYPE (irhs1);
3802 pattern_stmt
3803 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3804 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3805 break;
3806
3807 case BIT_AND_EXPR:
3808 /* Try to optimize x = y & (a < b ? 1 : 0); into
3809 x = (a < b ? y : 0);
3810
3811 E.g. for:
3812 bool a_b, b_b, c_b;
3813 TYPE d_T;
3814
3815 S1 a_b = x1 CMP1 y1;
3816 S2 b_b = x2 CMP2 y2;
3817 S3 c_b = a_b & b_b;
3818 S4 d_T = (TYPE) c_b;
3819
3820 we would normally emit:
3821
3822 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3823 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3824 S3' c_T = a_T & b_T;
3825 S4' d_T = c_T;
3826
3827 but we can save one stmt by using the
3828 result of one of the COND_EXPRs in the other COND_EXPR and leave
3829 BIT_AND_EXPR stmt out:
3830
3831 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3832 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3833 S4' f_T = c_T;
3834
3835 At least when VEC_COND_EXPR is implemented using masks,
3836 cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases it
3837 computes the comparison mask and ANDs it, in one case with an
3838 all-ones vector, in the other case with a vector register.
3839 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3840 often more expensive. */
3841 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3842 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3843 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3844 {
3845 irhs1 = *defs.get (rhs1);
3846 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3847 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3848 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3849 {
3850 rhs_code = def_rhs_code;
3851 rhs1 = def_rhs1;
3852 rhs2 = gimple_assign_rhs2 (def_stmt);
3853 trueval = irhs1;
3854 goto do_compare;
3855 }
3856 else
3857 irhs2 = *defs.get (rhs2);
3858 goto and_ior_xor;
3859 }
3860 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3861 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3862 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3863 {
3864 irhs2 = *defs.get (rhs2);
3865 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3866 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3867 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3868 {
3869 rhs_code = def_rhs_code;
3870 rhs1 = def_rhs1;
3871 rhs2 = gimple_assign_rhs2 (def_stmt);
3872 trueval = irhs2;
3873 goto do_compare;
3874 }
3875 else
3876 irhs1 = *defs.get (rhs1);
3877 goto and_ior_xor;
3878 }
3879 /* FALLTHRU */
3880 case BIT_IOR_EXPR:
3881 case BIT_XOR_EXPR:
3882 irhs1 = *defs.get (rhs1);
3883 irhs2 = *defs.get (rhs2);
3884 and_ior_xor:
3885 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3886 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3887 {
3888 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3889 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3890 int out_prec = TYPE_PRECISION (out_type);
3891 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3892 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
3893 stmt_info);
3894 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3895 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
3896 stmt_info);
3897 else
3898 {
3899 irhs1 = adjust_bool_pattern_cast (vinfo,
3900 out_type, irhs1, stmt_info);
3901 irhs2 = adjust_bool_pattern_cast (vinfo,
3902 out_type, irhs2, stmt_info);
3903 }
3904 }
3905 itype = TREE_TYPE (irhs1);
3906 pattern_stmt
3907 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3908 rhs_code, irhs1, irhs2);
3909 break;
3910
3911 default:
3912 do_compare:
3913 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3914 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3915 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3916 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3917 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3918 {
3919 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3920 itype
3921 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3922 }
3923 else
3924 itype = TREE_TYPE (rhs1);
3925 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3926 if (trueval == NULL_TREE)
3927 trueval = build_int_cst (itype, 1);
3928 else
3929 gcc_checking_assert (useless_type_conversion_p (itype,
3930 TREE_TYPE (trueval)));
3931 pattern_stmt
3932 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3933 COND_EXPR, cond_expr, trueval,
3934 build_int_cst (itype, 0));
3935 break;
3936 }
3937
3938 gimple_set_location (pattern_stmt, loc);
3939 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
3940 get_vectype_for_scalar_type (vinfo, itype));
3941 defs.put (var, gimple_assign_lhs (pattern_stmt));
3942 }
3943
3944 /* Comparison function to qsort a vector of gimple stmts after UID. */
3945
3946 static int
3947 sort_after_uid (const void *p1, const void *p2)
3948 {
3949 const gimple *stmt1 = *(const gimple * const *)p1;
3950 const gimple *stmt2 = *(const gimple * const *)p2;
3951 return gimple_uid (stmt1) - gimple_uid (stmt2);
3952 }
3953
3954 /* Create pattern stmts for all stmts participating in the bool pattern
3955 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3956 OUT_TYPE. Return the def of the pattern root. */
3957
3958 static tree
3959 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
3960 tree out_type, stmt_vec_info stmt_info)
3961 {
3962 /* Gather original stmts in the bool pattern in their order of appearance
3963 in the IL. */
3964 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3965 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3966 i != bool_stmt_set.end (); ++i)
3967 bool_stmts.quick_push (*i);
3968 bool_stmts.qsort (sort_after_uid);
3969
3970 /* Now process them in that order, producing pattern stmts. */
3971 hash_map <tree, tree> defs;
3972 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3973 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
3974 out_type, stmt_info, defs);
3975
3976 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3977 gimple *pattern_stmt
3978 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
3979 return gimple_assign_lhs (pattern_stmt);
3980 }
3981
3982 /* Return the proper type for converting bool VAR into
3983 an integer value or NULL_TREE if no such type exists.
3984 The type is chosen so that the converted value has the
3985 same number of elements as VAR's vector type. */
3986
3987 static tree
3988 integer_type_for_mask (tree var, vec_info *vinfo)
3989 {
3990 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3991 return NULL_TREE;
3992
3993 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3994 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
3995 return NULL_TREE;
3996
3997 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
3998 }
3999
4000 /* Function vect_recog_bool_pattern
4001
4002 Try to find pattern like following:
4003
4004 bool a_b, b_b, c_b, d_b, e_b;
4005 TYPE f_T;
4006 loop:
4007 S1 a_b = x1 CMP1 y1;
4008 S2 b_b = x2 CMP2 y2;
4009 S3 c_b = a_b & b_b;
4010 S4 d_b = x3 CMP3 y3;
4011 S5 e_b = c_b | d_b;
4012 S6 f_T = (TYPE) e_b;
4013
4014 where type 'TYPE' is an integral type. Or a similar pattern
4015 ending in
4016
4017 S6 f_Y = e_b ? r_Y : s_Y;
4018
4019 as results from if-conversion of a complex condition.
4020
4021 Input:
4022
4023 * STMT_VINFO: The stmt at the end from which the pattern
4024 search begins, i.e. cast of a bool to
4025 an integer type.
4026
4027 Output:
4028
4029 * TYPE_OUT: The type of the output of this pattern.
4030
4031 * Return value: A new stmt that will be used to replace the pattern.
4032
4033 Assuming the size of TYPE is the same as the size of all comparisons
4034 (otherwise some casts would be added where needed), for the above
4035 sequence we create the following related pattern stmts:
4036 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4037 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4038 S4' d_T = x3 CMP3 y3 ? 1 : 0;
4039 S5' e_T = c_T | d_T;
4040 S6' f_T = e_T;
4041
4042 Instead of the above S3' we could emit:
4043 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4044 S3' c_T = a_T | b_T;
4045 but the above is more efficient. */
4046
4047 static gimple *
4048 vect_recog_bool_pattern (vec_info *vinfo,
4049 stmt_vec_info stmt_vinfo, tree *type_out)
4050 {
4051 gimple *last_stmt = stmt_vinfo->stmt;
4052 enum tree_code rhs_code;
4053 tree var, lhs, rhs, vectype;
4054 gimple *pattern_stmt;
4055
4056 if (!is_gimple_assign (last_stmt))
4057 return NULL;
4058
4059 var = gimple_assign_rhs1 (last_stmt);
4060 lhs = gimple_assign_lhs (last_stmt);
4061 rhs_code = gimple_assign_rhs_code (last_stmt);
4062
4063 if (rhs_code == VIEW_CONVERT_EXPR)
4064 var = TREE_OPERAND (var, 0);
4065
4066 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4067 return NULL;
4068
4069 hash_set<gimple *> bool_stmts;
4070
4071 if (CONVERT_EXPR_CODE_P (rhs_code)
4072 || rhs_code == VIEW_CONVERT_EXPR)
4073 {
4074 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4075 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4076 return NULL;
4077 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4078 if (vectype == NULL_TREE)
4079 return NULL;
4080
4081 if (check_bool_pattern (var, vinfo, bool_stmts))
4082 {
4083 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4084 TREE_TYPE (lhs), stmt_vinfo);
4085 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4086 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4087 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4088 else
4089 pattern_stmt
4090 = gimple_build_assign (lhs, NOP_EXPR, rhs);
4091 }
4092 else
4093 {
4094 tree type = integer_type_for_mask (var, vinfo);
4095 tree cst0, cst1, tmp;
4096
4097 if (!type)
4098 return NULL;
4099
4100 /* We may use the cond directly with a narrowed type, avoiding
4101 multiple cond exprs and the subsequent result packing, and
4102 perform a single cond with a packed mask instead. In case
4103 of widening it is better to do the cond first and then
4104 extract the results. */
4105 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4106 type = TREE_TYPE (lhs);
4107
4108 cst0 = build_int_cst (type, 0);
4109 cst1 = build_int_cst (type, 1);
4110 tmp = vect_recog_temp_ssa_var (type, NULL);
4111 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4112
4113 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4114 {
4115 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4116 append_pattern_def_seq (vinfo, stmt_vinfo,
4117 pattern_stmt, new_vectype);
4118
4119 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4120 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4121 }
4122 }
4123
4124 *type_out = vectype;
4125 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4126
4127 return pattern_stmt;
4128 }
4129 else if (rhs_code == COND_EXPR
4130 && TREE_CODE (var) == SSA_NAME)
4131 {
4132 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4133 if (vectype == NULL_TREE)
4134 return NULL;
4135
4136 /* Build a scalar type for the boolean result that when
4137 vectorized matches the vector type of the result in
4138 size and number of elements. */
4139 unsigned prec
4140 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4141 TYPE_VECTOR_SUBPARTS (vectype));
4142
4143 tree type
4144 = build_nonstandard_integer_type (prec,
4145 TYPE_UNSIGNED (TREE_TYPE (var)));
4146 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4147 return NULL;
4148
4149 if (!check_bool_pattern (var, vinfo, bool_stmts))
4150 return NULL;
4151
4152 rhs = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
4153
4154 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4155 pattern_stmt
4156 = gimple_build_assign (lhs, COND_EXPR,
4157 build2 (NE_EXPR, boolean_type_node,
4158 rhs, build_int_cst (type, 0)),
4159 gimple_assign_rhs2 (last_stmt),
4160 gimple_assign_rhs3 (last_stmt));
4161 *type_out = vectype;
4162 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4163
4164 return pattern_stmt;
4165 }
4166 else if (rhs_code == SSA_NAME
4167 && STMT_VINFO_DATA_REF (stmt_vinfo))
4168 {
4169 stmt_vec_info pattern_stmt_info;
4170 tree nunits_vectype;
4171 if (!vect_get_vector_types_for_stmt (vinfo, stmt_vinfo, &vectype,
4172 &nunits_vectype)
4173 || !VECTOR_MODE_P (TYPE_MODE (vectype)))
4174 return NULL;
4175
4176 if (check_bool_pattern (var, vinfo, bool_stmts))
4177 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4178 TREE_TYPE (vectype), stmt_vinfo);
4179 else
4180 {
4181 tree type = integer_type_for_mask (var, vinfo);
4182 tree cst0, cst1, new_vectype;
4183
4184 if (!type)
4185 return NULL;
4186
4187 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4188 type = TREE_TYPE (vectype);
4189
4190 cst0 = build_int_cst (type, 0);
4191 cst1 = build_int_cst (type, 1);
4192 new_vectype = get_vectype_for_scalar_type (vinfo, type);
4193
4194 rhs = vect_recog_temp_ssa_var (type, NULL);
4195 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4196 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
4197 }
4198
4199 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4200 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4201 {
4202 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4203 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4204 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
4205 rhs = rhs2;
4206 }
4207 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4208 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4209 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4210 *type_out = vectype;
4211 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4212
4213 return pattern_stmt;
4214 }
4215 else
4216 return NULL;
4217 }
4218
4219
4220 /* A helper for vect_recog_mask_conversion_pattern. Build
4221 conversion of MASK to a type suitable for masking VECTYPE.
4222 Built statement gets required vectype and is appended to
4223 a pattern sequence of STMT_VINFO.
4224
4225 Return converted mask. */
4226
4227 static tree
4228 build_mask_conversion (vec_info *vinfo,
4229 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4230 {
4231 gimple *stmt;
4232 tree masktype, tmp;
4233
4234 masktype = truth_type_for (vectype);
4235 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4236 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4237 append_pattern_def_seq (vinfo, stmt_vinfo,
4238 stmt, masktype, TREE_TYPE (vectype));
4239
4240 return tmp;
4241 }
4242
4243
4244 /* Function vect_recog_mask_conversion_pattern
4245
4246 Try to find statements which require boolean type
4247 conversion. Additional conversion statements are
4248 added to handle such cases. For example:
4249
4250 bool m_1, m_2, m_3;
4251 int i_4, i_5;
4252 double d_6, d_7;
4253 char c_1, c_2, c_3;
4254
4255 S1 m_1 = i_4 > i_5;
4256 S2 m_2 = d_6 < d_7;
4257 S3 m_3 = m_1 & m_2;
4258 S4 c_1 = m_3 ? c_2 : c_3;
4259
4260 Will be transformed into:
4261
4262 S1 m_1 = i_4 > i_5;
4263 S2 m_2 = d_6 < d_7;
4264 S3'' m_2' = (_Bool[bitsize=32])m_2
4265 S3' m_3' = m_1 & m_2';
4266 S4'' m_3'' = (_Bool[bitsize=8])m_3'
4267 S4' c_1' = m_3'' ? c_2 : c_3; */
4268
4269 static gimple *
4270 vect_recog_mask_conversion_pattern (vec_info *vinfo,
4271 stmt_vec_info stmt_vinfo, tree *type_out)
4272 {
4273 gimple *last_stmt = stmt_vinfo->stmt;
4274 enum tree_code rhs_code;
4275 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4276 tree vectype1, vectype2;
4277 stmt_vec_info pattern_stmt_info;
4278 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
4279 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
4280
4281 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
4282 if (is_gimple_call (last_stmt)
4283 && gimple_call_internal_p (last_stmt))
4284 {
4285 gcall *pattern_stmt;
4286
4287 internal_fn ifn = gimple_call_internal_fn (last_stmt);
4288 int mask_argno = internal_fn_mask_index (ifn);
4289 if (mask_argno < 0)
4290 return NULL;
4291
4292 bool store_p = internal_store_fn_p (ifn);
4293 if (store_p)
4294 {
4295 int rhs_index = internal_fn_stored_value_index (ifn);
4296 tree rhs = gimple_call_arg (last_stmt, rhs_index);
4297 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4298 }
4299 else
4300 {
4301 lhs = gimple_call_lhs (last_stmt);
4302 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4303 }
4304
4305 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4306 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
4307 if (!mask_arg_type)
4308 return NULL;
4309 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4310
4311 if (!vectype1 || !vectype2
4312 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4313 TYPE_VECTOR_SUBPARTS (vectype2)))
4314 return NULL;
4315
4316 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
4317
4318 auto_vec<tree, 8> args;
4319 unsigned int nargs = gimple_call_num_args (last_stmt);
4320 args.safe_grow (nargs, true);
4321 for (unsigned int i = 0; i < nargs; ++i)
4322 args[i] = ((int) i == mask_argno
4323 ? tmp
4324 : gimple_call_arg (last_stmt, i));
4325 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4326
4327 if (!store_p)
4328 {
4329 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4330 gimple_call_set_lhs (pattern_stmt, lhs);
4331 }
4332 gimple_call_set_nothrow (pattern_stmt, true);
4333
4334 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4335 if (STMT_VINFO_DATA_REF (stmt_vinfo))
4336 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4337
4338 *type_out = vectype1;
4339 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4340
4341 return pattern_stmt;
4342 }
4343
4344 if (!is_gimple_assign (last_stmt))
4345 return NULL;
4346
4347 gimple *pattern_stmt;
4348 lhs = gimple_assign_lhs (last_stmt);
4349 rhs1 = gimple_assign_rhs1 (last_stmt);
4350 rhs_code = gimple_assign_rhs_code (last_stmt);
4351
4352 /* Check for cond expression requiring mask conversion. */
4353 if (rhs_code == COND_EXPR)
4354 {
4355 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4356
4357 if (TREE_CODE (rhs1) == SSA_NAME)
4358 {
4359 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4360 if (!rhs1_type)
4361 return NULL;
4362 }
4363 else if (COMPARISON_CLASS_P (rhs1))
4364 {
4365 /* Check whether we're comparing scalar booleans and (if so)
4366 whether a better mask type exists than the mask associated
4367 with boolean-sized elements. This avoids unnecessary packs
4368 and unpacks if the booleans are set from comparisons of
4369 wider types. E.g. in:
4370
4371 int x1, x2, x3, x4, y1, y2;
4372 ...
4373 bool b1 = (x1 == x2);
4374 bool b2 = (x3 == x4);
4375 ... = b1 == b2 ? y1 : y2;
4376
4377 it is better for b1 and b2 to use the mask type associated
4378 with int elements rather than bool (byte) elements. */
4379 rhs1_op0 = TREE_OPERAND (rhs1, 0);
4380 rhs1_op1 = TREE_OPERAND (rhs1, 1);
4381 if (!rhs1_op0 || !rhs1_op1)
4382 return NULL;
4383 rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
4384 rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
4385
4386 if (!rhs1_op0_type)
4387 rhs1_type = TREE_TYPE (rhs1_op0);
4388 else if (!rhs1_op1_type)
4389 rhs1_type = TREE_TYPE (rhs1_op1);
4390 else if (TYPE_PRECISION (rhs1_op0_type)
4391 != TYPE_PRECISION (rhs1_op1_type))
4392 {
4393 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
4394 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4395 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
4396 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4397 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
4398 {
4399 if (abs (tmp0) > abs (tmp1))
4400 rhs1_type = rhs1_op1_type;
4401 else
4402 rhs1_type = rhs1_op0_type;
4403 }
4404 else
4405 rhs1_type = build_nonstandard_integer_type
4406 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
4407 }
4408 else
4409 rhs1_type = rhs1_op0_type;
4410 }
4411 else
4412 return NULL;
4413
4414 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4415
4416 if (!vectype1 || !vectype2)
4417 return NULL;
4418
4419 /* Continue if a conversion is needed. Also continue if we have
4420 a comparison whose vector type would normally be different from
4421 VECTYPE2 when considered in isolation. In that case we'll
4422 replace the comparison with an SSA name (so that we can record
4423 its vector type) and behave as though the comparison was an SSA
4424 name from the outset. */
4425 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4426 TYPE_VECTOR_SUBPARTS (vectype2))
4427 && !rhs1_op0_type
4428 && !rhs1_op1_type)
4429 return NULL;
4430
4431 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4432 in place, we can handle it in vectorizable_condition. This avoids
4433 unnecessary promotion stmts and increased vectorization factor. */
4434 if (COMPARISON_CLASS_P (rhs1)
4435 && INTEGRAL_TYPE_P (rhs1_type)
4436 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4437 TYPE_VECTOR_SUBPARTS (vectype2)))
4438 {
4439 enum vect_def_type dt;
4440 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4441 && dt == vect_external_def
4442 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4443 && (dt == vect_external_def
4444 || dt == vect_constant_def))
4445 {
4446 tree wide_scalar_type = build_nonstandard_integer_type
4447 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
4448 tree vectype3 = get_vectype_for_scalar_type (vinfo,
4449 wide_scalar_type);
4450 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4451 return NULL;
4452 }
4453 }
4454
4455 /* If rhs1 is a comparison we need to move it into a
4456 separate statement. */
4457 if (TREE_CODE (rhs1) != SSA_NAME)
4458 {
4459 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4460 if (rhs1_op0_type
4461 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
4462 rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
4463 vectype2, stmt_vinfo);
4464 if (rhs1_op1_type
4465 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
4466 rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
4467 vectype2, stmt_vinfo);
4468 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
4469 rhs1_op0, rhs1_op1);
4470 rhs1 = tmp;
4471 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
4472 rhs1_type);
4473 }
4474
4475 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4476 TYPE_VECTOR_SUBPARTS (vectype2)))
4477 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4478 else
4479 tmp = rhs1;
4480
4481 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4482 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4483 gimple_assign_rhs2 (last_stmt),
4484 gimple_assign_rhs3 (last_stmt));
4485
4486 *type_out = vectype1;
4487 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4488
4489 return pattern_stmt;
4490 }
4491
4492 /* Now check for binary boolean operations requiring conversion for
4493 one of operands. */
4494 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4495 return NULL;
4496
4497 if (rhs_code != BIT_IOR_EXPR
4498 && rhs_code != BIT_XOR_EXPR
4499 && rhs_code != BIT_AND_EXPR
4500 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4501 return NULL;
4502
4503 rhs2 = gimple_assign_rhs2 (last_stmt);
4504
4505 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4506 rhs2_type = integer_type_for_mask (rhs2, vinfo);
4507
4508 if (!rhs1_type || !rhs2_type
4509 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4510 return NULL;
4511
4512 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4513 {
4514 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4515 if (!vectype1)
4516 return NULL;
4517 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
4518 }
4519 else
4520 {
4521 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
4522 if (!vectype1)
4523 return NULL;
4524 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4525 }
4526
4527 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4528 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4529
4530 *type_out = vectype1;
4531 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4532
4533 return pattern_stmt;
4534 }
4535
4536 /* STMT_INFO is a load or store. If the load or store is conditional, return
4537 the boolean condition under which it occurs, otherwise return null. */
4538
4539 static tree
4540 vect_get_load_store_mask (stmt_vec_info stmt_info)
4541 {
4542 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4543 {
4544 gcc_assert (gimple_assign_single_p (def_assign));
4545 return NULL_TREE;
4546 }
4547
4548 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4549 {
4550 internal_fn ifn = gimple_call_internal_fn (def_call);
4551 int mask_index = internal_fn_mask_index (ifn);
4552 return gimple_call_arg (def_call, mask_index);
4553 }
4554
4555 gcc_unreachable ();
4556 }
4557
4558 /* Return MASK if MASK is suitable for masking an operation on vectors
4559 of type VECTYPE, otherwise convert it into such a form and return
4560 the result. Associate any conversion statements with STMT_INFO's
4561 pattern. */
4562
4563 static tree
4564 vect_convert_mask_for_vectype (tree mask, tree vectype,
4565 stmt_vec_info stmt_info, vec_info *vinfo)
4566 {
4567 tree mask_type = integer_type_for_mask (mask, vinfo);
4568 if (mask_type)
4569 {
4570 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
4571 if (mask_vectype
4572 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4573 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4574 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
4575 }
4576 return mask;
4577 }
4578
4579 /* Return the equivalent of:
4580
4581 fold_convert (TYPE, VALUE)
4582
4583 with the expectation that the operation will be vectorized.
4584 If new statements are needed, add them as pattern statements
4585 to STMT_INFO. */
4586
4587 static tree
4588 vect_add_conversion_to_pattern (vec_info *vinfo,
4589 tree type, tree value, stmt_vec_info stmt_info)
4590 {
4591 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4592 return value;
4593
4594 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4595 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4596 append_pattern_def_seq (vinfo, stmt_info, conversion,
4597 get_vectype_for_scalar_type (vinfo, type));
4598 return new_value;
4599 }
4600
4601 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4602 internal function. Return the final statement on success and set
4603 *TYPE_OUT to the vector type being loaded or stored.
4604
4605 This function only handles gathers and scatters that were recognized
4606 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
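/* For example (sketch only), a conditional gather load such as

     for (i = 0; i < n; ++i)
       if (cond[i])
	 out[i] = base[index[i]];

   is replaced by an internal-function call along the lines of

     _1 = .MASK_GATHER_LOAD (base, offset, scale, 0, mask);

   with the offset first converted to the width of the vector
   elements.  */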
4607
4608 static gimple *
4609 vect_recog_gather_scatter_pattern (vec_info *vinfo,
4610 stmt_vec_info stmt_info, tree *type_out)
4611 {
4612 /* Currently we only support this for loop vectorization. */
4613 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4614 if (!loop_vinfo)
4615 return NULL;
4616
4617 /* Make sure that we're looking at a gather load or scatter store. */
4618 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4619 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4620 return NULL;
4621
4622 /* Get the boolean that controls whether the load or store happens.
4623 This is null if the operation is unconditional. */
4624 tree mask = vect_get_load_store_mask (stmt_info);
4625
4626 /* Make sure that the target supports an appropriate internal
4627 function for the gather/scatter operation. */
4628 gather_scatter_info gs_info;
4629 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4630 || gs_info.decl)
4631 return NULL;
4632
4633 /* Convert the mask to the right form. */
4634 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
4635 gs_info.element_type);
4636 if (mask)
4637 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4638 loop_vinfo);
4639
4640 /* Get the invariant base and non-invariant offset, converting the
4641 latter to the same width as the vector elements. */
4642 tree base = gs_info.base;
4643 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
4644 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
4645 gs_info.offset, stmt_info);
4646
4647 /* Build the new pattern statement. */
4648 tree scale = size_int (gs_info.scale);
4649 gcall *pattern_stmt;
4650 if (DR_IS_READ (dr))
4651 {
4652 tree zero = build_zero_cst (gs_info.element_type);
4653 if (mask != NULL)
4654 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
4655 offset, scale, zero, mask);
4656 else
4657 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4658 offset, scale, zero);
4659 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4660 gimple_call_set_lhs (pattern_stmt, load_lhs);
4661 }
4662 else
4663 {
4664 tree rhs = vect_get_store_rhs (stmt_info);
4665 if (mask != NULL)
4666 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4667 base, offset, scale, rhs,
4668 mask);
4669 else
4670 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4671 base, offset, scale, rhs);
4672 }
4673 gimple_call_set_nothrow (pattern_stmt, true);
4674
4675 /* Copy across relevant vectorization info and associate DR with the
4676 new pattern statement instead of the original statement. */
4677 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4678 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4679
4680 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4681 *type_out = vectype;
4682 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4683
4684 return pattern_stmt;
4685 }
4686
4687 /* Return true if TYPE is a non-boolean integer type. These are the types
4688 that we want to consider for narrowing. */
4689
4690 static bool
4691 vect_narrowable_type_p (tree type)
4692 {
4693 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4694 }
4695
4696 /* Return true if the operation given by CODE can be truncated to N bits
4697 when only N bits of the output are needed. This is only true if bit N+1
4698 of the inputs has no effect on the low N bits of the result. */
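/* For example, the low 8 bits of a PLUS_EXPR result depend only on
   the low 8 bits of its inputs, so PLUS_EXPR qualifies, whereas bit 8
   of X affects bit 7 of X >> 1, so RSHIFT_EXPR does not.  */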
4699
4700 static bool
4701 vect_truncatable_operation_p (tree_code code)
4702 {
4703 switch (code)
4704 {
4705 case PLUS_EXPR:
4706 case MINUS_EXPR:
4707 case MULT_EXPR:
4708 case BIT_AND_EXPR:
4709 case BIT_IOR_EXPR:
4710 case BIT_XOR_EXPR:
4711 case COND_EXPR:
4712 return true;
4713
4714 default:
4715 return false;
4716 }
4717 }
4718
4719 /* Record that STMT_INFO could be changed from operating on TYPE to
4720 operating on a type with the precision and sign given by PRECISION
4721 and SIGN respectively. PRECISION is an arbitrary bit precision;
4722 it might not be a whole number of bytes. */
4723
4724 static void
4725 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4726 unsigned int precision, signop sign)
4727 {
4728 /* Round the precision up to a whole number of bytes. */
4729 precision = vect_element_precision (precision);
4730 if (precision < TYPE_PRECISION (type)
4731 && (!stmt_info->operation_precision
4732 || stmt_info->operation_precision > precision))
4733 {
4734 stmt_info->operation_precision = precision;
4735 stmt_info->operation_sign = sign;
4736 }
4737 }
4738
4739 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4740 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4741 is an arbitrary bit precision; it might not be a whole number of bytes. */
4742
4743 static void
4744 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4745 unsigned int min_input_precision)
4746 {
4747 /* This operation in isolation only requires the inputs to have
4748 MIN_INPUT_PRECISION of precision. However, that doesn't mean
4749 that MIN_INPUT_PRECISION is a natural precision for the chain
4750 as a whole. E.g. consider something like:
4751
4752 unsigned short *x, *y;
4753 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4754
4755 The right shift can be done on unsigned chars, and only requires the
4756 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4757 approach would mean turning a natural chain of single-vector unsigned
4758 short operations into one that truncates "*x" and then extends
4759 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4760 operation and one vector for each unsigned char operation.
4761 This would be a significant pessimization.
4762
4763 Instead only propagate the maximum of this precision and the precision
4764 required by the users of the result. This means that we don't pessimize
4765 the case above but continue to optimize things like:
4766
4767 unsigned char *y;
4768 unsigned short *x;
4769 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4770
4771 Here we would truncate two vectors of *x to a single vector of
4772 unsigned chars and use single-vector unsigned char operations for
4773 everything else, rather than doing two unsigned short copies of
4774 "(*x & 0xf0) >> 4" and then truncating the result. */
4775 min_input_precision = MAX (min_input_precision,
4776 stmt_info->min_output_precision);
4777
4778 if (min_input_precision < TYPE_PRECISION (type)
4779 && (!stmt_info->min_input_precision
4780 || stmt_info->min_input_precision > min_input_precision))
4781 stmt_info->min_input_precision = min_input_precision;
4782 }
4783
4784 /* Subroutine of vect_determine_min_output_precision. Return true if
4785 we can calculate a reduced number of output bits for STMT_INFO,
4786 whose result is LHS. */
4787
4788 static bool
4789 vect_determine_min_output_precision_1 (vec_info *vinfo,
4790 stmt_vec_info stmt_info, tree lhs)
4791 {
4792 /* Take the maximum precision required by users of the result. */
4793 unsigned int precision = 0;
4794 imm_use_iterator iter;
4795 use_operand_p use;
4796 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4797 {
4798 gimple *use_stmt = USE_STMT (use);
4799 if (is_gimple_debug (use_stmt))
4800 continue;
4801 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4802 if (!use_stmt_info || !use_stmt_info->min_input_precision)
4803 return false;
4804 /* The input precision recorded for COND_EXPRs applies only to the
4805 "then" and "else" values. */
4806 gassign *assign = dyn_cast <gassign *> (use_stmt_info->stmt);
4807 if (assign
4808 && gimple_assign_rhs_code (assign) == COND_EXPR
4809 && use->use != gimple_assign_rhs2_ptr (assign)
4810 && use->use != gimple_assign_rhs3_ptr (assign))
4811 return false;
4812 precision = MAX (precision, use_stmt_info->min_input_precision);
4813 }
4814
4815 if (dump_enabled_p ())
4816 dump_printf_loc (MSG_NOTE, vect_location,
4817 "only the low %d bits of %T are significant\n",
4818 precision, lhs);
4819 stmt_info->min_output_precision = precision;
4820 return true;
4821 }
4822
4823 /* Calculate min_output_precision for STMT_INFO. */
4824
4825 static void
4826 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
4827 {
4828 /* We're only interested in statements with a narrowable result. */
4829 tree lhs = gimple_get_lhs (stmt_info->stmt);
4830 if (!lhs
4831 || TREE_CODE (lhs) != SSA_NAME
4832 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4833 return;
4834
4835 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
4836 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4837 }
4838
4839 /* Use range information to decide whether STMT (described by STMT_INFO)
4840 could be done in a narrower type. This is effectively a forward
4841 propagation, since it uses context-independent information that applies
4842 to all users of an SSA name. */
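/* For example (an illustrative case), if VRP shows that the result of
   a 32-bit addition always lies in [0, 200], the addition can instead
   be carried out in unsigned 8-bit precision (vect_element_precision
   rounds the required 8 bits up to a whole number of bytes).  */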
4843
4844 static void
4845 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4846 {
4847 tree lhs = gimple_assign_lhs (stmt);
4848 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4849 return;
4850
4851 tree type = TREE_TYPE (lhs);
4852 if (!vect_narrowable_type_p (type))
4853 return;
4854
4855 /* First see whether we have any useful range information for the result. */
4856 unsigned int precision = TYPE_PRECISION (type);
4857 signop sign = TYPE_SIGN (type);
4858 wide_int min_value, max_value;
4859 if (!vect_get_range_info (lhs, &min_value, &max_value))
4860 return;
4861
4862 tree_code code = gimple_assign_rhs_code (stmt);
4863 unsigned int nops = gimple_num_ops (stmt);
4864
4865 if (!vect_truncatable_operation_p (code))
4866 /* Check that all relevant input operands are compatible, and update
4867 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4868 for (unsigned int i = 1; i < nops; ++i)
4869 {
4870 tree op = gimple_op (stmt, i);
4871 if (TREE_CODE (op) == INTEGER_CST)
4872 {
4873 /* Don't require the integer to have TYPE (which it might
4874 not for things like shift amounts, etc.), but do require
4875 it to fit the type. */
4876 if (!int_fits_type_p (op, type))
4877 return;
4878
4879 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4880 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4881 }
4882 else if (TREE_CODE (op) == SSA_NAME)
4883 {
4884 /* Ignore codes that don't take uniform arguments. */
4885 if (!types_compatible_p (TREE_TYPE (op), type))
4886 return;
4887
4888 wide_int op_min_value, op_max_value;
4889 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4890 return;
4891
4892 min_value = wi::min (min_value, op_min_value, sign);
4893 max_value = wi::max (max_value, op_max_value, sign);
4894 }
4895 else
4896 return;
4897 }
4898
4899 /* Try to switch signed types for unsigned types if we can.
4900 This is better for two reasons. First, unsigned ops tend
4901 to be cheaper than signed ops. Second, it means that we can
4902 handle things like:
4903
4904 signed char c;
4905 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4906
4907 as:
4908
4909 signed char c;
4910 unsigned short res_1 = (unsigned short) c & 0xff00;
4911 int res = (int) res_1;
4912
4913 where the intermediate result res_1 has unsigned rather than
4914 signed type. */
4915 if (sign == SIGNED && !wi::neg_p (min_value))
4916 sign = UNSIGNED;
4917
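  /* For example (illustrative values): if the range computed above is
     [0, 1000], 10 bits are enough, so a 32-bit operation whose result
     and relevant inputs all lie in that range is a narrowing candidate.  */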
  /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
  unsigned int precision1 = wi::min_precision (min_value, sign);
  unsigned int precision2 = wi::min_precision (max_value, sign);
  unsigned int value_precision = MAX (precision1, precision2);
  if (value_precision >= precision)
    return;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
                     " without loss of precision: %G",
                     sign == SIGNED ? "signed" : "unsigned",
                     value_precision, stmt);

  vect_set_operation_type (stmt_info, type, value_precision, sign);
  vect_set_min_input_precision (stmt_info, type, value_precision);
}

/* Use information about the users of STMT's result to decide whether
   STMT (described by STMT_INFO) could be done in a narrower type.
   This is effectively a backward propagation.  */

static void
vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
{
  tree_code code = gimple_assign_rhs_code (stmt);
  unsigned int opno = (code == COND_EXPR ? 2 : 1);
  tree type = TREE_TYPE (gimple_op (stmt, opno));
  if (!vect_narrowable_type_p (type))
    return;

  unsigned int precision = TYPE_PRECISION (type);
  unsigned int operation_precision, min_input_precision;
  switch (code)
    {
    CASE_CONVERT:
      /* Only the bits that contribute to the output matter.  Don't change
         the precision of the operation itself.  */
      operation_precision = precision;
      min_input_precision = stmt_info->min_output_precision;
      break;

    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
      {
        tree shift = gimple_assign_rhs2 (stmt);
        if (TREE_CODE (shift) != INTEGER_CST
            || !wi::ltu_p (wi::to_widest (shift), precision))
          return;
        unsigned int const_shift = TREE_INT_CST_LOW (shift);
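        /* For example (illustrative values): with min_output_precision 8
           and const_shift 4, a left shift needs MAX (8, 4 + 1) = 8 bits
           of operation precision but only 8 - 4 = 4 input bits, whereas
           a right shift needs 8 + 4 = 12 bits of both.  */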
        if (code == LSHIFT_EXPR)
          {
            /* Avoid creating an undefined shift.

               ??? We could instead use min_output_precision as-is and
               optimize out-of-range shifts to zero.  However, only
               degenerate testcases shift away all their useful input data,
               and it isn't natural to drop input operations in the middle
               of vectorization.  This sort of thing should really be
               handled before vectorization.  */
            operation_precision = MAX (stmt_info->min_output_precision,
                                       const_shift + 1);
            /* We need CONST_SHIFT fewer bits of the input.  */
            min_input_precision = (MAX (operation_precision, const_shift)
                                   - const_shift);
          }
        else
          {
            /* We need CONST_SHIFT extra bits to do the operation.  */
            operation_precision = (stmt_info->min_output_precision
                                   + const_shift);
            min_input_precision = operation_precision;
          }
        break;
      }

    default:
      if (vect_truncatable_operation_p (code))
        {
          /* Input bit N has no effect on output bits N-1 and lower.  */
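          /* For example, in an addition the carries propagate only
             upwards, so input bits above the required output precision
             can be ignored; the same reasoning applies to the other
             truncatable operations.  */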
          operation_precision = stmt_info->min_output_precision;
          min_input_precision = operation_precision;
          break;
        }
      return;
    }

  if (operation_precision < precision)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
                         " without affecting users: %G",
                         TYPE_UNSIGNED (type) ? "unsigned" : "signed",
                         operation_precision, stmt);
      vect_set_operation_type (stmt_info, type, operation_precision,
                               TYPE_SIGN (type));
    }
  vect_set_min_input_precision (stmt_info, type, min_input_precision);
}

/* Return true if the statement described by STMT_INFO sets a boolean
   SSA_NAME and if we know how to vectorize this kind of statement using
   vector mask types.  */

static bool
possible_vector_mask_operation_p (stmt_vec_info stmt_info)
{
  tree lhs = gimple_get_lhs (stmt_info->stmt);
  if (!lhs
      || TREE_CODE (lhs) != SSA_NAME
      || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    return false;

  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      tree_code rhs_code = gimple_assign_rhs_code (assign);
      switch (rhs_code)
        {
        CASE_CONVERT:
        case SSA_NAME:
        case BIT_NOT_EXPR:
        case BIT_IOR_EXPR:
        case BIT_XOR_EXPR:
        case BIT_AND_EXPR:
          return true;

        default:
          return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
        }
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    return true;
  return false;
}

/* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
   a vector mask type instead of a normal vector type.  Record the
   result in STMT_INFO->mask_precision.  */

static void
vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
{
  if (!possible_vector_mask_operation_p (stmt_info))
    return;

  /* If at least one boolean input uses a vector mask type,
     pick the mask type with the narrowest elements.

     ??? This is the traditional behavior.  It should always produce
     the smallest number of operations, but isn't necessarily the
     optimal choice.  For example, if we have:

       a = b & c

     where:

     - the user of a wants it to have a mask type for 16-bit elements (M16)
     - b also uses M16
     - c uses a mask type for 8-bit elements (M8)

     then picking M8 gives:

     - 1 M16->M8 pack for b
     - 1 M8 AND for a
     - 2 M8->M16 unpacks for the user of a

     whereas picking M16 would have given:

     - 2 M8->M16 unpacks for c
     - 2 M16 ANDs for a

     The number of operations is equal, but M16 would have given
     a shorter dependency chain and allowed more ILP.  */
  unsigned int precision = ~0U;
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      unsigned int nops = gimple_num_ops (assign);
      for (unsigned int i = 1; i < nops; ++i)
        {
          tree rhs = gimple_op (assign, i);
          if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
            continue;

          stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
          if (!def_stmt_info)
            /* Don't let external or constant operands influence the choice.
               We can convert them to whichever vector type we pick.  */
            continue;

          if (def_stmt_info->mask_precision)
            {
              if (precision > def_stmt_info->mask_precision)
                precision = def_stmt_info->mask_precision;
            }
        }

      /* If the statement compares two values that shouldn't use vector masks,
         try comparing the values as normal scalars instead.  */
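      /* For example (illustrative): an equality test between two 32-bit
         integers would get precision 32 here, provided the target can
         compare 32-bit vector elements directly.  */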
      tree_code rhs_code = gimple_assign_rhs_code (assign);
      if (precision == ~0U
          && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
        {
          tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
          scalar_mode mode;
          tree vectype, mask_type;
          if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
              && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
              && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
              && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
            precision = GET_MODE_BITSIZE (mode);
        }
    }
  else
    {
      gphi *phi = as_a <gphi *> (stmt_info->stmt);
      for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
        {
          tree rhs = gimple_phi_arg_def (phi, i);

          stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
          if (!def_stmt_info)
            /* Don't let external or constant operands influence the choice.
               We can convert them to whichever vector type we pick.  */
            continue;

          if (def_stmt_info->mask_precision)
            {
              if (precision > def_stmt_info->mask_precision)
                precision = def_stmt_info->mask_precision;
            }
        }
    }

  if (dump_enabled_p ())
    {
      if (precision == ~0U)
        dump_printf_loc (MSG_NOTE, vect_location,
                         "using normal nonmask vectors for %G",
                         stmt_info->stmt);
      else
        dump_printf_loc (MSG_NOTE, vect_location,
                         "using boolean precision %d for %G",
                         precision, stmt_info->stmt);
    }

  stmt_info->mask_precision = precision;
}

/* Handle vect_determine_precisions for STMT_INFO, given that we
   have already done so for the users of its result.  */

void
vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
{
  vect_determine_min_output_precision (vinfo, stmt_info);
  if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      vect_determine_precisions_from_range (stmt_info, stmt);
      vect_determine_precisions_from_users (stmt_info, stmt);
    }
}

/* Walk backwards through the vectorizable region to determine the
   values of these fields:

   - min_output_precision
   - min_input_precision
   - operation_precision
   - operation_sign.  */
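
/* Mask precisions are computed first, on a forward walk, since they
   propagate from definitions to uses; the fields listed above are then
   computed on a backward walk, since they propagate from uses back to
   definitions.  */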

void
vect_determine_precisions (vec_info *vinfo)
{
  DUMP_VECT_SCOPE ("vect_determine_precisions");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
      unsigned int nbbs = loop->num_nodes;

      for (unsigned int i = 0; i < nbbs; i++)
        {
          basic_block bb = bbs[i];
          for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            if (!is_gimple_debug (gsi_stmt (si)))
              vect_determine_mask_precision
                (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
        }
      for (unsigned int i = 0; i < nbbs; i++)
        {
          basic_block bb = bbs[nbbs - i - 1];
          for (gimple_stmt_iterator si = gsi_last_bb (bb);
               !gsi_end_p (si); gsi_prev (&si))
            if (!is_gimple_debug (gsi_stmt (si)))
              vect_determine_stmt_precisions
                (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
        }
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
        {
          basic_block bb = bb_vinfo->bbs[i];
          for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
              if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
                vect_determine_mask_precision (vinfo, stmt_info);
            }
          for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
              if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
                vect_determine_mask_precision (vinfo, stmt_info);
            }
        }
      for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
        {
          for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
               !gsi_end_p (gsi); gsi_prev (&gsi))
            {
              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
              if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
                vect_determine_stmt_precisions (vinfo, stmt_info);
            }
          for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
               !gsi_end_p (gsi); gsi_next (&gsi))
            {
              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
              if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
                vect_determine_stmt_precisions (vinfo, stmt_info);
            }
        }
    }
}

typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);

struct vect_recog_func
{
  vect_recog_func_ptr fn;
  const char *name;
};

/* Note that ordering matters - the first pattern matching on a stmt is
   taken which means usually the more complex one needs to precede the
   less complex ones (widen_sum only after dot_prod or sad for example).  */
static vect_recog_func vect_vect_recog_func_ptrs[] = {
  { vect_recog_over_widening_pattern, "over_widening" },
  /* Must come after over_widening, which narrows the shift as much as
     possible beforehand.  */
  { vect_recog_average_pattern, "average" },
  { vect_recog_mulhs_pattern, "mult_high" },
  { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
  { vect_recog_widen_mult_pattern, "widen_mult" },
  { vect_recog_dot_prod_pattern, "dot_prod" },
  { vect_recog_sad_pattern, "sad" },
  { vect_recog_widen_sum_pattern, "widen_sum" },
  { vect_recog_pow_pattern, "pow" },
  { vect_recog_widen_shift_pattern, "widen_shift" },
  { vect_recog_rotate_pattern, "rotate" },
  { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
  { vect_recog_divmod_pattern, "divmod" },
  { vect_recog_mult_pattern, "mult" },
  { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
  { vect_recog_bool_pattern, "bool" },
  /* This must come before mask conversion, and includes the parts
     of mask conversion that are needed for gather and scatter
     internal functions.  */
  { vect_recog_gather_scatter_pattern, "gather_scatter" },
  { vect_recog_mask_conversion_pattern, "mask_conversion" },
  { vect_recog_widen_plus_pattern, "widen_plus" },
  { vect_recog_widen_minus_pattern, "widen_minus" },
};

const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);

/* Mark statements that are involved in a pattern.  */

void
vect_mark_pattern_stmts (vec_info *vinfo,
                         stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
                         tree pattern_vectype)
{
  stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
  gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);

  gimple *orig_pattern_stmt = NULL;
  if (is_pattern_stmt_p (orig_stmt_info))
    {
      /* We're replacing a statement in an existing pattern definition
         sequence.  */
      orig_pattern_stmt = orig_stmt_info->stmt;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "replacing earlier pattern %G", orig_pattern_stmt);

      /* To keep the book-keeping simple, just swap the lhs of the
         old and new statements, so that the old one has a valid but
         unused lhs.  */
      tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
      gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
      gimple_set_lhs (pattern_stmt, old_lhs);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);

      /* Switch to the statement that ORIG replaces.  */
      orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);

      /* We shouldn't be replacing the main pattern statement.  */
      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
                  != orig_pattern_stmt);
    }

  if (def_seq)
    for (gimple_stmt_iterator si = gsi_start (def_seq);
         !gsi_end_p (si); gsi_next (&si))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "extra pattern stmt: %G", gsi_stmt (si));
        stmt_vec_info pattern_stmt_info
          = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
                                    orig_stmt_info, pattern_vectype);
        /* Stmts in the def sequence are not vectorizable cycle or
           induction defs; instead they should all be vect_internal_def
           feeding the main pattern stmt which retains this def type.  */
        STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
      }

  if (orig_pattern_stmt)
    {
      vect_init_pattern_stmt (vinfo, pattern_stmt,
                              orig_stmt_info, pattern_vectype);

      /* Insert all the new pattern statements before the original one.  */
      gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
      gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
                                               orig_def_seq);
      gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
      gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);

      /* Remove the pattern statement that this new pattern replaces.  */
      gsi_remove (&gsi, false);
    }
  else
    vect_set_pattern_stmt (vinfo,
                           pattern_stmt, orig_stmt_info, pattern_vectype);

  /* Transfer reduction path info to the pattern.  */
  if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
    {
      tree lookfor = gimple_op (orig_stmt_info_saved->stmt,
                                1 + STMT_VINFO_REDUC_IDX (orig_stmt_info));
      /* Search the pattern def sequence and the main pattern stmt.  Note
         we may have inserted all into a containing pattern def sequence
         so the following is a bit awkward.  */
      gimple_stmt_iterator si;
      gimple *s;
      if (def_seq)
        {
          si = gsi_start (def_seq);
          s = gsi_stmt (si);
          gsi_next (&si);
        }
      else
        {
          si = gsi_none ();
          s = pattern_stmt;
        }
      do
        {
          bool found = false;
          for (unsigned i = 1; i < gimple_num_ops (s); ++i)
            if (gimple_op (s, i) == lookfor)
              {
                STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i - 1;
                lookfor = gimple_get_lhs (s);
                found = true;
                break;
              }
          if (s == pattern_stmt)
            {
              if (!found && dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "failed to update reduction index.\n");
              break;
            }
          if (gsi_end_p (si))
            s = pattern_stmt;
          else
            {
              s = gsi_stmt (si);
              if (s == pattern_stmt)
                /* Found the end inside a bigger pattern def seq.  */
                si = gsi_none ();
              else
                gsi_next (&si);
            }
        } while (1);
    }
}

/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
        computation pattern.
   STMT_INFO: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
   a sequence of statements that has the same functionality and can be
   used to replace STMT_INFO.  It returns the last statement in the sequence
   and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
   PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
   statement, having first checked that the target supports the new operation
   in that type.

   This function also does some bookkeeping, as explained in the documentation
   for vect_pattern_recog.  */

static void
vect_pattern_recog_1 (vec_info *vinfo,
                      vect_recog_func *recog_func, stmt_vec_info stmt_info)
{
  gimple *pattern_stmt;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;

  /* If this statement has already been replaced with pattern statements,
     leave the original statement alone, since the first match wins.
     Instead try to match against the definition statements that feed
     the main pattern statement.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
           !gsi_end_p (gsi); gsi_next (&gsi))
        vect_pattern_recog_1 (vinfo, recog_func,
                              vinfo->lookup_stmt (gsi_stmt (gsi)));
      return;
    }

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
  pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
  if (!pattern_stmt)
    {
      /* Clear any half-formed pattern definition sequence.  */
      STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
      return;
    }

  loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  gcc_assert (pattern_vectype);

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "%s pattern recognized: %G",
                     recog_func->name, pattern_stmt);

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    {
      unsigned ix, ix2;
      stmt_vec_info *elem_ptr;
      VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
                             elem_ptr, *elem_ptr == stmt_info);
    }
}


/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
        computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
        that provides the same functionality and that can be vectorized.  We
        also record some information in the stmt_vec_info of the relevant
        stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

         stmt                      in_pattern_p  related_stmt  vec_stmt
         S1: a_i = ....                 -             -           -
         S2: a_2 = ..use(a_i)..         -             -           -
         S3: a_1 = ..use(a_2)..         -             -           -
         S4: a_0 = ..use(a_1)..         -             -           -
         S5: ... = ..use(a_0)..         -             -           -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

                                   in_pattern_p  related_stmt  vec_stmt
         S1: a_i = ....                 -             -           -
         S2: a_2 = ..use(a_i)..         -             -           -
         S3: a_1 = ..use(a_2)..         -             -           -
         S4: a_0 = ..use(a_1)..         true          S6          -
           '---> S6: a_new = ....       -             S4          -
         S5: ... = ..use(a_0)..         -             -           -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

                                   in_pattern_p  related_stmt  vec_stmt
         S1: a_i = ....                 -             -           -
         S2: a_2 = ..use(a_i)..         -             -           -
         S3: a_1 = ..use(a_2)..         -             -           -
       > VS6: va_new = ....             -             -           -
         S4: a_0 = ..use(a_1)..         true          S6          VS6
           '---> S6: a_new = ....       -             S4          VS6
       > VS5: ... = ..vuse(va_new)..    -             -           -
         S5: ... = ..use(a_0)..         -             -           -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

         VS6: va_new = ....
         VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statement, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
       '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
       '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */

void
vect_pattern_recog (vec_info *vinfo)
{
  class loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;

  vect_determine_precisions (vinfo);

  DUMP_VECT_SCOPE ("vect_pattern_recog");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;

      /* Scan through the loop stmts, applying the pattern recognition
         functions starting at each stmt visited:  */
      for (i = 0; i < nbbs; i++)
        {
          basic_block bb = bbs[i];
          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            {
              if (is_gimple_debug (gsi_stmt (si)))
                continue;
              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
              /* Scan over all generic vect_recog_xxx_pattern functions.  */
              for (j = 0; j < NUM_PATTERNS; j++)
                vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
                                      stmt_info);
            }
        }
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
        for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
             !gsi_end_p (gsi); gsi_next (&gsi))
          {
            stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
            if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
              continue;

            /* Scan over all generic vect_recog_xxx_pattern functions.  */
            for (j = 0; j < NUM_PATTERNS; j++)
              vect_pattern_recog_1 (vinfo,
                                    &vect_vect_recog_func_ptrs[j], stmt_info);
          }
    }

  /* After this no more add_stmt calls are allowed.  */
  vinfo->stmt_vec_info_ro = true;
}