1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
49 #include "tree-vector-builder.h"
50 #include "vec-perm-indices.h"
51
52 /* Return true if we have a useful VR_RANGE range for VAR, storing it
53 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
54
55 static bool
vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
57 {
58 value_range_kind vr_type = get_range_info (var, min_value, max_value);
59 wide_int nonzero = get_nonzero_bits (var);
60 signop sgn = TYPE_SIGN (TREE_TYPE (var));
61 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
62 nonzero, sgn) == VR_RANGE)
63 {
64 if (dump_enabled_p ())
65 {
66 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
67 dump_printf (MSG_NOTE, " has range [");
68 dump_hex (MSG_NOTE, *min_value);
69 dump_printf (MSG_NOTE, ", ");
70 dump_hex (MSG_NOTE, *max_value);
71 dump_printf (MSG_NOTE, "]\n");
72 }
73 return true;
74 }
75 else
76 {
77 if (dump_enabled_p ())
78 {
79 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
80 dump_printf (MSG_NOTE, " has no range info\n");
81 }
82 return false;
83 }
84 }
85
/* Report that we've found an instance of pattern NAME in
   statement STMT.  */
88
89 static void
vect_pattern_detected (const char *name, gimple *stmt)
91 {
92 if (dump_enabled_p ())
93 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
94 }
95
96 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
97 return the pattern statement's stmt_vec_info. Set its vector type to
98 VECTYPE if it doesn't have one already. */
99
100 static stmt_vec_info
vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
102 tree vectype)
103 {
104 vec_info *vinfo = orig_stmt_info->vinfo;
105 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
106 if (pattern_stmt_info == NULL)
107 pattern_stmt_info = orig_stmt_info->vinfo->add_stmt (pattern_stmt);
108 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
109
110 pattern_stmt_info->pattern_stmt_p = true;
111 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
112 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
113 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
114 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
115 {
116 gcc_assert (VECTOR_BOOLEAN_TYPE_P (vectype)
117 == vect_use_mask_type_p (orig_stmt_info));
118 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
119 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
120 }
121 return pattern_stmt_info;
122 }
123
124 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
125 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
126 have one already. */
127
128 static void
vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
130 tree vectype)
131 {
132 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
133 STMT_VINFO_RELATED_STMT (orig_stmt_info)
134 = vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
135 }
136
137 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
138 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
139 be different from the vector type of the final pattern statement.
140 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
141 from which it was derived. */
142
143 static inline void
144 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
145 tree vectype = NULL_TREE,
146 tree scalar_type_for_mask = NULL_TREE)
147 {
148 gcc_assert (!scalar_type_for_mask
149 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
150 vec_info *vinfo = stmt_info->vinfo;
151 if (vectype)
152 {
153 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
154 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
155 if (scalar_type_for_mask)
156 new_stmt_info->mask_precision
157 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
158 }
159 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
160 new_stmt);
161 }
162
163 /* The caller wants to perform new operations on vect_external variable
164 VAR, so that the result of the operations would also be vect_external.
165 Return the edge on which the operations can be performed, if one exists.
166 Return null if the operations should instead be treated as part of
167 the pattern that needs them. */
168
169 static edge
vect_get_external_def_edge (vec_info *vinfo, tree var)
171 {
172 edge e = NULL;
173 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
174 {
175 e = loop_preheader_edge (loop_vinfo->loop);
176 if (!SSA_NAME_IS_DEFAULT_DEF (var))
177 {
178 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
179 if (bb == NULL
180 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
181 e = NULL;
182 }
183 }
184 return e;
185 }
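
/* For example (illustrative only, not part of the original sources): in a
   loop vectorization context, a value defined before the loop (or a default
   definition such as a function parameter) can have new statements inserted
   on the loop preheader edge, so they execute once rather than once per
   iteration; the function returns that edge, or null if VAR's definition
   does not dominate the preheader destination.  */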
186
187 /* Return true if the target supports a vector version of CODE,
188 where CODE is known to map to a direct optab. ITYPE specifies
189 the type of (some of) the scalar inputs and OTYPE specifies the
190 type of the scalar result.
191
   If CODE allows the inputs and outputs to have different types
193 (such as for WIDEN_SUM_EXPR), it is the input mode rather
194 than the output mode that determines the appropriate target pattern.
195 Operand 0 of the target pattern then specifies the mode that the output
196 must have.
197
198 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
199 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
200 is nonnull. */
201
202 static bool
203 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
204 tree itype, tree *vecotype_out,
205 tree *vecitype_out = NULL)
206 {
207 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
208 if (!vecitype)
209 return false;
210
211 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
212 if (!vecotype)
213 return false;
214
215 optab optab = optab_for_tree_code (code, vecitype, optab_default);
216 if (!optab)
217 return false;
218
219 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
220 if (icode == CODE_FOR_nothing
221 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
222 return false;
223
224 *vecotype_out = vecotype;
225 if (vecitype_out)
226 *vecitype_out = vecitype;
227 return true;
228 }
229
230 /* Round bit precision PRECISION up to a full element. */
231
232 static unsigned int
vect_element_precision (unsigned int precision)
234 {
235 precision = 1 << ceil_log2 (precision);
236 return MAX (precision, BITS_PER_UNIT);
237 }
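
/* For example (illustrative values only, not from the original sources):
   a PRECISION of 3 rounds up to BITS_PER_UNIT (8 on typical targets),
   12 rounds up to 16, and 17 rounds up to 32, so the result always
   corresponds to a full integer element width.  */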
238
239 /* If OP is defined by a statement that's being considered for vectorization,
240 return information about that statement, otherwise return NULL. */
241
242 static stmt_vec_info
vect_get_internal_def (vec_info *vinfo, tree op)
244 {
245 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
246 if (def_stmt_info
247 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
248 return def_stmt_info;
249 return NULL;
250 }
251
252 /* Check whether NAME, an ssa-name used in STMT_VINFO,
253 is a result of a type promotion, such that:
254 DEF_STMT: NAME = NOP (name0)
255 If CHECK_SIGN is TRUE, check that either both types are signed or both are
256 unsigned. */
257
258 static bool
type_conversion_p (tree name, stmt_vec_info stmt_vinfo, bool check_sign,
260 tree *orig_type, gimple **def_stmt, bool *promotion)
261 {
262 tree type = TREE_TYPE (name);
263 tree oprnd0;
264 enum vect_def_type dt;
265
266 stmt_vec_info def_stmt_info;
267 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, &def_stmt_info,
268 def_stmt))
269 return false;
270
271 if (dt != vect_internal_def
272 && dt != vect_external_def && dt != vect_constant_def)
273 return false;
274
275 if (!*def_stmt)
276 return false;
277
278 if (!is_gimple_assign (*def_stmt))
279 return false;
280
281 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
282 return false;
283
284 oprnd0 = gimple_assign_rhs1 (*def_stmt);
285
286 *orig_type = TREE_TYPE (oprnd0);
287 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
288 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
289 return false;
290
291 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
292 *promotion = true;
293 else
294 *promotion = false;
295
296 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
297 return false;
298
299 return true;
300 }
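
/* A minimal illustrative sketch (hypothetical names, not from the original
   sources) of what type_conversion_p recognizes:

     short s = ...;
     int i = (int) s;		// DEF_STMT: a widening conversion
     ... use of i ...

   Called with NAME = i, it sets *ORIG_TYPE to short, *DEF_STMT to the
   conversion statement and *PROMOTION to true, since int has at least
   twice the precision of short on typical targets.  */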
301
302 /* Holds information about an input operand after some sign changes
303 and type promotions have been peeled away. */
304 class vect_unpromoted_value {
305 public:
306 vect_unpromoted_value ();
307
308 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
309
310 /* The value obtained after peeling away zero or more casts. */
311 tree op;
312
313 /* The type of OP. */
314 tree type;
315
316 /* The definition type of OP. */
317 vect_def_type dt;
318
319 /* If OP is the result of peeling at least one cast, and if the cast
320 of OP itself is a vectorizable statement, CASTER identifies that
321 statement, otherwise it is null. */
322 stmt_vec_info caster;
323 };
324
inline vect_unpromoted_value::vect_unpromoted_value ()
326 : op (NULL_TREE),
327 type (NULL_TREE),
328 dt (vect_uninitialized_def),
329 caster (NULL)
330 {
331 }
332
333 /* Set the operand to OP_IN, its definition type to DT_IN, and the
334 statement that casts it to CASTER_IN. */
335
336 inline void
vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
338 stmt_vec_info caster_in)
339 {
340 op = op_in;
341 type = TREE_TYPE (op);
342 dt = dt_in;
343 caster = caster_in;
344 }
345
346 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
347 to reach some vectorizable inner operand OP', continuing as long as it
348 is possible to convert OP' back to OP using a possible sign change
349 followed by a possible promotion P. Return this OP', or null if OP is
350 not a vectorizable SSA name. If there is a promotion P, describe its
351 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
352 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
353 have more than one user.
354
355 A successful return means that it is possible to go from OP' to OP
356 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
357 whereas the cast from UNPROM to OP might be a promotion, a sign
358 change, or a nop.
359
360 E.g. say we have:
361
362 signed short *ptr = ...;
363 signed short C = *ptr;
364 unsigned short B = (unsigned short) C; // sign change
365 signed int A = (signed int) B; // unsigned promotion
366 ...possible other uses of A...
367 unsigned int OP = (unsigned int) A; // sign change
368
369 In this case it's possible to go directly from C to OP using:
370
371 OP = (unsigned int) (unsigned short) C;
372 +------------+ +--------------+
373 promotion sign change
374
375 so OP' would be C. The input to the promotion is B, so UNPROM
376 would describe B. */
377
378 static tree
379 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
380 vect_unpromoted_value *unprom,
381 bool *single_use_p = NULL)
382 {
383 tree res = NULL_TREE;
384 tree op_type = TREE_TYPE (op);
385 unsigned int orig_precision = TYPE_PRECISION (op_type);
386 unsigned int min_precision = orig_precision;
387 stmt_vec_info caster = NULL;
388 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
389 {
390 /* See whether OP is simple enough to vectorize. */
391 stmt_vec_info def_stmt_info;
392 gimple *def_stmt;
393 vect_def_type dt;
394 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
395 break;
396
397 /* If OP is the input of a demotion, skip over it to see whether
398 OP is itself the result of a promotion. If so, the combined
399 effect of the promotion and the demotion might fit the required
400 pattern, otherwise neither operation fits.
401
402 This copes with cases such as the result of an arithmetic
403 operation being truncated before being stored, and where that
404 arithmetic operation has been recognized as an over-widened one. */
405 if (TYPE_PRECISION (op_type) <= min_precision)
406 {
407 /* Use OP as the UNPROM described above if we haven't yet
408 found a promotion, or if using the new input preserves the
409 sign of the previous promotion. */
410 if (!res
411 || TYPE_PRECISION (unprom->type) == orig_precision
412 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
413 {
414 unprom->set_op (op, dt, caster);
415 min_precision = TYPE_PRECISION (op_type);
416 }
417 /* Stop if we've already seen a promotion and if this
418 conversion does more than change the sign. */
419 else if (TYPE_PRECISION (op_type)
420 != TYPE_PRECISION (unprom->type))
421 break;
422
423 /* The sequence now extends to OP. */
424 res = op;
425 }
426
427 /* See whether OP is defined by a cast. Record it as CASTER if
428 the cast is potentially vectorizable. */
429 if (!def_stmt)
430 break;
431 caster = def_stmt_info;
432
433 /* Ignore pattern statements, since we don't link uses for them. */
434 if (caster
435 && single_use_p
436 && !STMT_VINFO_RELATED_STMT (caster)
437 && !has_single_use (res))
438 *single_use_p = false;
439
440 gassign *assign = dyn_cast <gassign *> (def_stmt);
441 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
442 break;
443
444 /* Continue with the input to the cast. */
445 op = gimple_assign_rhs1 (def_stmt);
446 op_type = TREE_TYPE (op);
447 }
448 return res;
449 }
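
/* Another illustrative case (hypothetical names, not from the original
   sources), this time involving a demotion of an over-widened result:

     signed char c = ...;
     int i = (int) c;
     short s = (short) i;	// demotion of the wide value

   Looking through the promotions of S first records S itself, then skips
   the intermediate int value, and finally settles on C, since
   (short) (int) c is equivalent to (short) c, i.e. a promotion from
   signed char to short.  The returned OP' is C and UNPROM describes C.  */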
450
451 /* OP is an integer operand to an operation that returns TYPE, and we
452 want to treat the operation as a widening one. So far we can treat
453 it as widening from *COMMON_TYPE.
454
455 Return true if OP is suitable for such a widening operation,
456 either widening from *COMMON_TYPE or from some supertype of it.
457 Update *COMMON_TYPE to the supertype in the latter case.
458
459 SHIFT_P is true if OP is a shift amount. */
460
461 static bool
vect_joust_widened_integer (tree type, bool shift_p, tree op,
463 tree *common_type)
464 {
465 /* Calculate the minimum precision required by OP, without changing
466 the sign of either operand. */
467 unsigned int precision;
468 if (shift_p)
469 {
470 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
471 return false;
472 precision = TREE_INT_CST_LOW (op);
473 }
474 else
475 {
476 precision = wi::min_precision (wi::to_widest (op),
477 TYPE_SIGN (*common_type));
478 if (precision * 2 > TYPE_PRECISION (type))
479 return false;
480 }
481
482 /* If OP requires a wider type, switch to that type. The checks
483 above ensure that this is still narrower than the result. */
484 precision = vect_element_precision (precision);
485 if (TYPE_PRECISION (*common_type) < precision)
486 *common_type = build_nonstandard_integer_type
487 (precision, TYPE_UNSIGNED (*common_type));
488 return true;
489 }
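
/* For example (illustrative only): with a 32-bit TYPE and SHIFT_P set,
   a constant shift amount of 12 passes the <= TYPE_PRECISION (type) / 2
   check and needs 12 bits, which vect_element_precision rounds up to 16,
   so *COMMON_TYPE is widened to a 16-bit type if it is currently narrower.
   A non-shift constant such as 200 with an unsigned *COMMON_TYPE needs
   only 8 bits and leaves *COMMON_TYPE unchanged.  */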
490
491 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
   is narrower than TYPE, storing the supertype in *COMMON_TYPE if so.  */
493
494 static bool
vect_joust_widened_type (tree type, tree new_type, tree *common_type)
496 {
497 if (types_compatible_p (*common_type, new_type))
498 return true;
499
500 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
501 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
502 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
503 return true;
504
505 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
506 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
507 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
508 {
509 *common_type = new_type;
510 return true;
511 }
512
513 /* We have mismatched signs, with the signed type being
514 no wider than the unsigned type. In this case we need
515 a wider signed type. */
516 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
517 TYPE_PRECISION (new_type));
518 precision *= 2;
519 if (precision * 2 > TYPE_PRECISION (type))
520 return false;
521
522 *common_type = build_nonstandard_integer_type (precision, false);
523 return true;
524 }
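
/* For example (illustrative only): with a 32-bit TYPE, a signed char
   *COMMON_TYPE and an unsigned char NEW_TYPE have equal precision but
   mismatched signs, so neither can hold all values of the other.  The
   final case then widens *COMMON_TYPE to a signed 16-bit type, which is
   accepted because 16 * 2 does not exceed the 32-bit result.  */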
525
526 /* Check whether STMT_INFO can be viewed as a tree of integer operations
527 in which each node either performs CODE or WIDENED_CODE, and where
528 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
529 specifies the maximum number of leaf operands. SHIFT_P says whether
530 CODE and WIDENED_CODE are some sort of shift.
531
532 If STMT_INFO is such a tree, return the number of leaf operands
533 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
534 to a type that (a) is narrower than the result of STMT_INFO and
535 (b) can hold all leaf operand values.
536
537 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
538 exists. */
539
540 static unsigned int
vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
542 tree_code widened_code, bool shift_p,
543 unsigned int max_nops,
544 vect_unpromoted_value *unprom, tree *common_type)
545 {
546 /* Check for an integer operation with the right code. */
547 vec_info *vinfo = stmt_info->vinfo;
548 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
549 if (!assign)
550 return 0;
551
552 tree_code rhs_code = gimple_assign_rhs_code (assign);
553 if (rhs_code != code && rhs_code != widened_code)
554 return 0;
555
556 tree type = gimple_expr_type (assign);
557 if (!INTEGRAL_TYPE_P (type))
558 return 0;
559
560 /* Assume that both operands will be leaf operands. */
561 max_nops -= 2;
562
563 /* Check the operands. */
564 unsigned int next_op = 0;
565 for (unsigned int i = 0; i < 2; ++i)
566 {
567 vect_unpromoted_value *this_unprom = &unprom[next_op];
568 unsigned int nops = 1;
569 tree op = gimple_op (assign, i + 1);
570 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
571 {
572 /* We already have a common type from earlier operands.
573 Update it to account for OP. */
574 this_unprom->set_op (op, vect_constant_def);
575 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
576 return 0;
577 }
578 else
579 {
580 /* Only allow shifts by constants. */
581 if (shift_p && i == 1)
582 return 0;
583
584 if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
585 this_unprom))
586 return 0;
587
588 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
589 {
590 /* The operand isn't widened. If STMT_INFO has the code
591 for an unwidened operation, recursively check whether
592 this operand is a node of the tree. */
593 if (rhs_code != code
594 || max_nops == 0
595 || this_unprom->dt != vect_internal_def)
596 return 0;
597
598 /* Give back the leaf slot allocated above now that we're
599 not treating this as a leaf operand. */
600 max_nops += 1;
601
602 /* Recursively process the definition of the operand. */
603 stmt_vec_info def_stmt_info
604 = vinfo->lookup_def (this_unprom->op);
605 nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
606 shift_p, max_nops, this_unprom,
607 common_type);
608 if (nops == 0)
609 return 0;
610
611 max_nops -= nops;
612 }
613 else
614 {
615 /* Make sure that the operand is narrower than the result. */
616 if (TYPE_PRECISION (this_unprom->type) * 2
617 > TYPE_PRECISION (type))
618 return 0;
619
620 /* Update COMMON_TYPE for the new operand. */
621 if (i == 0)
622 *common_type = this_unprom->type;
623 else if (!vect_joust_widened_type (type, this_unprom->type,
624 common_type))
625 return 0;
626 }
627 }
628 next_op += nops;
629 }
630 return next_op;
631 }
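
/* A small illustrative example (hypothetical names, not from the original
   sources): for the statement sequence

     int ax = (int) a16;
     int bx = (int) b16;
     int res = ax * bx;

   calling this function on RES's definition with CODE = MULT_EXPR,
   WIDENED_CODE = WIDEN_MULT_EXPR and MAX_NOPS = 2 records the two 16-bit
   inputs A16 and B16 in UNPROM[0] and UNPROM[1], sets *COMMON_TYPE to
   their 16-bit type, and returns 2.  */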
632
633 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
634 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
635
636 static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
638 {
639 return make_temp_ssa_name (type, stmt, "patt");
640 }
641
642 /* STMT2_INFO describes a type conversion that could be split into STMT1
643 followed by a version of STMT2_INFO that takes NEW_RHS as its first
644 input. Try to do this using pattern statements, returning true on
645 success. */
646
647 static bool
vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
649 gimple *stmt1, tree vectype)
650 {
651 vec_info *vinfo = stmt2_info->vinfo;
652 if (is_pattern_stmt_p (stmt2_info))
653 {
654 /* STMT2_INFO is part of a pattern. Get the statement to which
655 the pattern is attached. */
656 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
657 vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);
658
659 if (dump_enabled_p ())
660 dump_printf_loc (MSG_NOTE, vect_location,
661 "Splitting pattern statement: %G", stmt2_info->stmt);
662
663 /* Since STMT2_INFO is a pattern statement, we can change it
664 in-situ without worrying about changing the code for the
665 containing block. */
666 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
667
668 if (dump_enabled_p ())
669 {
670 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
671 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
672 stmt2_info->stmt);
673 }
674
675 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
676 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
677 /* STMT2_INFO is the actual pattern statement. Add STMT1
678 to the end of the definition sequence. */
679 gimple_seq_add_stmt_without_update (def_seq, stmt1);
680 else
681 {
682 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
683 before it. */
684 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
685 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
686 }
687 return true;
688 }
689 else
690 {
691 /* STMT2_INFO doesn't yet have a pattern. Try to create a
692 two-statement pattern now. */
693 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
694 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
695 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
696 if (!lhs_vectype)
697 return false;
698
699 if (dump_enabled_p ())
700 dump_printf_loc (MSG_NOTE, vect_location,
701 "Splitting statement: %G", stmt2_info->stmt);
702
703 /* Add STMT1 as a singleton pattern definition sequence. */
704 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
705 vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
706 gimple_seq_add_stmt_without_update (def_seq, stmt1);
707
708 /* Build the second of the two pattern statements. */
709 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
710 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
711 vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);
712
713 if (dump_enabled_p ())
714 {
715 dump_printf_loc (MSG_NOTE, vect_location,
716 "into pattern statements: %G", stmt1);
717 dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
718 }
719
720 return true;
721 }
722 }
723
724 /* Convert UNPROM to TYPE and return the result, adding new statements
725 to STMT_INFO's pattern definition statements if no better way is
726 available. VECTYPE is the vector form of TYPE. */
727
728 static tree
vect_convert_input (stmt_vec_info stmt_info, tree type,
730 vect_unpromoted_value *unprom, tree vectype)
731 {
732 vec_info *vinfo = stmt_info->vinfo;
733
734 /* Check for a no-op conversion. */
735 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
736 return unprom->op;
737
738 /* Allow the caller to create constant vect_unpromoted_values. */
739 if (TREE_CODE (unprom->op) == INTEGER_CST)
740 return wide_int_to_tree (type, wi::to_widest (unprom->op));
741
742 tree input = unprom->op;
743 if (unprom->caster)
744 {
745 tree lhs = gimple_get_lhs (unprom->caster->stmt);
746 tree lhs_type = TREE_TYPE (lhs);
747
748 /* If the result of the existing cast is the right width, use it
749 instead of the source of the cast. */
750 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
751 input = lhs;
752 /* If the precision we want is between the source and result
753 precisions of the existing cast, try splitting the cast into
754 two and tapping into a mid-way point. */
755 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
756 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
757 {
758 /* In order to preserve the semantics of the original cast,
759 give the mid-way point the same signedness as the input value.
760
761 It would be possible to use a signed type here instead if
762 TYPE is signed and UNPROM->TYPE is unsigned, but that would
763 make the sign of the midtype sensitive to the order in
764 which we process the statements, since the signedness of
765 TYPE is the signedness required by just one of possibly
766 many users. Also, unsigned promotions are usually as cheap
767 as or cheaper than signed ones, so it's better to keep an
768 unsigned promotion. */
769 tree midtype = build_nonstandard_integer_type
770 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
771 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
772 if (vec_midtype)
773 {
774 input = vect_recog_temp_ssa_var (midtype, NULL);
775 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
776 unprom->op);
777 if (!vect_split_statement (unprom->caster, input, new_stmt,
778 vec_midtype))
779 append_pattern_def_seq (stmt_info, new_stmt, vec_midtype);
780 }
781 }
782
783 /* See if we can reuse an existing result. */
784 if (types_compatible_p (type, TREE_TYPE (input)))
785 return input;
786 }
787
788 /* We need a new conversion statement. */
789 tree new_op = vect_recog_temp_ssa_var (type, NULL);
790 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
791
792 /* If OP is an external value, see if we can insert the new statement
793 on an incoming edge. */
794 if (input == unprom->op && unprom->dt == vect_external_def)
795 if (edge e = vect_get_external_def_edge (stmt_info->vinfo, input))
796 {
797 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
798 gcc_assert (!new_bb);
799 return new_op;
800 }
801
802 /* As a (common) last resort, add the statement to the pattern itself. */
803 append_pattern_def_seq (stmt_info, new_stmt, vectype);
804 return new_op;
805 }
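
/* For example (illustrative only, not from the original sources): if UNPROM
   describes an unsigned char value whose existing CASTER converts it
   straight to a 32-bit integer, and the pattern wants a 16-bit TYPE, the
   code above builds the unsigned 16-bit mid-way type, splits the cast into
   unsigned char -> unsigned short -> int, and reuses the unsigned short
   value; if TYPE is the signed 16-bit type, a final sign-change conversion
   is added to the pattern definition sequence.  */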
806
807 /* Invoke vect_convert_input for N elements of UNPROM and store the
808 result in the corresponding elements of RESULT. */
809
810 static void
vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
812 tree *result, tree type, vect_unpromoted_value *unprom,
813 tree vectype)
814 {
815 for (unsigned int i = 0; i < n; ++i)
816 {
817 unsigned int j;
818 for (j = 0; j < i; ++j)
819 if (unprom[j].op == unprom[i].op)
820 break;
821 if (j < i)
822 result[i] = result[j];
823 else
824 result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
825 }
826 }
827
828 /* The caller has created a (possibly empty) sequence of pattern definition
829 statements followed by a single statement PATTERN_STMT. Cast the result
830 of this final statement to TYPE. If a new statement is needed, add
831 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
832 and return the new statement, otherwise return PATTERN_STMT as-is.
833 VECITYPE is the vector form of PATTERN_STMT's result type. */
834
835 static gimple *
vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
837 tree vecitype)
838 {
839 tree lhs = gimple_get_lhs (pattern_stmt);
840 if (!types_compatible_p (type, TREE_TYPE (lhs)))
841 {
842 append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
843 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
844 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
845 }
846 return pattern_stmt;
847 }
848
849 /* Return true if STMT_VINFO describes a reduction for which reassociation
850 is allowed. If STMT_INFO is part of a group, assume that it's part of
851 a reduction chain and optimistically assume that all statements
852 except the last allow reassociation.
853 Also require it to have code CODE and to be a reduction
854 in the outermost loop. When returning true, store the operands in
855 *OP0_OUT and *OP1_OUT. */
856
857 static bool
vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
859 tree *op0_out, tree *op1_out)
860 {
861 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
862 if (!loop_info)
863 return false;
864
865 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
866 if (!assign || gimple_assign_rhs_code (assign) != code)
867 return false;
868
869 /* We don't allow changing the order of the computation in the inner-loop
870 when doing outer-loop vectorization. */
871 class loop *loop = LOOP_VINFO_LOOP (loop_info);
872 if (loop && nested_in_vect_loop_p (loop, stmt_info))
873 return false;
874
875 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
876 {
877 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
878 code))
879 return false;
880 }
881 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
882 return false;
883
884 *op0_out = gimple_assign_rhs1 (assign);
885 *op1_out = gimple_assign_rhs2 (assign);
886 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
887 std::swap (*op0_out, *op1_out);
888 return true;
889 }
890
891 /* Function vect_recog_dot_prod_pattern
892
893 Try to find the following pattern:
894
895 type x_t, y_t;
896 TYPE1 prod;
897 TYPE2 sum = init;
898 loop:
899 sum_0 = phi <init, sum_1>
900 S1 x_t = ...
901 S2 y_t = ...
902 S3 x_T = (TYPE1) x_t;
903 S4 y_T = (TYPE1) y_t;
904 S5 prod = x_T * y_T;
905 [S6 prod = (TYPE2) prod; #optional]
906 S7 sum_1 = prod + sum_0;
907
   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a
   reduction computation.
911
912 Input:
913
914 * STMT_VINFO: The stmt from which the pattern search begins. In the
915 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
916 will be detected.
917
918 Output:
919
920 * TYPE_OUT: The type of the output of this pattern.
921
922 * Return value: A new stmt that will be used to replace the sequence of
923 stmts that constitute the pattern. In this case it will be:
924 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
925
926 Note: The dot-prod idiom is a widening reduction pattern that is
927 vectorized without preserving all the intermediate results. It
928 produces only N/2 (widened) results (by summing up pairs of
929 intermediate results) rather than all N results. Therefore, we
930 cannot allow this pattern when we want to get all the results and in
931 the correct order (as is the case when this computation is in an
   inner-loop nested in an outer-loop that is being vectorized).  */
933
934 static gimple *
vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
936 {
937 tree oprnd0, oprnd1;
938 gimple *last_stmt = stmt_vinfo->stmt;
939 vec_info *vinfo = stmt_vinfo->vinfo;
940 tree type, half_type;
941 gimple *pattern_stmt;
942 tree var;
943
944 /* Look for the following pattern
945 DX = (TYPE1) X;
946 DY = (TYPE1) Y;
947 DPROD = DX * DY;
948 DDPROD = (TYPE2) DPROD;
949 sum_1 = DDPROD + sum_0;
950 In which
951 - DX is double the size of X
952 - DY is double the size of Y
953 - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
955 - sum has been recognized as a reduction variable.
956
957 This is equivalent to:
958 DPROD = X w* Y; #widen mult
959 sum_1 = DPROD w+ sum_0; #widen summation
960 or
961 DPROD = X w* Y; #widen mult
962 sum_1 = DPROD + sum_0; #summation
963 */
964
965 /* Starting from LAST_STMT, follow the defs of its uses in search
966 of the above pattern. */
967
968 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
969 &oprnd0, &oprnd1))
970 return NULL;
971
972 type = gimple_expr_type (last_stmt);
973
974 vect_unpromoted_value unprom_mult;
975 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
976
977 /* So far so good. Since last_stmt was detected as a (summation) reduction,
978 we know that oprnd1 is the reduction variable (defined by a loop-header
979 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     It remains to check that oprnd0 is defined by a (widen_)mult_expr.  */
981 if (!oprnd0)
982 return NULL;
983
984 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
985 if (!mult_vinfo)
986 return NULL;
987
  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
990 vect_unpromoted_value unprom0[2];
991 if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
992 false, 2, unprom0, &half_type))
993 return NULL;
994
995 /* If there are two widening operations, make sure they agree on
996 the sign of the extension. */
997 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
998 && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
999 return NULL;
1000
1001 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1002
1003 tree half_vectype;
1004 if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1005 type_out, &half_vectype))
1006 return NULL;
1007
1008 /* Get the inputs in the appropriate types. */
1009 tree mult_oprnd[2];
1010 vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
1011 unprom0, half_vectype);
1012
1013 var = vect_recog_temp_ssa_var (type, NULL);
1014 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1015 mult_oprnd[0], mult_oprnd[1], oprnd1);
1016
1017 return pattern_stmt;
1018 }
1019
1020
1021 /* Function vect_recog_sad_pattern
1022
1023 Try to find the following Sum of Absolute Difference (SAD) pattern:
1024
1025 type x_t, y_t;
1026 signed TYPE1 diff, abs_diff;
1027 TYPE2 sum = init;
1028 loop:
1029 sum_0 = phi <init, sum_1>
1030 S1 x_t = ...
1031 S2 y_t = ...
1032 S3 x_T = (TYPE1) x_t;
1033 S4 y_T = (TYPE1) y_t;
1034 S5 diff = x_T - y_T;
1035 S6 abs_diff = ABS_EXPR <diff>;
1036 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1037 S8 sum_1 = abs_diff + sum_0;
1038
   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a
   reduction computation.
1042
1043 Input:
1044
1045 * STMT_VINFO: The stmt from which the pattern search begins. In the
1046 example, when this function is called with S8, the pattern
1047 {S3,S4,S5,S6,S7,S8} will be detected.
1048
1049 Output:
1050
1051 * TYPE_OUT: The type of the output of this pattern.
1052
1053 * Return value: A new stmt that will be used to replace the sequence of
1054 stmts that constitute the pattern. In this case it will be:
1055 SAD_EXPR <x_t, y_t, sum_0>
1056 */
1057
1058 static gimple *
vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1060 {
1061 gimple *last_stmt = stmt_vinfo->stmt;
1062 vec_info *vinfo = stmt_vinfo->vinfo;
1063 tree half_type;
1064
1065 /* Look for the following pattern
1066 DX = (TYPE1) X;
1067 DY = (TYPE1) Y;
1068 DDIFF = DX - DY;
1069 DAD = ABS_EXPR <DDIFF>;
          [DAD = (TYPE2) DAD;   #optional]
1071 sum_1 = DAD + sum_0;
1072 In which
1073 - DX is at least double the size of X
1074 - DY is at least double the size of Y
1075 - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
1077 - sum has been recognized as a reduction variable.
1078
1079 This is equivalent to:
1080 DDIFF = X w- Y; #widen sub
1081 DAD = ABS_EXPR <DDIFF>;
1082 sum_1 = DAD w+ sum_0; #widen summation
1083 or
1084 DDIFF = X w- Y; #widen sub
1085 DAD = ABS_EXPR <DDIFF>;
1086 sum_1 = DAD + sum_0; #summation
1087 */
1088
1089 /* Starting from LAST_STMT, follow the defs of its uses in search
1090 of the above pattern. */
1091
1092 tree plus_oprnd0, plus_oprnd1;
1093 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1094 &plus_oprnd0, &plus_oprnd1))
1095 return NULL;
1096
1097 tree sum_type = gimple_expr_type (last_stmt);
1098
1099 /* Any non-truncating sequence of conversions is OK here, since
1100 with a successful match, the result of the ABS(U) is known to fit
1101 within the nonnegative range of the result type. (It cannot be the
1102 negative of the minimum signed value due to the range of the widening
1103 MINUS_EXPR.) */
1104 vect_unpromoted_value unprom_abs;
1105 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1106 &unprom_abs);
1107
1108 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1109 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1110 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1111 Then check that plus_oprnd0 is defined by an abs_expr. */
1112
1113 if (!plus_oprnd0)
1114 return NULL;
1115
1116 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1117 if (!abs_stmt_vinfo)
1118 return NULL;
1119
  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
1122 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1123 if (!abs_stmt
1124 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1125 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1126 return NULL;
1127
1128 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1129 tree abs_type = TREE_TYPE (abs_oprnd);
1130 if (TYPE_UNSIGNED (abs_type))
1131 return NULL;
1132
1133 /* Peel off conversions from the ABS input. This can involve sign
1134 changes (e.g. from an unsigned subtraction to a signed ABS input)
1135 or signed promotion, but it can't include unsigned promotion.
1136 (Note that ABS of an unsigned promotion should have been folded
1137 away before now anyway.) */
1138 vect_unpromoted_value unprom_diff;
1139 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1140 &unprom_diff);
1141 if (!abs_oprnd)
1142 return NULL;
1143 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1144 && TYPE_UNSIGNED (unprom_diff.type))
1145 return NULL;
1146
1147 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1148 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1149 if (!diff_stmt_vinfo)
1150 return NULL;
1151
  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
1154 vect_unpromoted_value unprom[2];
1155 if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
1156 false, 2, unprom, &half_type))
1157 return NULL;
1158
1159 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1160
1161 tree half_vectype;
1162 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1163 type_out, &half_vectype))
1164 return NULL;
1165
1166 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1167 tree sad_oprnd[2];
1168 vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
1169 unprom, half_vectype);
1170
1171 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1172 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1173 sad_oprnd[1], plus_oprnd1);
1174
1175 return pattern_stmt;
1176 }
1177
1178 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1179 so that it can be treated as though it had the form:
1180
1181 A_TYPE a;
1182 B_TYPE b;
1183 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1184 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1185 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1186 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1187 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1188
1189 Try to replace the pattern with:
1190
1191 A_TYPE a;
1192 B_TYPE b;
1193 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1194 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1195 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1196 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1197
1198 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1199
1200 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1201 name of the pattern being matched, for dump purposes. */
1202
1203 static gimple *
vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
1205 tree_code orig_code, tree_code wide_code,
1206 bool shift_p, const char *name)
1207 {
1208 vec_info *vinfo = last_stmt_info->vinfo;
1209 gimple *last_stmt = last_stmt_info->stmt;
1210
1211 vect_unpromoted_value unprom[2];
1212 tree half_type;
1213 if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
1214 shift_p, 2, unprom, &half_type))
1215 return NULL;
1216
1217 /* Pattern detected. */
1218 vect_pattern_detected (name, last_stmt);
1219
1220 tree type = gimple_expr_type (last_stmt);
1221 tree itype = type;
1222 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1223 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1224 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1225 TYPE_UNSIGNED (half_type));
1226
1227 /* Check target support */
1228 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1229 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1230 enum tree_code dummy_code;
1231 int dummy_int;
1232 auto_vec<tree> dummy_vec;
1233 if (!vectype
1234 || !vecitype
1235 || !supportable_widening_operation (wide_code, last_stmt_info,
1236 vecitype, vectype,
1237 &dummy_code, &dummy_code,
1238 &dummy_int, &dummy_vec))
1239 return NULL;
1240
1241 *type_out = get_vectype_for_scalar_type (vinfo, type);
1242 if (!*type_out)
1243 return NULL;
1244
1245 tree oprnd[2];
1246 vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);
1247
1248 tree var = vect_recog_temp_ssa_var (itype, NULL);
1249 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1250 oprnd[0], oprnd[1]);
1251
1252 return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
1253 }
1254
1255 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1256 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1257
1258 static gimple *
vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1260 {
1261 return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
1262 WIDEN_MULT_EXPR, false,
1263 "vect_recog_widen_mult_pattern");
1264 }
1265
1266 /* Function vect_recog_pow_pattern
1267
1268 Try to find the following pattern:
1269
1270 x = POW (y, N);
1271
1272 with POW being one of pow, powf, powi, powif and N being
1273 either 2 or 0.5.
1274
1275 Input:
1276
1277 * STMT_VINFO: The stmt from which the pattern search begins.
1278
1279 Output:
1280
1281 * TYPE_OUT: The type of the output of this pattern.
1282
1283 * Return value: A new stmt that will be used to replace the sequence of
1284 stmts that constitute the pattern. In this case it will be:
1285 x = x * x
1286 or
1287 x = sqrt (x)
1288 */
1289
1290 static gimple *
vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1292 {
1293 vec_info *vinfo = stmt_vinfo->vinfo;
1294 gimple *last_stmt = stmt_vinfo->stmt;
1295 tree base, exp;
1296 gimple *stmt;
1297 tree var;
1298
1299 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1300 return NULL;
1301
1302 switch (gimple_call_combined_fn (last_stmt))
1303 {
1304 CASE_CFN_POW:
1305 CASE_CFN_POWI:
1306 break;
1307
1308 default:
1309 return NULL;
1310 }
1311
1312 base = gimple_call_arg (last_stmt, 0);
1313 exp = gimple_call_arg (last_stmt, 1);
1314 if (TREE_CODE (exp) != REAL_CST
1315 && TREE_CODE (exp) != INTEGER_CST)
1316 {
1317 if (flag_unsafe_math_optimizations
1318 && TREE_CODE (base) == REAL_CST
1319 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
1320 {
1321 combined_fn log_cfn;
1322 built_in_function exp_bfn;
1323 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1324 {
1325 case BUILT_IN_POW:
1326 log_cfn = CFN_BUILT_IN_LOG;
1327 exp_bfn = BUILT_IN_EXP;
1328 break;
1329 case BUILT_IN_POWF:
1330 log_cfn = CFN_BUILT_IN_LOGF;
1331 exp_bfn = BUILT_IN_EXPF;
1332 break;
1333 case BUILT_IN_POWL:
1334 log_cfn = CFN_BUILT_IN_LOGL;
1335 exp_bfn = BUILT_IN_EXPL;
1336 break;
1337 default:
1338 return NULL;
1339 }
1340 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1341 tree exp_decl = builtin_decl_implicit (exp_bfn);
1342 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1343 does that, but if C is a power of 2, we want to use
1344 exp2 (log2 (C) * x) in the non-vectorized version, but for
1345 vectorization we don't have vectorized exp2. */
1346 if (logc
1347 && TREE_CODE (logc) == REAL_CST
1348 && exp_decl
1349 && lookup_attribute ("omp declare simd",
1350 DECL_ATTRIBUTES (exp_decl)))
1351 {
1352 cgraph_node *node = cgraph_node::get_create (exp_decl);
1353 if (node->simd_clones == NULL)
1354 {
1355 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1356 || node->definition)
1357 return NULL;
1358 expand_simd_clones (node);
1359 if (node->simd_clones == NULL)
1360 return NULL;
1361 }
1362 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1363 if (!*type_out)
1364 return NULL;
1365 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1366 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1367 append_pattern_def_seq (stmt_vinfo, g);
1368 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1369 g = gimple_build_call (exp_decl, 1, def);
1370 gimple_call_set_lhs (g, res);
1371 return g;
1372 }
1373 }
1374
1375 return NULL;
1376 }
1377
1378 /* We now have a pow or powi builtin function call with a constant
1379 exponent. */
1380
1381 /* Catch squaring. */
1382 if ((tree_fits_shwi_p (exp)
1383 && tree_to_shwi (exp) == 2)
1384 || (TREE_CODE (exp) == REAL_CST
1385 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1386 {
1387 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
1388 TREE_TYPE (base), type_out))
1389 return NULL;
1390
1391 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1392 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1393 return stmt;
1394 }
1395
1396 /* Catch square root. */
1397 if (TREE_CODE (exp) == REAL_CST
1398 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1399 {
1400 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1401 if (*type_out
1402 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1403 OPTIMIZE_FOR_SPEED))
1404 {
1405 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1406 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1407 gimple_call_set_lhs (stmt, var);
1408 gimple_call_set_nothrow (stmt, true);
1409 return stmt;
1410 }
1411 }
1412
1413 return NULL;
1414 }
1415
1416
1417 /* Function vect_recog_widen_sum_pattern
1418
1419 Try to find the following pattern:
1420
1421 type x_t;
1422 TYPE x_T, sum = init;
1423 loop:
1424 sum_0 = phi <init, sum_1>
1425 S1 x_t = *p;
1426 S2 x_T = (TYPE) x_t;
1427 S3 sum_1 = x_T + sum_0;
1428
   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This is
1431 a special case of a reduction computation.
1432
1433 Input:
1434
1435 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1436 when this function is called with S3, the pattern {S2,S3} will be detected.
1437
1438 Output:
1439
1440 * TYPE_OUT: The type of the output of this pattern.
1441
1442 * Return value: A new stmt that will be used to replace the sequence of
1443 stmts that constitute the pattern. In this case it will be:
1444 WIDEN_SUM <x_t, sum_0>
1445
1446 Note: The widening-sum idiom is a widening reduction pattern that is
1447 vectorized without preserving all the intermediate results. It
1448 produces only N/2 (widened) results (by summing up pairs of
1449 intermediate results) rather than all N results. Therefore, we
1450 cannot allow this pattern when we want to get all the results and in
1451 the correct order (as is the case when this computation is in an
   inner-loop nested in an outer-loop that is being vectorized).  */
1453
1454 static gimple *
vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1456 {
1457 gimple *last_stmt = stmt_vinfo->stmt;
1458 tree oprnd0, oprnd1;
1459 vec_info *vinfo = stmt_vinfo->vinfo;
1460 tree type;
1461 gimple *pattern_stmt;
1462 tree var;
1463
1464 /* Look for the following pattern
1465 DX = (TYPE) X;
1466 sum_1 = DX + sum_0;
1467 In which DX is at least double the size of X, and sum_1 has been
1468 recognized as a reduction variable.
1469 */
1470
1471 /* Starting from LAST_STMT, follow the defs of its uses in search
1472 of the above pattern. */
1473
1474 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1475 &oprnd0, &oprnd1))
1476 return NULL;
1477
1478 type = gimple_expr_type (last_stmt);
1479
1480 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1481 we know that oprnd1 is the reduction variable (defined by a loop-header
1482 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     It remains to check that oprnd0 is defined by a cast from type 'type'
     to type 'TYPE'.  */
1485
1486 vect_unpromoted_value unprom0;
1487 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1488 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1489 return NULL;
1490
1491 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1492
1493 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
1494 unprom0.type, type_out))
1495 return NULL;
1496
1497 var = vect_recog_temp_ssa_var (type, NULL);
1498 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1499
1500 return pattern_stmt;
1501 }
1502
1503 /* Recognize cases in which an operation is performed in one type WTYPE
1504 but could be done more efficiently in a narrower type NTYPE. For example,
1505 if we have:
1506
1507 ATYPE a; // narrower than NTYPE
1508 BTYPE b; // narrower than NTYPE
1509 WTYPE aw = (WTYPE) a;
1510 WTYPE bw = (WTYPE) b;
1511 WTYPE res = aw + bw; // only uses of aw and bw
1512
1513 then it would be more efficient to do:
1514
1515 NTYPE an = (NTYPE) a;
1516 NTYPE bn = (NTYPE) b;
1517 NTYPE resn = an + bn;
1518 WTYPE res = (WTYPE) resn;
1519
1520 Other situations include things like:
1521
1522 ATYPE a; // NTYPE or narrower
1523 WTYPE aw = (WTYPE) a;
1524 WTYPE res = aw + b;
1525
1526 when only "(NTYPE) res" is significant. In that case it's more efficient
1527 to truncate "b" and do the operation on NTYPE instead:
1528
1529 NTYPE an = (NTYPE) a;
1530 NTYPE bn = (NTYPE) b; // truncation
1531 NTYPE resn = an + bn;
1532 WTYPE res = (WTYPE) resn;
1533
1534 All users of "res" should then use "resn" instead, making the final
1535 statement dead (not marked as relevant). The final statement is still
1536 needed to maintain the type correctness of the IR.
1537
1538 vect_determine_precisions has already determined the minimum
   precision of the operation and the minimum precision required
1540 by users of the result. */
1541
1542 static gimple *
vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1544 {
1545 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1546 if (!last_stmt)
1547 return NULL;
1548
1549 /* See whether we have found that this operation can be done on a
1550 narrower type without changing its semantics. */
1551 unsigned int new_precision = last_stmt_info->operation_precision;
1552 if (!new_precision)
1553 return NULL;
1554
1555 vec_info *vinfo = last_stmt_info->vinfo;
1556 tree lhs = gimple_assign_lhs (last_stmt);
1557 tree type = TREE_TYPE (lhs);
1558 tree_code code = gimple_assign_rhs_code (last_stmt);
1559
1560 /* Keep the first operand of a COND_EXPR as-is: only the other two
1561 operands are interesting. */
1562 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1563
1564 /* Check the operands. */
1565 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1566 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1567 unprom.quick_grow (nops);
1568 unsigned int min_precision = 0;
1569 bool single_use_p = false;
1570 for (unsigned int i = 0; i < nops; ++i)
1571 {
1572 tree op = gimple_op (last_stmt, first_op + i);
1573 if (TREE_CODE (op) == INTEGER_CST)
1574 unprom[i].set_op (op, vect_constant_def);
1575 else if (TREE_CODE (op) == SSA_NAME)
1576 {
1577 bool op_single_use_p = true;
1578 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1579 &op_single_use_p))
1580 return NULL;
1581 /* If:
1582
1583 (1) N bits of the result are needed;
1584 (2) all inputs are widened from M<N bits; and
1585 (3) one operand OP is a single-use SSA name
1586
1587 we can shift the M->N widening from OP to the output
1588 without changing the number or type of extensions involved.
1589 This then reduces the number of copies of STMT_INFO.
1590
1591 If instead of (3) more than one operand is a single-use SSA name,
1592 shifting the extension to the output is even more of a win.
1593
1594 If instead:
1595
1596 (1) N bits of the result are needed;
1597 (2) one operand OP2 is widened from M2<N bits;
1598 (3) another operand OP1 is widened from M1<M2 bits; and
1599 (4) both OP1 and OP2 are single-use
1600
1601 the choice is between:
1602
1603 (a) truncating OP2 to M1, doing the operation on M1,
1604 and then widening the result to N
1605
1606 (b) widening OP1 to M2, doing the operation on M2, and then
1607 widening the result to N
1608
1609 Both shift the M2->N widening of the inputs to the output.
1610 (a) additionally shifts the M1->M2 widening to the output;
1611 it requires fewer copies of STMT_INFO but requires an extra
1612 M2->M1 truncation.
1613
1614 Which is better will depend on the complexity and cost of
1615 STMT_INFO, which is hard to predict at this stage. However,
1616 a clear tie-breaker in favor of (b) is the fact that the
1617 truncation in (a) increases the length of the operation chain.
1618
1619 If instead of (4) only one of OP1 or OP2 is single-use,
1620 (b) is still a win over doing the operation in N bits:
1621 it still shifts the M2->N widening on the single-use operand
1622 to the output and reduces the number of STMT_INFO copies.
1623
1624 If neither operand is single-use then operating on fewer than
1625 N bits might lead to more extensions overall. Whether it does
1626 or not depends on global information about the vectorization
1627 region, and whether that's a good trade-off would again
1628 depend on the complexity and cost of the statements involved,
1629 as well as things like register pressure that are not normally
1630 modelled at this stage. We therefore ignore these cases
1631 and just optimize the clear single-use wins above.
1632
1633 Thus we take the maximum precision of the unpromoted operands
1634 and record whether any operand is single-use. */
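	     /* For example (hypothetical widths): if only 16 bits of the
		result are needed (N == 16) and both inputs are single-use
		values widened from 8 bits (M == 8), doing the operation on
		8-bit elements replaces the two 8->16 input extensions with
		a single 8->16 extension of the result and halves the number
		of copies of STMT_INFO per output vector.  */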
1635 if (unprom[i].dt == vect_internal_def)
1636 {
1637 min_precision = MAX (min_precision,
1638 TYPE_PRECISION (unprom[i].type));
1639 single_use_p |= op_single_use_p;
1640 }
1641 }
1642 }
1643
1644 /* Although the operation could be done in operation_precision, we have
1645 to balance that against introducing extra truncations or extensions.
1646 Calculate the minimum precision that can be handled efficiently.
1647
1648 The loop above determined that the operation could be handled
1649 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1650 extension from the inputs to the output without introducing more
1651 instructions, and would reduce the number of instructions required
1652 for STMT_INFO itself.
1653
1654 vect_determine_precisions has also determined that the result only
1655 needs min_output_precision bits. Truncating by a factor of N times
1656 requires a tree of N - 1 instructions, so if TYPE is N times wider
1657 than min_output_precision, doing the operation in TYPE and truncating
1658 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1659 In contrast:
1660
1661 - truncating the input to a unary operation and doing the operation
1662 in the new type requires at most N - 1 + 1 = N instructions per
1663 output vector
1664
1665 - doing the same for a binary operation requires at most
1666 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1667
1668 Both unary and binary operations require fewer instructions than
1669 this if the operands were extended from a suitable truncated form.
1670 Thus there is usually nothing to lose by doing operations in
1671 min_output_precision bits, but there can be something to gain. */
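  /* For example, if TYPE is four times wider than min_output_precision
     (N == 4), doing the operation in TYPE and truncating the result costs
     2 * 4 - 1 = 7 instructions per output vector, whereas truncating the
     input of a unary operation first costs at most 3 + 1 = 4.  */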
1672 if (!single_use_p)
1673 min_precision = last_stmt_info->min_output_precision;
1674 else
1675 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1676
1677 /* Apply the minimum efficient precision we just calculated. */
1678 if (new_precision < min_precision)
1679 new_precision = min_precision;
1680 new_precision = vect_element_precision (new_precision);
1681 if (new_precision >= TYPE_PRECISION (type))
1682 return NULL;
1683
1684 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1685
1686 *type_out = get_vectype_for_scalar_type (vinfo, type);
1687 if (!*type_out)
1688 return NULL;
1689
1690 /* We've found a viable pattern. Get the new type of the operation. */
1691 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1692 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1693
1694 /* If we're truncating an operation, we need to make sure that we
1695 don't introduce new undefined overflow. The codes tested here are
1696 a subset of those accepted by vect_truncatable_operation_p. */
1697 tree op_type = new_type;
1698 if (TYPE_OVERFLOW_UNDEFINED (new_type)
1699 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
1700 op_type = build_nonstandard_integer_type (new_precision, true);
1701
1702 /* We specifically don't check here whether the target supports the
1703 new operation, since it might be something that a later pattern
1704 wants to rewrite anyway. If targets have a minimum element size
1705 for some optabs, we should pattern-match smaller ops to larger ops
1706 where beneficial. */
1707 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1708 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
1709 if (!new_vectype || !op_vectype)
1710 return NULL;
1711
1712 if (dump_enabled_p ())
1713 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1714 type, new_type);
1715
1716 /* Calculate the rhs operands for an operation on OP_TYPE. */
1717 tree ops[3] = {};
1718 for (unsigned int i = 1; i < first_op; ++i)
1719 ops[i - 1] = gimple_op (last_stmt, i);
1720 vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
1721 op_type, &unprom[0], op_vectype);
1722
1723 /* Use the operation to produce a result of type OP_TYPE. */
1724 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
1725 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1726 ops[0], ops[1], ops[2]);
1727 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1728
1729 if (dump_enabled_p ())
1730 dump_printf_loc (MSG_NOTE, vect_location,
1731 "created pattern stmt: %G", pattern_stmt);
1732
1733 /* Convert back to the original signedness, if OP_TYPE is different
1734 from NEW_TYPE. */
1735 if (op_type != new_type)
1736 pattern_stmt = vect_convert_output (last_stmt_info, new_type,
1737 pattern_stmt, op_vectype);
1738
1739 /* Promote the result to the original type. */
1740 pattern_stmt = vect_convert_output (last_stmt_info, type,
1741 pattern_stmt, new_vectype);
1742
1743 return pattern_stmt;
1744 }
1745
1746 /* Recognize the following patterns:
1747
1748 ATYPE a; // narrower than TYPE
1749 BTYPE b; // narrower than TYPE
1750
1751 1) Multiply high with scaling
1752 TYPE res = ((TYPE) a * (TYPE) b) >> c;
1753 2) ... or also with rounding
1754      TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
1755
1756 where only the bottom half of res is used. */
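/* For instance (hypothetical widths): with 16-bit a and b, 32-bit res and
   c == 15, where only the low 16 bits of res are used, pattern (1) can be
   implemented as a single 16-bit IFN_MULHS.  */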
1757
1758 static gimple *
1759 vect_recog_mulhs_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1760 {
1761 /* Check for a right shift. */
1762 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1763 if (!last_stmt
1764 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
1765 return NULL;
1766 vec_info *vinfo = last_stmt_info->vinfo;
1767
1768 /* Check that the shift result is wider than the users of the
1769 result need (i.e. that narrowing would be a natural choice). */
1770 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1771 unsigned int target_precision
1772 = vect_element_precision (last_stmt_info->min_output_precision);
1773 if (!INTEGRAL_TYPE_P (lhs_type)
1774 || target_precision >= TYPE_PRECISION (lhs_type))
1775 return NULL;
1776
1777 /* Look through any change in sign on the outer shift input. */
1778 vect_unpromoted_value unprom_rshift_input;
1779 tree rshift_input = vect_look_through_possible_promotion
1780 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
1781 if (!rshift_input
1782 || TYPE_PRECISION (TREE_TYPE (rshift_input))
1783 != TYPE_PRECISION (lhs_type))
1784 return NULL;
1785
1786 /* Get the definition of the shift input. */
1787 stmt_vec_info rshift_input_stmt_info
1788 = vect_get_internal_def (vinfo, rshift_input);
1789 if (!rshift_input_stmt_info)
1790 return NULL;
1791 gassign *rshift_input_stmt
1792 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
1793 if (!rshift_input_stmt)
1794 return NULL;
1795
1796 stmt_vec_info mulh_stmt_info;
1797 tree scale_term;
1798 internal_fn ifn;
1799 unsigned int expect_offset;
1800
1801 /* Check for the presence of the rounding term. */
1802 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
1803 {
1804 /* Check that the outer shift was by 1. */
1805 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
1806 return NULL;
1807
1808 /* Check that the second operand of the PLUS_EXPR is 1. */
1809 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
1810 return NULL;
1811
1812 /* Look through any change in sign on the addition input. */
1813 vect_unpromoted_value unprom_plus_input;
1814 tree plus_input = vect_look_through_possible_promotion
1815 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
1816 if (!plus_input
1817 || TYPE_PRECISION (TREE_TYPE (plus_input))
1818 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
1819 return NULL;
1820
1821 /* Get the definition of the multiply-high-scale part. */
1822 stmt_vec_info plus_input_stmt_info
1823 = vect_get_internal_def (vinfo, plus_input);
1824 if (!plus_input_stmt_info)
1825 return NULL;
1826 gassign *plus_input_stmt
1827 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
1828 if (!plus_input_stmt
1829 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
1830 return NULL;
1831
1832 /* Look through any change in sign on the scaling input. */
1833 vect_unpromoted_value unprom_scale_input;
1834 tree scale_input = vect_look_through_possible_promotion
1835 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
1836 if (!scale_input
1837 || TYPE_PRECISION (TREE_TYPE (scale_input))
1838 != TYPE_PRECISION (TREE_TYPE (plus_input)))
1839 return NULL;
1840
1841 /* Get the definition of the multiply-high part. */
1842 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
1843 if (!mulh_stmt_info)
1844 return NULL;
1845
1846 /* Get the scaling term. */
1847 scale_term = gimple_assign_rhs2 (plus_input_stmt);
1848
1849 expect_offset = target_precision + 2;
1850 ifn = IFN_MULHRS;
1851 }
1852 else
1853 {
1854 mulh_stmt_info = rshift_input_stmt_info;
1855 scale_term = gimple_assign_rhs2 (last_stmt);
1856
1857 expect_offset = target_precision + 1;
1858 ifn = IFN_MULHS;
1859 }
1860
1861 /* Check that the scaling factor is correct. */
1862 if (TREE_CODE (scale_term) != INTEGER_CST
1863 || wi::to_widest (scale_term) + expect_offset
1864 != TYPE_PRECISION (lhs_type))
1865 return NULL;
1866
1867 /* Check whether the scaling input term can be seen as two widened
1868 inputs multiplied together. */
1869 vect_unpromoted_value unprom_mult[2];
1870 tree new_type;
1871 unsigned int nops
1872 = vect_widened_op_tree (mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
1873 false, 2, unprom_mult, &new_type);
1874 if (nops != 2)
1875 return NULL;
1876
1877 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
1878
1879 /* Adjust output precision. */
1880 if (TYPE_PRECISION (new_type) < target_precision)
1881 new_type = build_nonstandard_integer_type
1882 (target_precision, TYPE_UNSIGNED (new_type));
1883
1884 /* Check for target support. */
1885 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1886 if (!new_vectype
1887 || !direct_internal_fn_supported_p
1888 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
1889 return NULL;
1890
1891 /* The IR requires a valid vector type for the cast result, even though
1892 it's likely to be discarded. */
1893 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
1894 if (!*type_out)
1895 return NULL;
1896
1897 /* Generate the IFN_MULHRS call. */
1898 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1899 tree new_ops[2];
1900 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
1901 unprom_mult, new_vectype);
1902 gcall *mulhrs_stmt
1903 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
1904 gimple_call_set_lhs (mulhrs_stmt, new_var);
1905 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
1906
1907 if (dump_enabled_p ())
1908 dump_printf_loc (MSG_NOTE, vect_location,
1909 "created pattern stmt: %G", mulhrs_stmt);
1910
1911 return vect_convert_output (last_stmt_info, lhs_type,
1912 mulhrs_stmt, new_vectype);
1913 }
1914
1915 /* Recognize the patterns:
1916
1917 ATYPE a; // narrower than TYPE
1918 BTYPE b; // narrower than TYPE
1919 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1920 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1921
1922 where only the bottom half of avg is used. Try to transform them into:
1923
1924 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1925 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1926
1927 followed by:
1928
1929 TYPE avg = (TYPE) avg';
1930
1931 where NTYPE is no wider than half of TYPE. Since only the bottom half
1932 of avg is used, all or part of the cast of avg' should become redundant.
1933
1934 If there is no target support available, generate code to distribute rshift
1935 over plus and add a carry. */
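/* For example (hypothetical types):

     unsigned char a, b;
     unsigned short avg = ((unsigned short) a + (unsigned short) b + 1) >> 1;

   where only the low 8 bits of avg are used, becomes an .AVG_CEIL on
   unsigned char elements.  */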
1936
1937 static gimple *
1938 vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1939 {
1940 /* Check for a shift right by one bit. */
1941 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1942 vec_info *vinfo = last_stmt_info->vinfo;
1943 if (!last_stmt
1944 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1945 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1946 return NULL;
1947
1948 /* Check that the shift result is wider than the users of the
1949 result need (i.e. that narrowing would be a natural choice). */
1950 tree lhs = gimple_assign_lhs (last_stmt);
1951 tree type = TREE_TYPE (lhs);
1952 unsigned int target_precision
1953 = vect_element_precision (last_stmt_info->min_output_precision);
1954 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1955 return NULL;
1956
1957 /* Look through any change in sign on the shift input. */
1958 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1959 vect_unpromoted_value unprom_plus;
1960 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
1961 &unprom_plus);
1962 if (!rshift_rhs
1963 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
1964 return NULL;
1965
1966 /* Get the definition of the shift input. */
1967 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1968 if (!plus_stmt_info)
1969 return NULL;
1970
1971 /* Check whether the shift input can be seen as a tree of additions on
1972 2 or 3 widened inputs.
1973
1974 Note that the pattern should be a win even if the result of one or
1975 more additions is reused elsewhere: if the pattern matches, we'd be
1976 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1977 internal_fn ifn = IFN_AVG_FLOOR;
1978 vect_unpromoted_value unprom[3];
1979 tree new_type;
1980 unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
1981 PLUS_EXPR, false, 3,
1982 unprom, &new_type);
1983 if (nops == 0)
1984 return NULL;
1985 if (nops == 3)
1986 {
1987 /* Check that one operand is 1. */
1988 unsigned int i;
1989 for (i = 0; i < 3; ++i)
1990 if (integer_onep (unprom[i].op))
1991 break;
1992 if (i == 3)
1993 return NULL;
1994 /* Throw away the 1 operand and keep the other two. */
1995 if (i < 2)
1996 unprom[i] = unprom[2];
1997 ifn = IFN_AVG_CEIL;
1998 }
1999
2000 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
2001
2002 /* We know that:
2003
2004 (a) the operation can be viewed as:
2005
2006 TYPE widened0 = (TYPE) UNPROM[0];
2007 TYPE widened1 = (TYPE) UNPROM[1];
2008 TYPE tmp1 = widened0 + widened1 {+ 1};
2009 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
2010
2011 (b) the first two statements are equivalent to:
2012
2013 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2014 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2015
2016 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2017 where sensible;
2018
2019 (d) all the operations can be performed correctly at twice the width of
2020 NEW_TYPE, due to the nature of the average operation; and
2021
2022 (e) users of the result of the right shift need only TARGET_PRECISION
2023 bits, where TARGET_PRECISION is no more than half of TYPE's
2024 precision.
2025
2026 Under these circumstances, the only situation in which NEW_TYPE
2027 could be narrower than TARGET_PRECISION is if widened0, widened1
2028 and an addition result are all used more than once. Thus we can
2029 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2030 as "free", whereas widening the result of the average instruction
2031 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
2032 therefore better not to go narrower than TARGET_PRECISION. */
2033 if (TYPE_PRECISION (new_type) < target_precision)
2034 new_type = build_nonstandard_integer_type (target_precision,
2035 TYPE_UNSIGNED (new_type));
2036
2037 /* Check for target support. */
2038 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2039 if (!new_vectype)
2040 return NULL;
2041
2042 bool fallback_p = false;
2043
2044 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2045 ;
2046 else if (TYPE_UNSIGNED (new_type)
2047 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2048 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2049 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2050 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2051 fallback_p = true;
2052 else
2053 return NULL;
2054
2055 /* The IR requires a valid vector type for the cast result, even though
2056 it's likely to be discarded. */
2057 *type_out = get_vectype_for_scalar_type (vinfo, type);
2058 if (!*type_out)
2059 return NULL;
2060
2061 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2062 tree new_ops[2];
2063 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
2064 unprom, new_vectype);
2065
2066 if (fallback_p)
2067 {
2068 /* As a fallback, generate code for following sequence:
2069
2070 shifted_op0 = new_ops[0] >> 1;
2071 shifted_op1 = new_ops[1] >> 1;
2072 sum_of_shifted = shifted_op0 + shifted_op1;
2073 unmasked_carry = new_ops[0] and/or new_ops[1];
2074 carry = unmasked_carry & 1;
2075 new_var = sum_of_shifted + carry;
2076 */
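      /* This relies on the overflow-free identities
	 floor ((a + b) / 2) == (a >> 1) + (b >> 1) + (a & b & 1) and
	 ceil ((a + b) / 2) == (a >> 1) + (b >> 1) + ((a | b) & 1),
	 hence BIT_AND_EXPR for IFN_AVG_FLOOR and BIT_IOR_EXPR for
	 IFN_AVG_CEIL below.  */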
2077
2078 tree one_cst = build_one_cst (new_type);
2079 gassign *g;
2080
2081 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2082 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2083 append_pattern_def_seq (last_stmt_info, g, new_vectype);
2084
2085 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2086 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2087 append_pattern_def_seq (last_stmt_info, g, new_vectype);
2088
2089 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2090 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2091 shifted_op0, shifted_op1);
2092 append_pattern_def_seq (last_stmt_info, g, new_vectype);
2093
2094 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2095 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2096 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2097 append_pattern_def_seq (last_stmt_info, g, new_vectype);
2098
2099 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2100 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2101 append_pattern_def_seq (last_stmt_info, g, new_vectype);
2102
2103 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2104 return vect_convert_output (last_stmt_info, type, g, new_vectype);
2105 }
2106
2107 /* Generate the IFN_AVG* call. */
2108 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2109 new_ops[1]);
2110 gimple_call_set_lhs (average_stmt, new_var);
2111 gimple_set_location (average_stmt, gimple_location (last_stmt));
2112
2113 if (dump_enabled_p ())
2114 dump_printf_loc (MSG_NOTE, vect_location,
2115 "created pattern stmt: %G", average_stmt);
2116
2117 return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
2118 }
2119
2120 /* Recognize cases in which the input to a cast is wider than its
2121 output, and the input is fed by a widening operation. Fold this
2122 by removing the unnecessary intermediate widening. E.g.:
2123
2124 unsigned char a;
2125 unsigned int b = (unsigned int) a;
2126 unsigned short c = (unsigned short) b;
2127
2128 -->
2129
2130 unsigned short c = (unsigned short) a;
2131
2132 Although this is rare in input IR, it is an expected side-effect
2133 of the over-widening pattern above.
2134
2135 This is beneficial also for integer-to-float conversions, if the
2136 widened integer has more bits than the float, and if the unwidened
2137 input doesn't. */
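/* For example (hypothetical types), for the integer-to-float case:

     unsigned char a;
     unsigned long long b = (unsigned long long) a;
     float c = (float) b;

   -->

     float c = (float) a;

   since the 64-bit intermediate has more bits than the 32-bit float
   while the unwidened 8-bit input does not.  */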
2138
2139 static gimple *
2140 vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
2141 {
2142 /* Check for a cast, including an integer-to-float conversion. */
2143 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2144 if (!last_stmt)
2145 return NULL;
2146 tree_code code = gimple_assign_rhs_code (last_stmt);
2147 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2148 return NULL;
2149
2150 /* Make sure that the rhs is a scalar with a natural bitsize. */
2151 tree lhs = gimple_assign_lhs (last_stmt);
2152 if (!lhs)
2153 return NULL;
2154 tree lhs_type = TREE_TYPE (lhs);
2155 scalar_mode lhs_mode;
2156 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2157 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2158 return NULL;
2159
2160 /* Check for a narrowing operation (from a vector point of view). */
2161 tree rhs = gimple_assign_rhs1 (last_stmt);
2162 tree rhs_type = TREE_TYPE (rhs);
2163 if (!INTEGRAL_TYPE_P (rhs_type)
2164 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2165 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2166 return NULL;
2167
2168 /* Try to find an unpromoted input. */
2169 vec_info *vinfo = last_stmt_info->vinfo;
2170 vect_unpromoted_value unprom;
2171 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2172 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2173 return NULL;
2174
2175 /* If the bits above RHS_TYPE matter, make sure that they're the
2176 same when extending from UNPROM as they are when extending from RHS. */
2177 if (!INTEGRAL_TYPE_P (lhs_type)
2178 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2179 return NULL;
2180
2181 /* We can get the same result by casting UNPROM directly, to avoid
2182 the unnecessary widening and narrowing. */
2183 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2184
2185 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2186 if (!*type_out)
2187 return NULL;
2188
2189 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2190 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2191 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2192
2193 return pattern_stmt;
2194 }
2195
2196 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2197 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
2198
2199 static gimple *
2200 vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
2201 {
2202 return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
2203 WIDEN_LSHIFT_EXPR, true,
2204 "vect_recog_widen_shift_pattern");
2205 }
2206
2207 /* Detect a rotate pattern that wouldn't be otherwise vectorized:
2208
2209 type a_t, b_t, c_t;
2210
2211 S0 a_t = b_t r<< c_t;
2212
2213 Input/Output:
2214
2215 * STMT_VINFO: The stmt from which the pattern search begins,
2216 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
2217 with a sequence:
2218
2219 S1 d_t = -c_t;
2220 S2 e_t = d_t & (B - 1);
2221 S3 f_t = b_t << c_t;
2222 S4 g_t = b_t >> e_t;
2223 S0 a_t = f_t | g_t;
2224
2225 where B is element bitsize of type.
2226
2227 Output:
2228
2229 * TYPE_OUT: The type of the output of this pattern.
2230
2231 * Return value: A new stmt that will be used to replace the rotate
2232 S0 stmt. */
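/* Note that S1 and S2 compute the complementary shift count without
   branching: for c_t in [1, B-1], (-c_t) & (B - 1) == B - c_t, and for
   c_t == 0 both shifts are by zero, so f_t | g_t still equals b_t.  */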
2233
2234 static gimple *
2235 vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2236 {
2237 gimple *last_stmt = stmt_vinfo->stmt;
2238 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2239 gimple *pattern_stmt, *def_stmt;
2240 enum tree_code rhs_code;
2241 vec_info *vinfo = stmt_vinfo->vinfo;
2242 enum vect_def_type dt;
2243 optab optab1, optab2;
2244 edge ext_def = NULL;
2245 bool bswap16_p = false;
2246
2247 if (is_gimple_assign (last_stmt))
2248 {
2249 rhs_code = gimple_assign_rhs_code (last_stmt);
2250 switch (rhs_code)
2251 {
2252 case LROTATE_EXPR:
2253 case RROTATE_EXPR:
2254 break;
2255 default:
2256 return NULL;
2257 }
2258
2259 lhs = gimple_assign_lhs (last_stmt);
2260 oprnd0 = gimple_assign_rhs1 (last_stmt);
2261 type = TREE_TYPE (oprnd0);
2262 oprnd1 = gimple_assign_rhs2 (last_stmt);
2263 }
2264 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2265 {
2266 /* __builtin_bswap16 (x) is another form of x r>> 8.
2267 The vectorizer has bswap support, but only if the argument isn't
2268 promoted. */
2269 lhs = gimple_call_lhs (last_stmt);
2270 oprnd0 = gimple_call_arg (last_stmt, 0);
2271 type = TREE_TYPE (oprnd0);
2272 if (!lhs
2273 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2274 || TYPE_PRECISION (type) <= 16
2275 || TREE_CODE (oprnd0) != SSA_NAME
2276 || BITS_PER_UNIT != 8
2277 || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
2278 return NULL;
2279
2280 stmt_vec_info def_stmt_info;
2281 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2282 return NULL;
2283
2284 if (dt != vect_internal_def)
2285 return NULL;
2286
2287 if (gimple_assign_cast_p (def_stmt))
2288 {
2289 def = gimple_assign_rhs1 (def_stmt);
2290 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2291 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2292 oprnd0 = def;
2293 }
2294
2295 type = TREE_TYPE (lhs);
2296 vectype = get_vectype_for_scalar_type (vinfo, type);
2297 if (vectype == NULL_TREE)
2298 return NULL;
2299
2300 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2301 {
2302 /* The encoding uses one stepped pattern for each byte in the
2303 16-bit word. */
2304 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2305 for (unsigned i = 0; i < 3; ++i)
2306 for (unsigned j = 0; j < 2; ++j)
2307 elts.quick_push ((i + 1) * 2 - j - 1);
2308
2309 vec_perm_indices indices (elts, 1,
2310 TYPE_VECTOR_SUBPARTS (char_vectype));
2311 if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2312 {
2313 /* vectorizable_bswap can handle the __builtin_bswap16 if we
2314 undo the argument promotion. */
2315 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2316 {
2317 def = vect_recog_temp_ssa_var (type, NULL);
2318 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2319 append_pattern_def_seq (stmt_vinfo, def_stmt);
2320 oprnd0 = def;
2321 }
2322
2323 /* Pattern detected. */
2324 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2325
2326 *type_out = vectype;
2327
2328 /* Pattern supported. Create a stmt to be used to replace the
2329 pattern, with the unpromoted argument. */
2330 var = vect_recog_temp_ssa_var (type, NULL);
2331 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
2332 1, oprnd0);
2333 gimple_call_set_lhs (pattern_stmt, var);
2334 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
2335 gimple_call_fntype (last_stmt));
2336 return pattern_stmt;
2337 }
2338 }
2339
2340 oprnd1 = build_int_cst (integer_type_node, 8);
2341 rhs_code = LROTATE_EXPR;
2342 bswap16_p = true;
2343 }
2344 else
2345 return NULL;
2346
2347 if (TREE_CODE (oprnd0) != SSA_NAME
2348 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2349 || !INTEGRAL_TYPE_P (type)
2350 || !TYPE_UNSIGNED (type))
2351 return NULL;
2352
2353 stmt_vec_info def_stmt_info;
2354 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2355 return NULL;
2356
2357 if (dt != vect_internal_def
2358 && dt != vect_constant_def
2359 && dt != vect_external_def)
2360 return NULL;
2361
2362 vectype = get_vectype_for_scalar_type (vinfo, type);
2363 if (vectype == NULL_TREE)
2364 return NULL;
2365
2366 /* If vector/vector or vector/scalar rotate is supported by the target,
2367 don't do anything here. */
2368 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2369 if (optab1
2370 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2371 {
2372 use_rotate:
2373 if (bswap16_p)
2374 {
2375 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2376 {
2377 def = vect_recog_temp_ssa_var (type, NULL);
2378 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2379 append_pattern_def_seq (stmt_vinfo, def_stmt);
2380 oprnd0 = def;
2381 }
2382
2383 /* Pattern detected. */
2384 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2385
2386 *type_out = vectype;
2387
2388 /* Pattern supported. Create a stmt to be used to replace the
2389 pattern. */
2390 var = vect_recog_temp_ssa_var (type, NULL);
2391 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
2392 oprnd1);
2393 return pattern_stmt;
2394 }
2395 return NULL;
2396 }
2397
2398 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2399 {
2400 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2401 if (optab2
2402 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2403 goto use_rotate;
2404 }
2405
2406 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2407 don't do anything here either. */
2408 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2409 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2410 if (!optab1
2411 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2412 || !optab2
2413 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2414 {
2415 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2416 return NULL;
2417 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2418 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2419 if (!optab1
2420 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2421 || !optab2
2422 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2423 return NULL;
2424 }
2425
2426 *type_out = vectype;
2427
2428 if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2429 {
2430 def = vect_recog_temp_ssa_var (type, NULL);
2431 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2432 append_pattern_def_seq (stmt_vinfo, def_stmt);
2433 oprnd0 = def;
2434 }
2435
2436 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
2437 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2438
2439 def = NULL_TREE;
2440 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2441 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2442 def = oprnd1;
2443 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2444 {
2445 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2446 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2447 && TYPE_PRECISION (TREE_TYPE (rhs1))
2448 == TYPE_PRECISION (type))
2449 def = rhs1;
2450 }
2451
2452 if (def == NULL_TREE)
2453 {
2454 def = vect_recog_temp_ssa_var (type, NULL);
2455 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2456 append_pattern_def_seq (stmt_vinfo, def_stmt);
2457 }
2458 stype = TREE_TYPE (def);
2459
2460 if (TREE_CODE (def) == INTEGER_CST)
2461 {
2462 if (!tree_fits_uhwi_p (def)
2463 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2464 || integer_zerop (def))
2465 return NULL;
2466 def2 = build_int_cst (stype,
2467 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2468 }
2469 else
2470 {
2471 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
2472
2473 if (vecstype == NULL_TREE)
2474 return NULL;
2475 def2 = vect_recog_temp_ssa_var (stype, NULL);
2476 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2477 if (ext_def)
2478 {
2479 basic_block new_bb
2480 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2481 gcc_assert (!new_bb);
2482 }
2483 else
2484 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2485
2486 def2 = vect_recog_temp_ssa_var (stype, NULL);
2487 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
2488 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2489 gimple_assign_lhs (def_stmt), mask);
2490 if (ext_def)
2491 {
2492 basic_block new_bb
2493 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2494 gcc_assert (!new_bb);
2495 }
2496 else
2497 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2498 }
2499
2500 var1 = vect_recog_temp_ssa_var (type, NULL);
2501 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2502 ? LSHIFT_EXPR : RSHIFT_EXPR,
2503 oprnd0, def);
2504 append_pattern_def_seq (stmt_vinfo, def_stmt);
2505
2506 var2 = vect_recog_temp_ssa_var (type, NULL);
2507 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2508 ? RSHIFT_EXPR : LSHIFT_EXPR,
2509 oprnd0, def2);
2510 append_pattern_def_seq (stmt_vinfo, def_stmt);
2511
2512 /* Pattern detected. */
2513 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2514
2515 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2516 var = vect_recog_temp_ssa_var (type, NULL);
2517 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2518
2519 return pattern_stmt;
2520 }
2521
2522 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2523 vectorized:
2524
2525 type a_t;
2526 TYPE b_T, res_T;
2527
2528 S1 a_t = ;
2529 S2 b_T = ;
2530 S3 res_T = b_T op a_t;
2531
2532 where type 'TYPE' is a type with different size than 'type',
2533 and op is <<, >> or rotate.
2534
2535 Also detect cases:
2536
2537 type a_t;
2538 TYPE b_T, c_T, res_T;
2539
2540 S0 c_T = ;
2541 S1 a_t = (type) c_T;
2542 S2 b_T = ;
2543 S3 res_T = b_T op a_t;
2544
2545 Input/Output:
2546
2547 * STMT_VINFO: The stmt from which the pattern search begins,
2548 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2549 with a shift/rotate which has same type on both operands, in the
2550 second case just b_T op c_T, in the first case with added cast
2551 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2552
2553 Output:
2554
2555 * TYPE_OUT: The type of the output of this pattern.
2556
2557 * Return value: A new stmt that will be used to replace the shift/rotate
2558 S3 stmt. */
2559
2560 static gimple *
2561 vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
2562 tree *type_out)
2563 {
2564 gimple *last_stmt = stmt_vinfo->stmt;
2565 tree oprnd0, oprnd1, lhs, var;
2566 gimple *pattern_stmt;
2567 enum tree_code rhs_code;
2568 vec_info *vinfo = stmt_vinfo->vinfo;
2569
2570 if (!is_gimple_assign (last_stmt))
2571 return NULL;
2572
2573 rhs_code = gimple_assign_rhs_code (last_stmt);
2574 switch (rhs_code)
2575 {
2576 case LSHIFT_EXPR:
2577 case RSHIFT_EXPR:
2578 case LROTATE_EXPR:
2579 case RROTATE_EXPR:
2580 break;
2581 default:
2582 return NULL;
2583 }
2584
2585 lhs = gimple_assign_lhs (last_stmt);
2586 oprnd0 = gimple_assign_rhs1 (last_stmt);
2587 oprnd1 = gimple_assign_rhs2 (last_stmt);
2588 if (TREE_CODE (oprnd0) != SSA_NAME
2589 || TREE_CODE (oprnd1) != SSA_NAME
2590 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2591 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2592 || TYPE_PRECISION (TREE_TYPE (lhs))
2593 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2594 return NULL;
2595
2596 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2597 if (!def_vinfo)
2598 return NULL;
2599
2600 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
2601 if (*type_out == NULL_TREE)
2602 return NULL;
2603
2604 tree def = NULL_TREE;
2605 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2606 if (def_stmt && gimple_assign_cast_p (def_stmt))
2607 {
2608 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2609 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2610 && TYPE_PRECISION (TREE_TYPE (rhs1))
2611 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2612 {
2613 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2614 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2615 def = rhs1;
2616 else
2617 {
2618 tree mask
2619 = build_low_bits_mask (TREE_TYPE (rhs1),
2620 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2621 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2622 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2623 tree vecstype = get_vectype_for_scalar_type (vinfo,
2624 TREE_TYPE (rhs1));
2625 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2626 }
2627 }
2628 }
2629
2630 if (def == NULL_TREE)
2631 {
2632 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2633 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2634 append_pattern_def_seq (stmt_vinfo, def_stmt);
2635 }
2636
2637 /* Pattern detected. */
2638 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2639
2640 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2641 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2642 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2643
2644 return pattern_stmt;
2645 }
2646
2647 /* Return true iff the target has a vector optab implementing the operation
2648 CODE on type VECTYPE. */
2649
2650 static bool
2651 target_has_vecop_for_code (tree_code code, tree vectype)
2652 {
2653 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2654 return voptab
2655 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2656 }
2657
2658 /* Verify that the target has optabs of VECTYPE to perform all the steps
2659 needed by the multiplication-by-immediate synthesis algorithm described by
2660 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2661 present. Return true iff the target supports all the steps. */
2662
2663 static bool
2664 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2665 tree vectype, bool synth_shift_p)
2666 {
2667 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2668 return false;
2669
2670 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2671 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2672
2673 if (var == negate_variant
2674 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2675 return false;
2676
2677 /* If we must synthesize shifts with additions make sure that vector
2678 addition is available. */
2679 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2680 return false;
2681
2682 for (int i = 1; i < alg->ops; i++)
2683 {
2684 switch (alg->op[i])
2685 {
2686 case alg_shift:
2687 break;
2688 case alg_add_t_m2:
2689 case alg_add_t2_m:
2690 case alg_add_factor:
2691 if (!supports_vplus)
2692 return false;
2693 break;
2694 case alg_sub_t_m2:
2695 case alg_sub_t2_m:
2696 case alg_sub_factor:
2697 if (!supports_vminus)
2698 return false;
2699 break;
2700 case alg_unknown:
2701 case alg_m:
2702 case alg_zero:
2703 case alg_impossible:
2704 return false;
2705 default:
2706 gcc_unreachable ();
2707 }
2708 }
2709
2710 return true;
2711 }
2712
2713 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2714 putting the final result in DEST. Append all statements but the last into
2715 VINFO. Return the last statement. */
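/* For example, a shift by 3 is emitted as three self-additions:
   t1 = op + op; t2 = t1 + t1; dest = t2 + t2.  */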
2716
2717 static gimple *
2718 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2719 stmt_vec_info vinfo)
2720 {
2721 HOST_WIDE_INT i;
2722 tree itype = TREE_TYPE (op);
2723 tree prev_res = op;
2724 gcc_assert (amnt >= 0);
2725 for (i = 0; i < amnt; i++)
2726 {
2727 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2728 : dest;
2729 gimple *stmt
2730 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2731 prev_res = tmp_var;
2732 if (i < amnt - 1)
2733 append_pattern_def_seq (vinfo, stmt);
2734 else
2735 return stmt;
2736 }
2737 gcc_unreachable ();
2738 return NULL;
2739 }
2740
2741 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2742 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2743 the process if necessary. Append the resulting assignment statements
2744 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2745 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2746 left shifts using additions. */
2747
2748 static tree
2749 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2750 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2751 {
2752 if (integer_zerop (op2)
2753 && (code == LSHIFT_EXPR
2754 || code == PLUS_EXPR))
2755 {
2756 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2757 return op1;
2758 }
2759
2760 gimple *stmt;
2761 tree itype = TREE_TYPE (op1);
2762 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2763
2764 if (code == LSHIFT_EXPR
2765 && synth_shift_p)
2766 {
2767 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2768 stmt_vinfo);
2769 append_pattern_def_seq (stmt_vinfo, stmt);
2770 return tmp_var;
2771 }
2772
2773 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2774 append_pattern_def_seq (stmt_vinfo, stmt);
2775 return tmp_var;
2776 }
2777
2778 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2779 and simple arithmetic operations to be vectorized. Record the statements
2780 produced in STMT_VINFO and return the last statement in the sequence or
2781 NULL if it's not possible to synthesize such a multiplication.
2782 This function mirrors the behavior of expand_mult_const in expmed.c but
2783 works on tree-ssa form. */
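/* For example, a multiplication by 10 might be synthesized (depending on
   what choose_mult_variant selects) as:
     t1 = op << 2;
     t2 = t1 + op;
     result = t2 << 1;
   i.e. ((op * 4) + op) * 2, using only shifts and additions.  */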
2784
2785 static gimple *
2786 vect_synth_mult_by_constant (tree op, tree val,
2787 stmt_vec_info stmt_vinfo)
2788 {
2789 vec_info *vinfo = stmt_vinfo->vinfo;
2790 tree itype = TREE_TYPE (op);
2791 machine_mode mode = TYPE_MODE (itype);
2792 struct algorithm alg;
2793 mult_variant variant;
2794 if (!tree_fits_shwi_p (val))
2795 return NULL;
2796
2797 /* Multiplication synthesis by shifts, adds and subs can introduce
2798 signed overflow where the original operation didn't. Perform the
2799 operations on an unsigned type and cast back to avoid this.
2800 In the future we may want to relax this for synthesis algorithms
2801 that we can prove do not cause unexpected overflow. */
2802 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2803
2804 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2805
2806 /* Targets that don't support vector shifts but support vector additions
2807 can synthesize shifts that way. */
2808 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
2809
2810 HOST_WIDE_INT hwval = tree_to_shwi (val);
2811 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2812 The vectorizer's benefit analysis will decide whether it's beneficial
2813 to do this. */
2814 bool possible = choose_mult_variant (mode, hwval, &alg,
2815 &variant, MAX_COST);
2816 if (!possible)
2817 return NULL;
2818
2819 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
2820
2821 if (!vectype
2822 || !target_supports_mult_synth_alg (&alg, variant,
2823 vectype, synth_shift_p))
2824 return NULL;
2825
2826 tree accumulator;
2827
2828 /* Clear out the sequence of statements so we can populate it below. */
2829 gimple *stmt = NULL;
2830
2831 if (cast_to_unsigned_p)
2832 {
2833 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2834 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2835 append_pattern_def_seq (stmt_vinfo, stmt);
2836 op = tmp_op;
2837 }
2838
2839 if (alg.op[0] == alg_zero)
2840 accumulator = build_int_cst (multtype, 0);
2841 else
2842 accumulator = op;
2843
2844 bool needs_fixup = (variant == negate_variant)
2845 || (variant == add_variant);
2846
2847 for (int i = 1; i < alg.ops; i++)
2848 {
2849 tree shft_log = build_int_cst (multtype, alg.log[i]);
2850 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2851 tree tmp_var = NULL_TREE;
2852
2853 switch (alg.op[i])
2854 {
2855 case alg_shift:
2856 if (synth_shift_p)
2857 stmt
2858 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2859 stmt_vinfo);
2860 else
2861 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2862 shft_log);
2863 break;
2864 case alg_add_t_m2:
2865 tmp_var
2866 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2867 stmt_vinfo, synth_shift_p);
2868 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2869 tmp_var);
2870 break;
2871 case alg_sub_t_m2:
2872 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2873 shft_log, stmt_vinfo,
2874 synth_shift_p);
2875 /* In some algorithms the first step involves zeroing the
2876 accumulator. If subtracting from such an accumulator
2877 just emit the negation directly. */
2878 if (integer_zerop (accumulator))
2879 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2880 else
2881 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2882 tmp_var);
2883 break;
2884 case alg_add_t2_m:
2885 tmp_var
2886 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2887 stmt_vinfo, synth_shift_p);
2888 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2889 break;
2890 case alg_sub_t2_m:
2891 tmp_var
2892 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2893 stmt_vinfo, synth_shift_p);
2894 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2895 break;
2896 case alg_add_factor:
2897 tmp_var
2898 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2899 stmt_vinfo, synth_shift_p);
2900 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2901 tmp_var);
2902 break;
2903 case alg_sub_factor:
2904 tmp_var
2905 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2906 stmt_vinfo, synth_shift_p);
2907 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2908 accumulator);
2909 break;
2910 default:
2911 gcc_unreachable ();
2912 }
2913 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2914 but rather return it directly. */
2915
2916 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2917 append_pattern_def_seq (stmt_vinfo, stmt);
2918 accumulator = accum_tmp;
2919 }
2920 if (variant == negate_variant)
2921 {
2922 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2923 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2924 accumulator = accum_tmp;
2925 if (cast_to_unsigned_p)
2926 append_pattern_def_seq (stmt_vinfo, stmt);
2927 }
2928 else if (variant == add_variant)
2929 {
2930 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2931 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2932 accumulator = accum_tmp;
2933 if (cast_to_unsigned_p)
2934 append_pattern_def_seq (stmt_vinfo, stmt);
2935 }
2936   /* Move back to a signed type if needed.  */
2937 if (cast_to_unsigned_p)
2938 {
2939 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2940 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2941 }
2942
2943 return stmt;
2944 }
2945
2946 /* Detect multiplication by constant and convert it into a sequence of
2947    shifts, additions, subtractions and negations.  We reuse the
2948    choose_mult_variant algorithms from expmed.c.
2949
2950 Input/Output:
2951
2952 STMT_VINFO: The stmt from which the pattern search begins,
2953 i.e. the mult stmt.
2954
2955 Output:
2956
2957 * TYPE_OUT: The type of the output of this pattern.
2958
2959 * Return value: A new stmt that will be used to replace
2960 the multiplication. */
2961
2962 static gimple *
2963 vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2964 {
2965 vec_info *vinfo = stmt_vinfo->vinfo;
2966 gimple *last_stmt = stmt_vinfo->stmt;
2967 tree oprnd0, oprnd1, vectype, itype;
2968 gimple *pattern_stmt;
2969
2970 if (!is_gimple_assign (last_stmt))
2971 return NULL;
2972
2973 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2974 return NULL;
2975
2976 oprnd0 = gimple_assign_rhs1 (last_stmt);
2977 oprnd1 = gimple_assign_rhs2 (last_stmt);
2978 itype = TREE_TYPE (oprnd0);
2979
2980 if (TREE_CODE (oprnd0) != SSA_NAME
2981 || TREE_CODE (oprnd1) != INTEGER_CST
2982 || !INTEGRAL_TYPE_P (itype)
2983 || !type_has_mode_precision_p (itype))
2984 return NULL;
2985
2986 vectype = get_vectype_for_scalar_type (vinfo, itype);
2987 if (vectype == NULL_TREE)
2988 return NULL;
2989
2990 /* If the target can handle vectorized multiplication natively,
2991 don't attempt to optimize this. */
2992 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2993 if (mul_optab != unknown_optab)
2994 {
2995 machine_mode vec_mode = TYPE_MODE (vectype);
2996 int icode = (int) optab_handler (mul_optab, vec_mode);
2997 if (icode != CODE_FOR_nothing)
2998 return NULL;
2999 }
3000
3001 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
3002 if (!pattern_stmt)
3003 return NULL;
3004
3005 /* Pattern detected. */
3006 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3007
3008 *type_out = vectype;
3009
3010 return pattern_stmt;
3011 }
3012
3013 /* Detect a signed division by a constant that wouldn't be
3014 otherwise vectorized:
3015
3016 type a_t, b_t;
3017
3018 S1 a_t = b_t / N;
3019
3020 where type 'type' is an integral type and N is a constant.
3021
3022 Similarly handle modulo by a constant:
3023
3024 S4 a_t = b_t % N;
3025
3026 Input/Output:
3027
3028 * STMT_VINFO: The stmt from which the pattern search begins,
3029        i.e. the division stmt.  If N is a power of two constant and
3030        the type is signed, S1 is replaced by:
3031 S3 y_t = b_t < 0 ? N - 1 : 0;
3032 S2 x_t = b_t + y_t;
3033 S1' a_t = x_t >> log2 (N);
3034
3035        If N is a power of two constant and the type is signed, S4 is
3036        replaced by (where the *_T temporaries have unsigned type):
3037 S9 y_T = b_t < 0 ? -1U : 0U;
3038 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3039 S7 z_t = (type) z_T;
3040 S6 w_t = b_t + z_t;
3041 S5 x_t = w_t & (N - 1);
3042 S4' a_t = x_t - z_t;
3043
3044 Output:
3045
3046 * TYPE_OUT: The type of the output of this pattern.
3047
3048 * Return value: A new stmt that will be used to replace the division
3049 S1 or modulo S4 stmt. */
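/* For example, for a signed b_t / 8 the replacement computes
   (b_t + (b_t < 0 ? 7 : 0)) >> 3, which biases negative values so that
   the arithmetic shift rounds toward zero as truncating division
   requires.  */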
3050
3051 static gimple *
3052 vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3053 {
3054 vec_info *vinfo = stmt_vinfo->vinfo;
3055 gimple *last_stmt = stmt_vinfo->stmt;
3056 tree oprnd0, oprnd1, vectype, itype, cond;
3057 gimple *pattern_stmt, *def_stmt;
3058 enum tree_code rhs_code;
3059 optab optab;
3060 tree q;
3061 int dummy_int, prec;
3062
3063 if (!is_gimple_assign (last_stmt))
3064 return NULL;
3065
3066 rhs_code = gimple_assign_rhs_code (last_stmt);
3067 switch (rhs_code)
3068 {
3069 case TRUNC_DIV_EXPR:
3070 case EXACT_DIV_EXPR:
3071 case TRUNC_MOD_EXPR:
3072 break;
3073 default:
3074 return NULL;
3075 }
3076
3077 oprnd0 = gimple_assign_rhs1 (last_stmt);
3078 oprnd1 = gimple_assign_rhs2 (last_stmt);
3079 itype = TREE_TYPE (oprnd0);
3080 if (TREE_CODE (oprnd0) != SSA_NAME
3081 || TREE_CODE (oprnd1) != INTEGER_CST
3082 || TREE_CODE (itype) != INTEGER_TYPE
3083 || !type_has_mode_precision_p (itype))
3084 return NULL;
3085
3086 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3087 vectype = get_vectype_for_scalar_type (vinfo, itype);
3088 if (vectype == NULL_TREE)
3089 return NULL;
3090
3091 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3092 {
3093 /* If the target can handle vectorized division or modulo natively,
3094 don't attempt to optimize this, since native division is likely
3095 to give smaller code. */
3096 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3097 if (optab != unknown_optab)
3098 {
3099 machine_mode vec_mode = TYPE_MODE (vectype);
3100 int icode = (int) optab_handler (optab, vec_mode);
3101 if (icode != CODE_FOR_nothing)
3102 return NULL;
3103 }
3104 }
3105
3106 prec = TYPE_PRECISION (itype);
3107 if (integer_pow2p (oprnd1))
3108 {
3109 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3110 return NULL;
3111
3112 /* Pattern detected. */
3113 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3114
3115 *type_out = vectype;
3116
3117 /* Check if the target supports this internal function. */
3118 internal_fn ifn = IFN_DIV_POW2;
3119 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3120 {
3121 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3122
3123 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3124 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3125 gimple_call_set_lhs (div_stmt, var_div);
3126
3127 if (rhs_code == TRUNC_MOD_EXPR)
3128 {
3129 append_pattern_def_seq (stmt_vinfo, div_stmt);
3130 def_stmt
3131 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3132 LSHIFT_EXPR, var_div, shift);
3133 append_pattern_def_seq (stmt_vinfo, def_stmt);
3134 pattern_stmt
3135 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3136 MINUS_EXPR, oprnd0,
3137 gimple_assign_lhs (def_stmt));
3138 }
3139 else
3140 pattern_stmt = div_stmt;
3141 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3142
3143 return pattern_stmt;
3144 }
3145
3146 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3147 build_int_cst (itype, 0));
3148 if (rhs_code == TRUNC_DIV_EXPR
3149 || rhs_code == EXACT_DIV_EXPR)
3150 {
3151 tree var = vect_recog_temp_ssa_var (itype, NULL);
3152 tree shift;
3153 def_stmt
3154 = gimple_build_assign (var, COND_EXPR, cond,
3155 fold_build2 (MINUS_EXPR, itype, oprnd1,
3156 build_int_cst (itype, 1)),
3157 build_int_cst (itype, 0));
3158 append_pattern_def_seq (stmt_vinfo, def_stmt);
3159 var = vect_recog_temp_ssa_var (itype, NULL);
3160 def_stmt
3161 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3162 gimple_assign_lhs (def_stmt));
3163 append_pattern_def_seq (stmt_vinfo, def_stmt);
3164
3165 shift = build_int_cst (itype, tree_log2 (oprnd1));
3166 pattern_stmt
3167 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3168 RSHIFT_EXPR, var, shift);
3169 }
3170 else
3171 {
3172 tree signmask;
3173 if (compare_tree_int (oprnd1, 2) == 0)
3174 {
3175 signmask = vect_recog_temp_ssa_var (itype, NULL);
3176 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3177 build_int_cst (itype, 1),
3178 build_int_cst (itype, 0));
3179 append_pattern_def_seq (stmt_vinfo, def_stmt);
3180 }
3181 else
3182 {
3183 tree utype
3184 = build_nonstandard_integer_type (prec, 1);
3185 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3186 tree shift
3187 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3188 - tree_log2 (oprnd1));
3189 tree var = vect_recog_temp_ssa_var (utype, NULL);
3190
3191 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3192 build_int_cst (utype, -1),
3193 build_int_cst (utype, 0));
3194 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
3195 var = vect_recog_temp_ssa_var (utype, NULL);
3196 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3197 gimple_assign_lhs (def_stmt),
3198 shift);
3199 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
3200 signmask = vect_recog_temp_ssa_var (itype, NULL);
3201 def_stmt
3202 = gimple_build_assign (signmask, NOP_EXPR, var);
3203 append_pattern_def_seq (stmt_vinfo, def_stmt);
3204 }
3205 def_stmt
3206 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3207 PLUS_EXPR, oprnd0, signmask);
3208 append_pattern_def_seq (stmt_vinfo, def_stmt);
3209 def_stmt
3210 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3211 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3212 fold_build2 (MINUS_EXPR, itype, oprnd1,
3213 build_int_cst (itype, 1)));
3214 append_pattern_def_seq (stmt_vinfo, def_stmt);
3215
3216 pattern_stmt
3217 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3218 MINUS_EXPR, gimple_assign_lhs (def_stmt),
3219 signmask);
3220 }
3221
3222 return pattern_stmt;
3223 }
3224
3225 if (prec > HOST_BITS_PER_WIDE_INT
3226 || integer_zerop (oprnd1))
3227 return NULL;
3228
3229 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3230 return NULL;
3231
3232 if (TYPE_UNSIGNED (itype))
3233 {
3234 unsigned HOST_WIDE_INT mh, ml;
3235 int pre_shift, post_shift;
3236 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3237 & GET_MODE_MASK (itype_mode));
3238 tree t1, t2, t3, t4;
3239
3240 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3241 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
3242 return NULL;
3243
3244 /* Find a suitable multiplier and right shift count
3245 instead of multiplying with D. */
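/* For example (a well-known case, given only for illustration): for
   D = 3 and PREC = 32 this is expected to yield ML = 0xaaaaaaab with
   POST_SHIFT = 1, so the quotient becomes (oprnd0 h* 0xaaaaaaab) >> 1.  */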
3246 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3247
3248 /* If the suggested multiplier is more than PREC bits, we can do better
3249 for even divisors, using an initial right shift. */
3250 if (mh != 0 && (d & 1) == 0)
3251 {
3252 pre_shift = ctz_or_zero (d);
3253 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3254 &ml, &post_shift, &dummy_int);
3255 gcc_assert (!mh);
3256 }
3257 else
3258 pre_shift = 0;
3259
3260 if (mh != 0)
3261 {
3262 if (post_shift - 1 >= prec)
3263 return NULL;
3264
3265 /* t1 = oprnd0 h* ml;
3266 t2 = oprnd0 - t1;
3267 t3 = t2 >> 1;
3268 t4 = t1 + t3;
3269 q = t4 >> (post_shift - 1); */
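/* The effective multiplier is ML + 2^PREC (ML alone did not fit in
   PREC bits), so the quotient is (oprnd0 h* ML + oprnd0) >> POST_SHIFT.
   Computing t2, t3 and t4 as above evaluates t1 + (oprnd0 - t1) / 2,
   which avoids overflowing the PREC-bit addition t1 + oprnd0.  */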
3270 t1 = vect_recog_temp_ssa_var (itype, NULL);
3271 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3272 build_int_cst (itype, ml));
3273 append_pattern_def_seq (stmt_vinfo, def_stmt);
3274
3275 t2 = vect_recog_temp_ssa_var (itype, NULL);
3276 def_stmt
3277 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3278 append_pattern_def_seq (stmt_vinfo, def_stmt);
3279
3280 t3 = vect_recog_temp_ssa_var (itype, NULL);
3281 def_stmt
3282 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3283 append_pattern_def_seq (stmt_vinfo, def_stmt);
3284
3285 t4 = vect_recog_temp_ssa_var (itype, NULL);
3286 def_stmt
3287 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3288
3289 if (post_shift != 1)
3290 {
3291 append_pattern_def_seq (stmt_vinfo, def_stmt);
3292
3293 q = vect_recog_temp_ssa_var (itype, NULL);
3294 pattern_stmt
3295 = gimple_build_assign (q, RSHIFT_EXPR, t4,
3296 build_int_cst (itype, post_shift - 1));
3297 }
3298 else
3299 {
3300 q = t4;
3301 pattern_stmt = def_stmt;
3302 }
3303 }
3304 else
3305 {
3306 if (pre_shift >= prec || post_shift >= prec)
3307 return NULL;
3308
3309 /* t1 = oprnd0 >> pre_shift;
3310 t2 = t1 h* ml;
3311 q = t2 >> post_shift; */
3312 if (pre_shift)
3313 {
3314 t1 = vect_recog_temp_ssa_var (itype, NULL);
3315 def_stmt
3316 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3317 build_int_cst (NULL, pre_shift));
3318 append_pattern_def_seq (stmt_vinfo, def_stmt);
3319 }
3320 else
3321 t1 = oprnd0;
3322
3323 t2 = vect_recog_temp_ssa_var (itype, NULL);
3324 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3325 build_int_cst (itype, ml));
3326
3327 if (post_shift)
3328 {
3329 append_pattern_def_seq (stmt_vinfo, def_stmt);
3330
3331 q = vect_recog_temp_ssa_var (itype, NULL);
3332 def_stmt
3333 = gimple_build_assign (q, RSHIFT_EXPR, t2,
3334 build_int_cst (itype, post_shift));
3335 }
3336 else
3337 q = t2;
3338
3339 pattern_stmt = def_stmt;
3340 }
3341 }
3342 else
3343 {
3344 unsigned HOST_WIDE_INT ml;
3345 int post_shift;
3346 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
3347 unsigned HOST_WIDE_INT abs_d;
3348 bool add = false;
3349 tree t1, t2, t3, t4;
3350
3351 /* Give up for -1. */
3352 if (d == -1)
3353 return NULL;
3354
3355 /* Since d might be INT_MIN, we have to cast to
3356 unsigned HOST_WIDE_INT before negating to avoid
3357 undefined signed overflow. */
3358 abs_d = (d >= 0
3359 ? (unsigned HOST_WIDE_INT) d
3360 : - (unsigned HOST_WIDE_INT) d);
3361
3362 /* n rem d = n rem -d */
3363 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3364 {
3365 d = abs_d;
3366 oprnd1 = build_int_cst (itype, abs_d);
3367 }
3368 if (HOST_BITS_PER_WIDE_INT >= prec
3369 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3370 /* This case is not handled correctly below. */
3371 return NULL;
3372
3373 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3374 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3375 {
3376 add = true;
3377 ml |= HOST_WIDE_INT_M1U << (prec - 1);
3378 }
3379 if (post_shift >= prec)
3380 return NULL;
3381
3382 /* t1 = oprnd0 h* ml; */
3383 t1 = vect_recog_temp_ssa_var (itype, NULL);
3384 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3385 build_int_cst (itype, ml));
3386
3387 if (add)
3388 {
3389 /* t2 = t1 + oprnd0; */
3390 append_pattern_def_seq (stmt_vinfo, def_stmt);
3391 t2 = vect_recog_temp_ssa_var (itype, NULL);
3392 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3393 }
3394 else
3395 t2 = t1;
3396
3397 if (post_shift)
3398 {
3399 /* t3 = t2 >> post_shift; */
3400 append_pattern_def_seq (stmt_vinfo, def_stmt);
3401 t3 = vect_recog_temp_ssa_var (itype, NULL);
3402 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3403 build_int_cst (itype, post_shift));
3404 }
3405 else
3406 t3 = t2;
3407
3408 wide_int oprnd0_min, oprnd0_max;
3409 int msb = 1;
3410 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
3411 {
3412 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
3413 msb = 0;
3414 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
3415 msb = -1;
3416 }
3417
3418 if (msb == 0 && d >= 0)
3419 {
3420 /* q = t3; */
3421 q = t3;
3422 pattern_stmt = def_stmt;
3423 }
3424 else
3425 {
3426 /* t4 = oprnd0 >> (prec - 1);
3427 or if we know from VRP that oprnd0 >= 0
3428 t4 = 0;
3429 or if we know from VRP that oprnd0 < 0
3430 t4 = -1; */
3431 append_pattern_def_seq (stmt_vinfo, def_stmt);
3432 t4 = vect_recog_temp_ssa_var (itype, NULL);
3433 if (msb != 1)
3434 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3435 build_int_cst (itype, msb));
3436 else
3437 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3438 build_int_cst (itype, prec - 1));
3439 append_pattern_def_seq (stmt_vinfo, def_stmt);
3440
3441 /* q = t3 - t4; or q = t4 - t3; */
3442 q = vect_recog_temp_ssa_var (itype, NULL);
3443 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3444 d < 0 ? t3 : t4);
3445 }
3446 }
3447
3448 if (rhs_code == TRUNC_MOD_EXPR)
3449 {
3450 tree r, t1;
3451
3452 /* We divided. Now finish by:
3453 t1 = q * oprnd1;
3454 r = oprnd0 - t1; */
3455 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3456
3457 t1 = vect_recog_temp_ssa_var (itype, NULL);
3458 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3459 append_pattern_def_seq (stmt_vinfo, def_stmt);
3460
3461 r = vect_recog_temp_ssa_var (itype, NULL);
3462 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3463 }
3464
3465 /* Pattern detected. */
3466 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3467
3468 *type_out = vectype;
3469 return pattern_stmt;
3470 }
3471
3472 /* Function vect_recog_mixed_size_cond_pattern
3473
3474 Try to find the following pattern:
3475
3476 type x_t, y_t;
3477 TYPE a_T, b_T, c_T;
3478 loop:
3479 S1 a_T = x_t CMP y_t ? b_T : c_T;
3480
3481 where type 'TYPE' is an integral type which has different size
3482 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3483 than 'type', the constants need to fit into an integer type
3484 with the same width as 'type') or results of conversion from 'type'.
3485
3486 Input:
3487
3488 * STMT_VINFO: The stmt from which the pattern search begins.
3489
3490 Output:
3491
3492 * TYPE_OUT: The type of the output of this pattern.
3493
3494 * Return value: A new stmt that will be used to replace the pattern.
3495 Additionally a def_stmt is added.
3496
3497 a_it = x_t CMP y_t ? b_it : c_it;
3498 a_T = (TYPE) a_it; */
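/* For illustration only (not taken from a testcase), a loop such as

     short x[N], y[N];
     int a[N];
     for (int i = 0; i < N; i++)
       a[i] = x[i] < y[i] ? 17 : 42;

   compares 'short' values but selects 'int' values; the pattern performs
   the selection in a short-sized integer type and widens the result
   afterwards, so that the comparison and the COND_EXPR operate on vectors
   with the same number of elements.  */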
3499
3500 static gimple *
3501 vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3502 {
3503 vec_info *vinfo = stmt_vinfo->vinfo;
3504 gimple *last_stmt = stmt_vinfo->stmt;
3505 tree cond_expr, then_clause, else_clause;
3506 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3507 gimple *pattern_stmt, *def_stmt;
3508 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3509 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3510 bool promotion;
3511 tree comp_scalar_type;
3512
3513 if (!is_gimple_assign (last_stmt)
3514 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3515 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3516 return NULL;
3517
3518 cond_expr = gimple_assign_rhs1 (last_stmt);
3519 then_clause = gimple_assign_rhs2 (last_stmt);
3520 else_clause = gimple_assign_rhs3 (last_stmt);
3521
3522 if (!COMPARISON_CLASS_P (cond_expr))
3523 return NULL;
3524
3525 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3526 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
3527 if (comp_vectype == NULL_TREE)
3528 return NULL;
3529
3530 type = gimple_expr_type (last_stmt);
3531 if (types_compatible_p (type, comp_scalar_type)
3532 || ((TREE_CODE (then_clause) != INTEGER_CST
3533 || TREE_CODE (else_clause) != INTEGER_CST)
3534 && !INTEGRAL_TYPE_P (comp_scalar_type))
3535 || !INTEGRAL_TYPE_P (type))
3536 return NULL;
3537
3538 if ((TREE_CODE (then_clause) != INTEGER_CST
3539 && !type_conversion_p (then_clause, stmt_vinfo, false, &orig_type0,
3540 &def_stmt0, &promotion))
3541 || (TREE_CODE (else_clause) != INTEGER_CST
3542 && !type_conversion_p (else_clause, stmt_vinfo, false, &orig_type1,
3543 &def_stmt1, &promotion)))
3544 return NULL;
3545
3546 if (orig_type0 && orig_type1
3547 && !types_compatible_p (orig_type0, orig_type1))
3548 return NULL;
3549
3550 if (orig_type0)
3551 {
3552 if (!types_compatible_p (orig_type0, comp_scalar_type))
3553 return NULL;
3554 then_clause = gimple_assign_rhs1 (def_stmt0);
3555 itype = orig_type0;
3556 }
3557
3558 if (orig_type1)
3559 {
3560 if (!types_compatible_p (orig_type1, comp_scalar_type))
3561 return NULL;
3562 else_clause = gimple_assign_rhs1 (def_stmt1);
3563 itype = orig_type1;
3564 }
3565
3566
3567 HOST_WIDE_INT cmp_mode_size
3568 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3569
3570 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3571 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3572 return NULL;
3573
3574 vectype = get_vectype_for_scalar_type (vinfo, type);
3575 if (vectype == NULL_TREE)
3576 return NULL;
3577
3578 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3579 return NULL;
3580
3581 if (itype == NULL_TREE)
3582 itype = build_nonstandard_integer_type (cmp_mode_size,
3583 TYPE_UNSIGNED (type));
3584
3585 if (itype == NULL_TREE
3586 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3587 return NULL;
3588
3589 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3590 if (vecitype == NULL_TREE)
3591 return NULL;
3592
3593 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3594 return NULL;
3595
3596 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3597 {
3598 if ((TREE_CODE (then_clause) == INTEGER_CST
3599 && !int_fits_type_p (then_clause, itype))
3600 || (TREE_CODE (else_clause) == INTEGER_CST
3601 && !int_fits_type_p (else_clause, itype)))
3602 return NULL;
3603 }
3604
3605 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3606 COND_EXPR, unshare_expr (cond_expr),
3607 fold_convert (itype, then_clause),
3608 fold_convert (itype, else_clause));
3609 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3610 NOP_EXPR, gimple_assign_lhs (def_stmt));
3611
3612 append_pattern_def_seq (stmt_vinfo, def_stmt, vecitype);
3613 *type_out = vectype;
3614
3615 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3616
3617 return pattern_stmt;
3618 }
3619
3620
3621 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3622 true if bool VAR can and should be optimized that way. Assume it shouldn't
3623 in case it's a result of a comparison which can be directly vectorized into
3624 a vector comparison. Fills in STMTS with all stmts visited during the
3625 walk. */
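/* For example (illustrative only), starting from 'c' in

     bool b = x < y;
     bool c = b & (z != 0);
     int res = (int) c;

   the walk records both comparisons and the BIT_AND_EXPR in STMTS and
   returns true, provided (among the other checks below) that the target
   cannot vectorize the comparisons directly as mask-producing vector
   comparisons.  */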
3626
3627 static bool
3628 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3629 {
3630 tree rhs1;
3631 enum tree_code rhs_code;
3632
3633 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3634 if (!def_stmt_info)
3635 return false;
3636
3637 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3638 if (!def_stmt)
3639 return false;
3640
3641 if (stmts.contains (def_stmt))
3642 return true;
3643
3644 rhs1 = gimple_assign_rhs1 (def_stmt);
3645 rhs_code = gimple_assign_rhs_code (def_stmt);
3646 switch (rhs_code)
3647 {
3648 case SSA_NAME:
3649 if (! check_bool_pattern (rhs1, vinfo, stmts))
3650 return false;
3651 break;
3652
3653 CASE_CONVERT:
3654 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3655 return false;
3656 if (! check_bool_pattern (rhs1, vinfo, stmts))
3657 return false;
3658 break;
3659
3660 case BIT_NOT_EXPR:
3661 if (! check_bool_pattern (rhs1, vinfo, stmts))
3662 return false;
3663 break;
3664
3665 case BIT_AND_EXPR:
3666 case BIT_IOR_EXPR:
3667 case BIT_XOR_EXPR:
3668 if (! check_bool_pattern (rhs1, vinfo, stmts)
3669 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3670 return false;
3671 break;
3672
3673 default:
3674 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3675 {
3676 tree vecitype, comp_vectype;
3677
3678 /* If the comparison can throw, then is_gimple_condexpr will be
3679 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3680 if (stmt_could_throw_p (cfun, def_stmt))
3681 return false;
3682
3683 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
3684 if (comp_vectype == NULL_TREE)
3685 return false;
3686
3687 tree mask_type = get_mask_type_for_scalar_type (vinfo,
3688 TREE_TYPE (rhs1));
3689 if (mask_type
3690 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3691 return false;
3692
3693 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3694 {
3695 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3696 tree itype
3697 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3698 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3699 if (vecitype == NULL_TREE)
3700 return false;
3701 }
3702 else
3703 vecitype = comp_vectype;
3704 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3705 return false;
3706 }
3707 else
3708 return false;
3709 break;
3710 }
3711
3712 bool res = stmts.add (def_stmt);
3713 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3714 gcc_assert (!res);
3715
3716 return true;
3717 }
3718
3719
3720 /* Helper function of adjust_bool_pattern.  Cast VAR (defined by a
3721 previous stmt, SSA_NAME_DEF_STMT of VAR) to TYPE, appending the cast
3722 stmt to STMT_INFO's pattern sequence, and return the cast's result.  */
3723
3724 static tree
3725 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3726 {
3727 vec_info *vinfo = stmt_info->vinfo;
3728 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3729 NOP_EXPR, var);
3730 append_pattern_def_seq (stmt_info, cast_stmt,
3731 get_vectype_for_scalar_type (vinfo, type));
3732 return gimple_assign_lhs (cast_stmt);
3733 }
3734
3735 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3736 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3737 type, OUT_TYPE is the desired final integer type of the whole pattern.
3738 STMT_INFO is the info of the pattern root and is where pattern stmts should
3739 be associated with. DEFS is a map of pattern defs. */
3740
3741 static void
3742 adjust_bool_pattern (tree var, tree out_type,
3743 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3744 {
3745 vec_info *vinfo = stmt_info->vinfo;
3746 gimple *stmt = SSA_NAME_DEF_STMT (var);
3747 enum tree_code rhs_code, def_rhs_code;
3748 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3749 location_t loc;
3750 gimple *pattern_stmt, *def_stmt;
3751 tree trueval = NULL_TREE;
3752
3753 rhs1 = gimple_assign_rhs1 (stmt);
3754 rhs2 = gimple_assign_rhs2 (stmt);
3755 rhs_code = gimple_assign_rhs_code (stmt);
3756 loc = gimple_location (stmt);
3757 switch (rhs_code)
3758 {
3759 case SSA_NAME:
3760 CASE_CONVERT:
3761 irhs1 = *defs.get (rhs1);
3762 itype = TREE_TYPE (irhs1);
3763 pattern_stmt
3764 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3765 SSA_NAME, irhs1);
3766 break;
3767
3768 case BIT_NOT_EXPR:
3769 irhs1 = *defs.get (rhs1);
3770 itype = TREE_TYPE (irhs1);
3771 pattern_stmt
3772 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3773 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3774 break;
3775
3776 case BIT_AND_EXPR:
3777 /* Try to optimize x = y & (a < b ? 1 : 0); into
3778 x = (a < b ? y : 0);
3779
3780 E.g. for:
3781 bool a_b, b_b, c_b;
3782 TYPE d_T;
3783
3784 S1 a_b = x1 CMP1 y1;
3785 S2 b_b = x2 CMP2 y2;
3786 S3 c_b = a_b & b_b;
3787 S4 d_T = (TYPE) c_b;
3788
3789 we would normally emit:
3790
3791 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3792 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3793 S3' c_T = a_T & b_T;
3794 S4' d_T = c_T;
3795
3796 but we can save one stmt by using the
3797 result of one of the COND_EXPRs in the other COND_EXPR and leave
3798 BIT_AND_EXPR stmt out:
3799
3800 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3801 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3802 S4' d_T = c_T;
3803
3804 At least when VEC_COND_EXPR is implemented using masks,
3805 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3806 computes the comparison mask and ANDs it, in one case with an
3807 all-ones vector, in the other case with a vector register.
3808 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3809 often more expensive. */
3810 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3811 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3812 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3813 {
3814 irhs1 = *defs.get (rhs1);
3815 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3816 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3817 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3818 {
3819 rhs_code = def_rhs_code;
3820 rhs1 = def_rhs1;
3821 rhs2 = gimple_assign_rhs2 (def_stmt);
3822 trueval = irhs1;
3823 goto do_compare;
3824 }
3825 else
3826 irhs2 = *defs.get (rhs2);
3827 goto and_ior_xor;
3828 }
3829 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3830 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3831 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3832 {
3833 irhs2 = *defs.get (rhs2);
3834 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3835 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3836 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3837 {
3838 rhs_code = def_rhs_code;
3839 rhs1 = def_rhs1;
3840 rhs2 = gimple_assign_rhs2 (def_stmt);
3841 trueval = irhs2;
3842 goto do_compare;
3843 }
3844 else
3845 irhs1 = *defs.get (rhs1);
3846 goto and_ior_xor;
3847 }
3848 /* FALLTHRU */
3849 case BIT_IOR_EXPR:
3850 case BIT_XOR_EXPR:
3851 irhs1 = *defs.get (rhs1);
3852 irhs2 = *defs.get (rhs2);
3853 and_ior_xor:
3854 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3855 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3856 {
3857 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3858 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3859 int out_prec = TYPE_PRECISION (out_type);
3860 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3861 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3862 stmt_info);
3863 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3864 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3865 stmt_info);
3866 else
3867 {
3868 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3869 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3870 }
3871 }
3872 itype = TREE_TYPE (irhs1);
3873 pattern_stmt
3874 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3875 rhs_code, irhs1, irhs2);
3876 break;
3877
3878 default:
3879 do_compare:
3880 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3881 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3882 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3883 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3884 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3885 {
3886 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3887 itype
3888 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3889 }
3890 else
3891 itype = TREE_TYPE (rhs1);
3892 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3893 if (trueval == NULL_TREE)
3894 trueval = build_int_cst (itype, 1);
3895 else
3896 gcc_checking_assert (useless_type_conversion_p (itype,
3897 TREE_TYPE (trueval)));
3898 pattern_stmt
3899 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3900 COND_EXPR, cond_expr, trueval,
3901 build_int_cst (itype, 0));
3902 break;
3903 }
3904
3905 gimple_set_location (pattern_stmt, loc);
3906 append_pattern_def_seq (stmt_info, pattern_stmt,
3907 get_vectype_for_scalar_type (vinfo, itype));
3908 defs.put (var, gimple_assign_lhs (pattern_stmt));
3909 }
3910
3911 /* Comparison function to qsort a vector of gimple stmts after UID. */
3912
3913 static int
3914 sort_after_uid (const void *p1, const void *p2)
3915 {
3916 const gimple *stmt1 = *(const gimple * const *)p1;
3917 const gimple *stmt2 = *(const gimple * const *)p2;
3918 return gimple_uid (stmt1) - gimple_uid (stmt2);
3919 }
3920
3921 /* Create pattern stmts for all stmts participating in the bool pattern
3922 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3923 OUT_TYPE. Return the def of the pattern root. */
3924
3925 static tree
3926 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3927 tree out_type, stmt_vec_info stmt_info)
3928 {
3929 /* Gather original stmts in the bool pattern in their order of appearance
3930 in the IL. */
3931 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3932 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3933 i != bool_stmt_set.end (); ++i)
3934 bool_stmts.quick_push (*i);
3935 bool_stmts.qsort (sort_after_uid);
3936
3937 /* Now process them in that order, producing pattern stmts. */
3938 hash_map <tree, tree> defs;
3939 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3940 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3941 out_type, stmt_info, defs);
3942
3943 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3944 gimple *pattern_stmt
3945 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
3946 return gimple_assign_lhs (pattern_stmt);
3947 }
3948
3949 /* Return the proper type for converting bool VAR into
3950 an integer value or NULL_TREE if no such type exists.
3951 The type is chosen so that the converted value has the
3952 same number of elements as VAR's vector type. */
3953
3954 static tree
3955 integer_type_for_mask (tree var, vec_info *vinfo)
3956 {
3957 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3958 return NULL_TREE;
3959
3960 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3961 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
3962 return NULL_TREE;
3963
3964 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
3965 }
3966
3967 /* Function vect_recog_bool_pattern
3968
3969 Try to find pattern like following:
3970
3971 bool a_b, b_b, c_b, d_b, e_b;
3972 TYPE f_T;
3973 loop:
3974 S1 a_b = x1 CMP1 y1;
3975 S2 b_b = x2 CMP2 y2;
3976 S3 c_b = a_b & b_b;
3977 S4 d_b = x3 CMP3 y3;
3978 S5 e_b = c_b | d_b;
3979 S6 f_T = (TYPE) e_b;
3980
3981 where type 'TYPE' is an integral type. Or a similar pattern
3982 ending in
3983
3984 S6 f_Y = e_b ? r_Y : s_Y;
3985
3986 as results from if-conversion of a complex condition.
3987
3988 Input:
3989
3990 * STMT_VINFO: The stmt at the end from which the pattern
3991 search begins, i.e. cast of a bool to
3992 an integer type.
3993
3994 Output:
3995
3996 * TYPE_OUT: The type of the output of this pattern.
3997
3998 * Return value: A new stmt that will be used to replace the pattern.
3999
4000 Assuming the size of TYPE is the same as the size of all comparisons
4001 (otherwise some casts would be added where needed), for the above
4002 sequence we create the following related pattern stmts:
4003 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4004 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4005 S4' d_T = x3 CMP3 y3 ? 1 : 0;
4006 S5' e_T = c_T | d_T;
4007 S6' f_T = e_T;
4008
4009 Instead of the above S3' we could emit:
4010 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4011 S3' c_T = a_T | b_T;
4012 but the above is more efficient. */
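/* As a concrete example of the second form (illustrative only),
   if-conversion of

     for (int i = 0; i < n; i++)
       out[i] = (a[i] < b[i] && c[i] < d[i]) ? u[i] : v[i];

   yields a boolean that combines two comparisons and is then used as the
   condition of a COND_EXPR; the boolean chain is rewritten into integer
   COND_EXPRs as described above.  */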
4013
4014 static gimple *
4015 vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
4016 {
4017 gimple *last_stmt = stmt_vinfo->stmt;
4018 enum tree_code rhs_code;
4019 tree var, lhs, rhs, vectype;
4020 vec_info *vinfo = stmt_vinfo->vinfo;
4021 gimple *pattern_stmt;
4022
4023 if (!is_gimple_assign (last_stmt))
4024 return NULL;
4025
4026 var = gimple_assign_rhs1 (last_stmt);
4027 lhs = gimple_assign_lhs (last_stmt);
4028 rhs_code = gimple_assign_rhs_code (last_stmt);
4029
4030 if (rhs_code == VIEW_CONVERT_EXPR)
4031 var = TREE_OPERAND (var, 0);
4032
4033 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4034 return NULL;
4035
4036 hash_set<gimple *> bool_stmts;
4037
4038 if (CONVERT_EXPR_CODE_P (rhs_code)
4039 || rhs_code == VIEW_CONVERT_EXPR)
4040 {
4041 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4042 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
4043 return NULL;
4044 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4045 if (vectype == NULL_TREE)
4046 return NULL;
4047
4048 if (check_bool_pattern (var, vinfo, bool_stmts))
4049 {
4050 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), stmt_vinfo);
4051 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4052 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4053 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4054 else
4055 pattern_stmt
4056 = gimple_build_assign (lhs, NOP_EXPR, rhs);
4057 }
4058 else
4059 {
4060 tree type = integer_type_for_mask (var, vinfo);
4061 tree cst0, cst1, tmp;
4062
4063 if (!type)
4064 return NULL;
4065
4066 /* We may use the cond directly with a narrowed type, avoiding
4067 multiple cond exprs followed by result packing, and perform a
4068 single cond with a packed mask instead.  In case of widening
4069 it is better to do the cond first and then extract the
4070 results. */
4071 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4072 type = TREE_TYPE (lhs);
4073
4074 cst0 = build_int_cst (type, 0);
4075 cst1 = build_int_cst (type, 1);
4076 tmp = vect_recog_temp_ssa_var (type, NULL);
4077 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4078
4079 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4080 {
4081 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4082 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
4083
4084 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4085 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4086 }
4087 }
4088
4089 *type_out = vectype;
4090 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4091
4092 return pattern_stmt;
4093 }
4094 else if (rhs_code == COND_EXPR
4095 && TREE_CODE (var) == SSA_NAME)
4096 {
4097 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4098 if (vectype == NULL_TREE)
4099 return NULL;
4100
4101 /* Build a scalar type for the boolean result that when
4102 vectorized matches the vector type of the result in
4103 size and number of elements. */
4104 unsigned prec
4105 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4106 TYPE_VECTOR_SUBPARTS (vectype));
4107
4108 tree type
4109 = build_nonstandard_integer_type (prec,
4110 TYPE_UNSIGNED (TREE_TYPE (var)));
4111 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4112 return NULL;
4113
4114 if (!check_bool_pattern (var, vinfo, bool_stmts))
4115 return NULL;
4116
4117 rhs = adjust_bool_stmts (bool_stmts, type, stmt_vinfo);
4118
4119 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4120 pattern_stmt
4121 = gimple_build_assign (lhs, COND_EXPR,
4122 build2 (NE_EXPR, boolean_type_node,
4123 rhs, build_int_cst (type, 0)),
4124 gimple_assign_rhs2 (last_stmt),
4125 gimple_assign_rhs3 (last_stmt));
4126 *type_out = vectype;
4127 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4128
4129 return pattern_stmt;
4130 }
4131 else if (rhs_code == SSA_NAME
4132 && STMT_VINFO_DATA_REF (stmt_vinfo))
4133 {
4134 stmt_vec_info pattern_stmt_info;
4135 tree nunits_vectype;
4136 if (!vect_get_vector_types_for_stmt (stmt_vinfo, &vectype,
4137 &nunits_vectype)
4138 || !VECTOR_MODE_P (TYPE_MODE (vectype)))
4139 return NULL;
4140
4141 if (check_bool_pattern (var, vinfo, bool_stmts))
4142 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), stmt_vinfo);
4143 else
4144 {
4145 tree type = integer_type_for_mask (var, vinfo);
4146 tree cst0, cst1, new_vectype;
4147
4148 if (!type)
4149 return NULL;
4150
4151 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4152 type = TREE_TYPE (vectype);
4153
4154 cst0 = build_int_cst (type, 0);
4155 cst1 = build_int_cst (type, 1);
4156 new_vectype = get_vectype_for_scalar_type (vinfo, type);
4157
4158 rhs = vect_recog_temp_ssa_var (type, NULL);
4159 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4160 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
4161 }
4162
4163 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4164 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4165 {
4166 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4167 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4168 append_pattern_def_seq (stmt_vinfo, cast_stmt);
4169 rhs = rhs2;
4170 }
4171 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4172 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4173 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4174 *type_out = vectype;
4175 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4176
4177 return pattern_stmt;
4178 }
4179 else
4180 return NULL;
4181 }
4182
4183
4184 /* A helper for vect_recog_mask_conversion_pattern.  Build a
4185 conversion of MASK to a type suitable for masking VECTYPE.
4186 The built statement gets the required vectype and is appended
4187 to STMT_VINFO's pattern sequence.
4188 
4189 Return the converted mask. */
4190
4191 static tree
4192 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4193 {
4194 gimple *stmt;
4195 tree masktype, tmp;
4196
4197 masktype = truth_type_for (vectype);
4198 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4199 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4200 append_pattern_def_seq (stmt_vinfo, stmt, masktype, TREE_TYPE (vectype));
4201
4202 return tmp;
4203 }
4204
4205
4206 /* Function vect_recog_mask_conversion_pattern
4207
4208 Try to find statements which require boolean type
4209 conversion.  Additional conversion statements are
4210 added to handle such cases. For example:
4211
4212 bool m_1, m_2, m_3;
4213 int i_4, i_5;
4214 double d_6, d_7;
4215 char c_1, c_2, c_3;
4216
4217 S1 m_1 = i_4 > i_5;
4218 S2 m_2 = d_6 < d_7;
4219 S3 m_3 = m_1 & m_2;
4220 S4 c_1 = m_3 ? c_2 : c_3;
4221
4222 Will be transformed into:
4223
4224 S1 m_1 = i_4 > i_5;
4225 S2 m_2 = d_6 < d_7;
4226 S3'' m_2' = (_Bool[bitsize=32])m_2
4227 S3' m_3' = m_1 & m_2';
4228 S4'' m_3'' = (_Bool[bitsize=8])m_3'
4229 S4' c_1' = m_3'' ? c_2 : c_3; */
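/* An illustrative source loop for the above (not from a testcase):

     for (int i = 0; i < n; i++)
       c_1[i] = (i_4[i] > i_5[i] && d_6[i] < d_7[i]) ? c_2[i] : c_3[i];

   The int comparison produces a mask for 32-bit elements and the double
   comparison a mask for 64-bit elements, so conversions are inserted
   before the AND and before the selection on char elements.  */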
4230
4231 static gimple *
4232 vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
4233 {
4234 gimple *last_stmt = stmt_vinfo->stmt;
4235 enum tree_code rhs_code;
4236 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4237 tree vectype1, vectype2;
4238 stmt_vec_info pattern_stmt_info;
4239 vec_info *vinfo = stmt_vinfo->vinfo;
4240
4241 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
4242 if (is_gimple_call (last_stmt)
4243 && gimple_call_internal_p (last_stmt))
4244 {
4245 gcall *pattern_stmt;
4246
4247 internal_fn ifn = gimple_call_internal_fn (last_stmt);
4248 int mask_argno = internal_fn_mask_index (ifn);
4249 if (mask_argno < 0)
4250 return NULL;
4251
4252 bool store_p = internal_store_fn_p (ifn);
4253 if (store_p)
4254 {
4255 int rhs_index = internal_fn_stored_value_index (ifn);
4256 tree rhs = gimple_call_arg (last_stmt, rhs_index);
4257 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4258 }
4259 else
4260 {
4261 lhs = gimple_call_lhs (last_stmt);
4262 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4263 }
4264
4265 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4266 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
4267 if (!mask_arg_type)
4268 return NULL;
4269 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4270
4271 if (!vectype1 || !vectype2
4272 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4273 TYPE_VECTOR_SUBPARTS (vectype2)))
4274 return NULL;
4275
4276 tmp = build_mask_conversion (mask_arg, vectype1, stmt_vinfo);
4277
4278 auto_vec<tree, 8> args;
4279 unsigned int nargs = gimple_call_num_args (last_stmt);
4280 args.safe_grow (nargs);
4281 for (unsigned int i = 0; i < nargs; ++i)
4282 args[i] = ((int) i == mask_argno
4283 ? tmp
4284 : gimple_call_arg (last_stmt, i));
4285 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4286
4287 if (!store_p)
4288 {
4289 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4290 gimple_call_set_lhs (pattern_stmt, lhs);
4291 }
4292 gimple_call_set_nothrow (pattern_stmt, true);
4293
4294 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4295 if (STMT_VINFO_DATA_REF (stmt_vinfo))
4296 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4297
4298 *type_out = vectype1;
4299 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4300
4301 return pattern_stmt;
4302 }
4303
4304 if (!is_gimple_assign (last_stmt))
4305 return NULL;
4306
4307 gimple *pattern_stmt;
4308 lhs = gimple_assign_lhs (last_stmt);
4309 rhs1 = gimple_assign_rhs1 (last_stmt);
4310 rhs_code = gimple_assign_rhs_code (last_stmt);
4311
4312 /* Check for cond expression requiring mask conversion. */
4313 if (rhs_code == COND_EXPR)
4314 {
4315 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4316
4317 if (TREE_CODE (rhs1) == SSA_NAME)
4318 {
4319 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4320 if (!rhs1_type)
4321 return NULL;
4322 }
4323 else if (COMPARISON_CLASS_P (rhs1))
4324 {
4325 /* Check whether we're comparing scalar booleans and (if so)
4326 whether a better mask type exists than the mask associated
4327 with boolean-sized elements. This avoids unnecessary packs
4328 and unpacks if the booleans are set from comparisons of
4329 wider types. E.g. in:
4330
4331 int x1, x2, x3, x4, y1, y2;
4332 ...
4333 bool b1 = (x1 == x2);
4334 bool b2 = (x3 == x4);
4335 ... = b1 == b2 ? y1 : y2;
4336
4337 it is better for b1 and b2 to use the mask type associated
4338 with int elements rather than bool (byte) elements. */
4339 rhs1_type = integer_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4340 if (!rhs1_type)
4341 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4342 }
4343 else
4344 return NULL;
4345
4346 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4347
4348 if (!vectype1 || !vectype2)
4349 return NULL;
4350
4351 /* Continue if a conversion is needed. Also continue if we have
4352 a comparison whose vector type would normally be different from
4353 VECTYPE2 when considered in isolation. In that case we'll
4354 replace the comparison with an SSA name (so that we can record
4355 its vector type) and behave as though the comparison was an SSA
4356 name from the outset. */
4357 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4358 TYPE_VECTOR_SUBPARTS (vectype2))
4359 && (TREE_CODE (rhs1) == SSA_NAME
4360 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4361 return NULL;
4362
4363 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4364 in place; we can handle it in vectorizable_condition.  This avoids
4365 unnecessary promotion stmts and an increased vectorization factor. */
4366 if (COMPARISON_CLASS_P (rhs1)
4367 && INTEGRAL_TYPE_P (rhs1_type)
4368 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4369 TYPE_VECTOR_SUBPARTS (vectype2)))
4370 {
4371 enum vect_def_type dt;
4372 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4373 && dt == vect_external_def
4374 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4375 && (dt == vect_external_def
4376 || dt == vect_constant_def))
4377 {
4378 tree wide_scalar_type = build_nonstandard_integer_type
4379 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4380 TYPE_UNSIGNED (rhs1_type));
4381 tree vectype3 = get_vectype_for_scalar_type (vinfo,
4382 wide_scalar_type);
4383 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4384 return NULL;
4385 }
4386 }
4387
4388 /* If rhs1 is a comparison we need to move it into a
4389 separate statement. */
4390 if (TREE_CODE (rhs1) != SSA_NAME)
4391 {
4392 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4393 pattern_stmt = gimple_build_assign (tmp, rhs1);
4394 rhs1 = tmp;
4395 append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2,
4396 rhs1_type);
4397 }
4398
4399 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4400 TYPE_VECTOR_SUBPARTS (vectype2)))
4401 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4402 else
4403 tmp = rhs1;
4404
4405 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4406 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4407 gimple_assign_rhs2 (last_stmt),
4408 gimple_assign_rhs3 (last_stmt));
4409
4410 *type_out = vectype1;
4411 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4412
4413 return pattern_stmt;
4414 }
4415
4416 /* Now check for binary boolean operations requiring conversion for
4417 one of the operands. */
4418 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4419 return NULL;
4420
4421 if (rhs_code != BIT_IOR_EXPR
4422 && rhs_code != BIT_XOR_EXPR
4423 && rhs_code != BIT_AND_EXPR
4424 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4425 return NULL;
4426
4427 rhs2 = gimple_assign_rhs2 (last_stmt);
4428
4429 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4430 rhs2_type = integer_type_for_mask (rhs2, vinfo);
4431
4432 if (!rhs1_type || !rhs2_type
4433 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4434 return NULL;
4435
4436 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4437 {
4438 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4439 if (!vectype1)
4440 return NULL;
4441 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
4442 }
4443 else
4444 {
4445 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
4446 if (!vectype1)
4447 return NULL;
4448 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4449 }
4450
4451 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4452 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4453
4454 *type_out = vectype1;
4455 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4456
4457 return pattern_stmt;
4458 }
4459
4460 /* STMT_INFO is a load or store. If the load or store is conditional, return
4461 the boolean condition under which it occurs, otherwise return null. */
4462
4463 static tree
4464 vect_get_load_store_mask (stmt_vec_info stmt_info)
4465 {
4466 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4467 {
4468 gcc_assert (gimple_assign_single_p (def_assign));
4469 return NULL_TREE;
4470 }
4471
4472 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4473 {
4474 internal_fn ifn = gimple_call_internal_fn (def_call);
4475 int mask_index = internal_fn_mask_index (ifn);
4476 return gimple_call_arg (def_call, mask_index);
4477 }
4478
4479 gcc_unreachable ();
4480 }
4481
4482 /* Return MASK if MASK is suitable for masking an operation on vectors
4483 of type VECTYPE, otherwise convert it into such a form and return
4484 the result. Associate any conversion statements with STMT_INFO's
4485 pattern. */
4486
4487 static tree
4488 vect_convert_mask_for_vectype (tree mask, tree vectype,
4489 stmt_vec_info stmt_info, vec_info *vinfo)
4490 {
4491 tree mask_type = integer_type_for_mask (mask, vinfo);
4492 if (mask_type)
4493 {
4494 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
4495 if (mask_vectype
4496 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4497 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4498 mask = build_mask_conversion (mask, vectype, stmt_info);
4499 }
4500 return mask;
4501 }
4502
4503 /* Return the equivalent of:
4504
4505 fold_convert (TYPE, VALUE)
4506
4507 with the expectation that the operation will be vectorized.
4508 If new statements are needed, add them as pattern statements
4509 to STMT_INFO. */
4510
4511 static tree
4512 vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
4513 {
4514 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4515 return value;
4516
4517 vec_info *vinfo = stmt_info->vinfo;
4518 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4519 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4520 append_pattern_def_seq (stmt_info, conversion,
4521 get_vectype_for_scalar_type (vinfo, type));
4522 return new_value;
4523 }
4524
4525 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4526 internal function. Return the final statement on success and set
4527 *TYPE_OUT to the vector type being loaded or stored.
4528
4529 This function only handles gathers and scatters that were recognized
4530 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
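/* For example (illustrative only), a loop such as

     for (int i = 0; i < n; i++)
       dest[i] = src[idx[i]];

   whose data reference was already classified as a gather is rewritten
   here into a call to the target's gather-load internal function
   (a masked variant if the access is conditional).  */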
4531
4532 static gimple *
4533 vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
4534 {
4535 /* Currently we only support this for loop vectorization. */
4536 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4537 if (!loop_vinfo)
4538 return NULL;
4539
4540 /* Make sure that we're looking at a gather load or scatter store. */
4541 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4542 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4543 return NULL;
4544
4545 /* Get the boolean that controls whether the load or store happens.
4546 This is null if the operation is unconditional. */
4547 tree mask = vect_get_load_store_mask (stmt_info);
4548
4549 /* Make sure that the target supports an appropriate internal
4550 function for the gather/scatter operation. */
4551 gather_scatter_info gs_info;
4552 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4553 || gs_info.decl)
4554 return NULL;
4555
4556 /* Convert the mask to the right form. */
4557 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
4558 gs_info.element_type);
4559 if (mask)
4560 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4561 loop_vinfo);
4562
4563 /* Get the invariant base and non-invariant offset, converting the
4564 latter to the same width as the vector elements. */
4565 tree base = gs_info.base;
4566 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
4567 tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4568 stmt_info);
4569
4570 /* Build the new pattern statement. */
4571 tree scale = size_int (gs_info.scale);
4572 gcall *pattern_stmt;
4573 if (DR_IS_READ (dr))
4574 {
4575 tree zero = build_zero_cst (gs_info.element_type);
4576 if (mask != NULL)
4577 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
4578 offset, scale, zero, mask);
4579 else
4580 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4581 offset, scale, zero);
4582 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4583 gimple_call_set_lhs (pattern_stmt, load_lhs);
4584 }
4585 else
4586 {
4587 tree rhs = vect_get_store_rhs (stmt_info);
4588 if (mask != NULL)
4589 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4590 base, offset, scale, rhs,
4591 mask);
4592 else
4593 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4594 base, offset, scale, rhs);
4595 }
4596 gimple_call_set_nothrow (pattern_stmt, true);
4597
4598 /* Copy across relevant vectorization info and associate DR with the
4599 new pattern statement instead of the original statement. */
4600 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4601 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4602
4603 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4604 *type_out = vectype;
4605 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4606
4607 return pattern_stmt;
4608 }
4609
4610 /* Return true if TYPE is a non-boolean integer type. These are the types
4611 that we want to consider for narrowing. */
4612
4613 static bool
4614 vect_narrowable_type_p (tree type)
4615 {
4616 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4617 }
4618
4619 /* Return true if the operation given by CODE can be truncated to N bits
4620 when only N bits of the output are needed. This is only true if bit N+1
4621 of the inputs has no effect on the low N bits of the result. */
4622
4623 static bool
4624 vect_truncatable_operation_p (tree_code code)
4625 {
4626 switch (code)
4627 {
4628 case PLUS_EXPR:
4629 case MINUS_EXPR:
4630 case MULT_EXPR:
4631 case BIT_AND_EXPR:
4632 case BIT_IOR_EXPR:
4633 case BIT_XOR_EXPR:
4634 case COND_EXPR:
4635 return true;
4636
4637 default:
4638 return false;
4639 }
4640 }
4641
4642 /* Record that STMT_INFO could be changed from operating on TYPE to
4643 operating on a type with the precision and sign given by PRECISION
4644 and SIGN respectively. PRECISION is an arbitrary bit precision;
4645 it might not be a whole number of bytes. */
4646
4647 static void
4648 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4649 unsigned int precision, signop sign)
4650 {
4651 /* Round the precision up to a whole number of bytes. */
4652 precision = vect_element_precision (precision);
4653 if (precision < TYPE_PRECISION (type)
4654 && (!stmt_info->operation_precision
4655 || stmt_info->operation_precision > precision))
4656 {
4657 stmt_info->operation_precision = precision;
4658 stmt_info->operation_sign = sign;
4659 }
4660 }
4661
4662 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4663 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4664 is an arbitrary bit precision; it might not be a whole number of bytes. */
4665
4666 static void
4667 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4668 unsigned int min_input_precision)
4669 {
4670 /* This operation in isolation only requires the inputs to have
4671 MIN_INPUT_PRECISION of precision.  However, that doesn't mean
4672 that MIN_INPUT_PRECISION is a natural precision for the chain
4673 as a whole. E.g. consider something like:
4674
4675 unsigned short *x, *y;
4676 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4677
4678 The right shift can be done on unsigned chars, and only requires the
4679 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4680 approach would mean turning a natural chain of single-vector unsigned
4681 short operations into one that truncates "*x" and then extends
4682 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4683 operation and one vector for each unsigned char operation.
4684 This would be a significant pessimization.
4685
4686 Instead only propagate the maximum of this precision and the precision
4687 required by the users of the result. This means that we don't pessimize
4688 the case above but continue to optimize things like:
4689
4690 unsigned char *y;
4691 unsigned short *x;
4692 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4693
4694 Here we would truncate two vectors of *x to a single vector of
4695 unsigned chars and use single-vector unsigned char operations for
4696 everything else, rather than doing two unsigned short copies of
4697 "(*x & 0xf0) >> 4" and then truncating the result. */
4698 min_input_precision = MAX (min_input_precision,
4699 stmt_info->min_output_precision);
4700
4701 if (min_input_precision < TYPE_PRECISION (type)
4702 && (!stmt_info->min_input_precision
4703 || stmt_info->min_input_precision > min_input_precision))
4704 stmt_info->min_input_precision = min_input_precision;
4705 }
4706
4707 /* Subroutine of vect_determine_min_output_precision. Return true if
4708 we can calculate a reduced number of output bits for STMT_INFO,
4709 whose result is LHS. */
4710
4711 static bool
4712 vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
4713 {
4714 /* Take the maximum precision required by users of the result. */
4715 vec_info *vinfo = stmt_info->vinfo;
4716 unsigned int precision = 0;
4717 imm_use_iterator iter;
4718 use_operand_p use;
4719 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4720 {
4721 gimple *use_stmt = USE_STMT (use);
4722 if (is_gimple_debug (use_stmt))
4723 continue;
4724 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4725 if (!use_stmt_info || !use_stmt_info->min_input_precision)
4726 return false;
4727 /* The input precision recorded for COND_EXPRs applies only to the
4728 "then" and "else" values. */
4729 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
4730 if (assign
4731 && gimple_assign_rhs_code (assign) == COND_EXPR
4732 && use->use != gimple_assign_rhs2_ptr (assign)
4733 && use->use != gimple_assign_rhs3_ptr (assign))
4734 return false;
4735 precision = MAX (precision, use_stmt_info->min_input_precision);
4736 }
4737
4738 if (dump_enabled_p ())
4739 dump_printf_loc (MSG_NOTE, vect_location,
4740 "only the low %d bits of %T are significant\n",
4741 precision, lhs);
4742 stmt_info->min_output_precision = precision;
4743 return true;
4744 }
4745
4746 /* Calculate min_output_precision for STMT_INFO. */
4747
4748 static void
4749 vect_determine_min_output_precision (stmt_vec_info stmt_info)
4750 {
4751 /* We're only interested in statements with a narrowable result. */
4752 tree lhs = gimple_get_lhs (stmt_info->stmt);
4753 if (!lhs
4754 || TREE_CODE (lhs) != SSA_NAME
4755 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4756 return;
4757
4758 if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
4759 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4760 }
4761
4762 /* Use range information to decide whether STMT (described by STMT_INFO)
4763 could be done in a narrower type. This is effectively a forward
4764 propagation, since it uses context-independent information that applies
4765 to all users of an SSA name. */
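/* For instance (illustrative only), if range information shows that the
   result of an int addition always lies in [0, 200], the addition can be
   done in an unsigned 8-bit type; recording this allows the vectorizer to
   use narrower vector elements for the whole chain.  */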
4766
4767 static void
4768 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4769 {
4770 tree lhs = gimple_assign_lhs (stmt);
4771 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4772 return;
4773
4774 tree type = TREE_TYPE (lhs);
4775 if (!vect_narrowable_type_p (type))
4776 return;
4777
4778 /* First see whether we have any useful range information for the result. */
4779 unsigned int precision = TYPE_PRECISION (type);
4780 signop sign = TYPE_SIGN (type);
4781 wide_int min_value, max_value;
4782 if (!vect_get_range_info (lhs, &min_value, &max_value))
4783 return;
4784
4785 tree_code code = gimple_assign_rhs_code (stmt);
4786 unsigned int nops = gimple_num_ops (stmt);
4787
4788 if (!vect_truncatable_operation_p (code))
4789 /* Check that all relevant input operands are compatible, and update
4790 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4791 for (unsigned int i = 1; i < nops; ++i)
4792 {
4793 tree op = gimple_op (stmt, i);
4794 if (TREE_CODE (op) == INTEGER_CST)
4795 {
4796 /* Don't require the integer to have TYPE (which it might
4797 not for things like shift amounts, etc.), but do require it
4798 to fit the type. */
4799 if (!int_fits_type_p (op, type))
4800 return;
4801
4802 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4803 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4804 }
4805 else if (TREE_CODE (op) == SSA_NAME)
4806 {
4807 /* Ignore codes that don't take uniform arguments. */
4808 if (!types_compatible_p (TREE_TYPE (op), type))
4809 return;
4810
4811 wide_int op_min_value, op_max_value;
4812 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4813 return;
4814
4815 min_value = wi::min (min_value, op_min_value, sign);
4816 max_value = wi::max (max_value, op_max_value, sign);
4817 }
4818 else
4819 return;
4820 }
4821
4822 /* Try to switch signed types for unsigned types if we can.
4823 This is better for two reasons. First, unsigned ops tend
4824 to be cheaper than signed ops. Second, it means that we can
4825 handle things like:
4826
4827 signed char c;
4828 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4829
4830 as:
4831
4832 signed char c;
4833 unsigned short res_1 = (unsigned short) c & 0xff00;
4834 int res = (int) res_1;
4835
4836 where the intermediate result res_1 has unsigned rather than
4837 signed type. */
4838 if (sign == SIGNED && !wi::neg_p (min_value))
4839 sign = UNSIGNED;
4840
4841 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4842 unsigned int precision1 = wi::min_precision (min_value, sign);
4843 unsigned int precision2 = wi::min_precision (max_value, sign);
4844 unsigned int value_precision = MAX (precision1, precision2);
4845 if (value_precision >= precision)
4846 return;
4847
4848 if (dump_enabled_p ())
4849 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4850 " without loss of precision: %G",
4851 sign == SIGNED ? "signed" : "unsigned",
4852 value_precision, stmt);
4853
4854 vect_set_operation_type (stmt_info, type, value_precision, sign);
4855 vect_set_min_input_precision (stmt_info, type, value_precision);
4856 }
4857
4858 /* Use information about the users of STMT's result to decide whether
4859 STMT (described by STMT_INFO) could be done in a narrower type.
4860 This is effectively a backward propagation. */
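/* For instance (illustrative only), if only the low 8 bits of the result
   of 'x >> 4' are needed, the shift itself must be done with at least
   8 + 4 = 12 bits and therefore needs 12 input bits, whereas a PLUS_EXPR
   whose result needs only 8 bits can itself be narrowed to 8 bits.  */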
4861
4862 static void
4863 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4864 {
4865 tree_code code = gimple_assign_rhs_code (stmt);
4866 unsigned int opno = (code == COND_EXPR ? 2 : 1);
4867 tree type = TREE_TYPE (gimple_op (stmt, opno));
4868 if (!vect_narrowable_type_p (type))
4869 return;
4870
4871 unsigned int precision = TYPE_PRECISION (type);
4872 unsigned int operation_precision, min_input_precision;
4873 switch (code)
4874 {
4875 CASE_CONVERT:
4876 /* Only the bits that contribute to the output matter. Don't change
4877 the precision of the operation itself. */
4878 operation_precision = precision;
4879 min_input_precision = stmt_info->min_output_precision;
4880 break;
4881
4882 case LSHIFT_EXPR:
4883 case RSHIFT_EXPR:
4884 {
4885 tree shift = gimple_assign_rhs2 (stmt);
4886 if (TREE_CODE (shift) != INTEGER_CST
4887 || !wi::ltu_p (wi::to_widest (shift), precision))
4888 return;
4889 unsigned int const_shift = TREE_INT_CST_LOW (shift);
4890 if (code == LSHIFT_EXPR)
4891 {
4892 /* Avoid creating an undefined shift.
4893
4894 ??? We could instead use min_output_precision as-is and
4895 optimize out-of-range shifts to zero. However, only
4896 degenerate testcases shift away all their useful input data,
4897 and it isn't natural to drop input operations in the middle
4898 of vectorization. This sort of thing should really be
4899 handled before vectorization. */
4900 operation_precision = MAX (stmt_info->min_output_precision,
4901 const_shift + 1);
4902 /* We need CONST_SHIFT fewer bits of the input. */
4903 min_input_precision = (MAX (operation_precision, const_shift)
4904 - const_shift);
4905 }
4906 else
4907 {
4908 /* We need CONST_SHIFT extra bits to do the operation. */
4909 operation_precision = (stmt_info->min_output_precision
4910 + const_shift);
4911 min_input_precision = operation_precision;
4912 }
4913 break;
4914 }
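      /* As a worked illustration of the arithmetic above (assuming, say,
	 min_output_precision == 8 and CONST_SHIFT == 3): an LSHIFT_EXPR can
	 be done in MAX (8, 3 + 1) = 8 bits and needs only 8 - 3 = 5 input
	 bits, whereas an RSHIFT_EXPR must be done in 8 + 3 = 11 bits and
	 needs all 11 input bits.  */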
4915
4916 default:
4917 if (vect_truncatable_operation_p (code))
4918 {
4919 /* Input bit N has no effect on output bits N-1 and lower. */
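	  /* For example, the low 8 bits of an addition depend only on the
	     low 8 bits of its operands, because carries propagate upwards
	     only.  */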
4920 operation_precision = stmt_info->min_output_precision;
4921 min_input_precision = operation_precision;
4922 break;
4923 }
4924 return;
4925 }
4926
4927 if (operation_precision < precision)
4928 {
4929 if (dump_enabled_p ())
4930 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4931 " without affecting users: %G",
4932 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
4933 operation_precision, stmt);
4934 vect_set_operation_type (stmt_info, type, operation_precision,
4935 TYPE_SIGN (type));
4936 }
4937 vect_set_min_input_precision (stmt_info, type, min_input_precision);
4938 }
4939
4940 /* Return true if the statement described by STMT_INFO sets a boolean
4941 SSA_NAME and if we know how to vectorize this kind of statement using
4942 vector mask types. */
4943
4944 static bool
4945 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
4946 {
4947 tree lhs = gimple_get_lhs (stmt_info->stmt);
4948 if (!lhs
4949 || TREE_CODE (lhs) != SSA_NAME
4950 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4951 return false;
4952
4953 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
4954 {
4955 tree_code rhs_code = gimple_assign_rhs_code (assign);
4956 switch (rhs_code)
4957 {
4958 CASE_CONVERT:
4959 case SSA_NAME:
4960 case BIT_NOT_EXPR:
4961 case BIT_IOR_EXPR:
4962 case BIT_XOR_EXPR:
4963 case BIT_AND_EXPR:
4964 return true;
4965
4966 default:
4967 return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
4968 }
4969 }
4970 return false;
4971 }
4972
4973 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
4974 a vector mask type instead of a normal vector type. Record the
4975 result in STMT_INFO->mask_precision. */
4976
4977 static void
4978 vect_determine_mask_precision (stmt_vec_info stmt_info)
4979 {
4980 vec_info *vinfo = stmt_info->vinfo;
4981
4982 if (!possible_vector_mask_operation_p (stmt_info)
4983 || stmt_info->mask_precision)
4984 return;
4985
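  /* Process this operation and, transitively, the boolean operations that
     feed it, using a worklist.  The choice for a statement is deferred
     until all of its not-yet-decided mask-producing operands have been
     pushed and resolved.  */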
4986 auto_vec<stmt_vec_info, 32> worklist;
4987 worklist.quick_push (stmt_info);
4988 while (!worklist.is_empty ())
4989 {
4990 stmt_info = worklist.last ();
4991 unsigned int orig_length = worklist.length ();
4992
4993 /* If at least one boolean input uses a vector mask type,
4994 pick the mask type with the narrowest elements.
4995
4996 ??? This is the traditional behavior. It should always produce
4997 the smallest number of operations, but isn't necessarily the
4998 optimal choice. For example, if we have:
4999
5000 a = b & c
5001
5002 where:
5003
5004 - the user of a wants it to have a mask type for 16-bit elements (M16)
5005 - b also uses M16
5006 - c uses a mask type for 8-bit elements (M8)
5007
5008 then picking M8 gives:
5009
5010 - 1 M16->M8 pack for b
5011 - 1 M8 AND for a
5012 - 2 M8->M16 unpacks for the user of a
5013
5014 whereas picking M16 would have given:
5015
5016 - 2 M8->M16 unpacks for c
5017 - 2 M16 ANDs for a
5018
5019 The number of operations is the same, but M16 would have given
5020 a shorter dependency chain and allowed more ILP. */
5021 unsigned int precision = ~0U;
5022 gassign *assign = as_a <gassign *> (stmt_info->stmt);
5023 unsigned int nops = gimple_num_ops (assign);
5024 for (unsigned int i = 1; i < nops; ++i)
5025 {
5026 tree rhs = gimple_op (assign, i);
5027 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
5028 continue;
5029
5030 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5031 if (!def_stmt_info)
5032 /* Don't let external or constant operands influence the choice.
5033 We can convert them to whichever vector type we pick. */
5034 continue;
5035
5036 if (def_stmt_info->mask_precision)
5037 {
5038 if (precision > def_stmt_info->mask_precision)
5039 precision = def_stmt_info->mask_precision;
5040 }
5041 else if (possible_vector_mask_operation_p (def_stmt_info))
5042 worklist.safe_push (def_stmt_info);
5043 }
5044
5045 /* Defer the choice if we need to visit operands first. */
5046 if (orig_length != worklist.length ())
5047 continue;
5048
5049 /* If the statement compares two values that shouldn't use vector masks,
5050 try comparing the values as normal scalars instead. */
5051 tree_code rhs_code = gimple_assign_rhs_code (assign);
5052 if (precision == ~0U
5053 && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5054 {
5055 tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
5056 scalar_mode mode;
5057 tree vectype, mask_type;
5058 if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
5059 && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
5060 && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
5061 && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
5062 precision = GET_MODE_BITSIZE (mode);
5063 }
5064
5065 if (dump_enabled_p ())
5066 {
5067 if (precision == ~0U)
5068 dump_printf_loc (MSG_NOTE, vect_location,
5069 "using normal nonmask vectors for %G",
5070 stmt_info->stmt);
5071 else
5072 dump_printf_loc (MSG_NOTE, vect_location,
5073 "using boolean precision %d for %G",
5074 precision, stmt_info->stmt);
5075 }
5076
5077 stmt_info->mask_precision = precision;
5078 worklist.pop ();
5079 }
5080 }
5081
5082 /* Handle vect_determine_precisions for STMT_INFO, given that we
5083 have already done so for the users of its result. */
5084
5085 void
5086 vect_determine_stmt_precisions (stmt_vec_info stmt_info)
5087 {
5088 vect_determine_min_output_precision (stmt_info);
5089 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
5090 {
5091 vect_determine_precisions_from_range (stmt_info, stmt);
5092 vect_determine_precisions_from_users (stmt_info, stmt);
5093 }
5094 vect_determine_mask_precision (stmt_info);
5095 }
5096
5097 /* Walk backwards through the vectorizable region to determine the
5098 values of these fields:
5099
5100 - min_output_precision
5101 - min_input_precision
5102 - operation_precision
5103 - operation_sign. */
5104
5105 void
5106 vect_determine_precisions (vec_info *vinfo)
5107 {
5108 DUMP_VECT_SCOPE ("vect_determine_precisions");
5109
5110 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5111 {
5112 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5113 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5114 unsigned int nbbs = loop->num_nodes;
5115
5116 for (unsigned int i = 0; i < nbbs; i++)
5117 {
5118 basic_block bb = bbs[nbbs - i - 1];
5119 for (gimple_stmt_iterator si = gsi_last_bb (bb);
5120 !gsi_end_p (si); gsi_prev (&si))
5121 vect_determine_stmt_precisions
5122 (vinfo->lookup_stmt (gsi_stmt (si)));
5123 }
5124 }
5125 else
5126 {
5127 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5128 gimple_stmt_iterator si = bb_vinfo->region_end;
5129 gimple *stmt;
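      /* Walk backwards from REGION_END to REGION_BEGIN.  If REGION_END is
	 the end-of-block iterator (gsi_stmt returns null), start from the
	 last statement in the block; otherwise step back one statement at
	 a time.  */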
5130 do
5131 {
5132 if (!gsi_stmt (si))
5133 si = gsi_last_bb (bb_vinfo->bb);
5134 else
5135 gsi_prev (&si);
5136 stmt = gsi_stmt (si);
5137 stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
5138 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5139 vect_determine_stmt_precisions (stmt_info);
5140 }
5141 while (stmt != gsi_stmt (bb_vinfo->region_begin));
5142 }
5143 }
5144
5145 typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);
5146
5147 struct vect_recog_func
5148 {
5149 vect_recog_func_ptr fn;
5150 const char *name;
5151 };
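/* Each recognizer follows the vect_recog_func_ptr signature above.  As a
   purely illustrative sketch (vect_recog_xxx_pattern is a placeholder name,
   not one of the entries below), a recognizer has this shape:

     static gimple *
     vect_recog_xxx_pattern (stmt_vec_info stmt_info, tree *type_out)
     {
       ... inspect STMT_INFO->stmt; return NULL if it does not match ...
       ... build the replacement statements, adding all but the last one
	   to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ ...
       *type_out = ... vector type of the final statement's result ...;
       return the final pattern statement;
     }

   See the comment above vect_pattern_recog_1 below for the full contract.  */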
5152
5153 /* Note that ordering matters - the first pattern matching on a stmt is
5154 taken, which means the more complex patterns usually need to precede the
5155 less complex ones (widen_sum only after dot_prod or sad, for example). */
5156 static vect_recog_func vect_vect_recog_func_ptrs[] = {
5157 { vect_recog_over_widening_pattern, "over_widening" },
5158 /* Must come after over_widening, which narrows the shift as much as
5159 possible beforehand. */
5160 { vect_recog_average_pattern, "average" },
5161 { vect_recog_mulhs_pattern, "mult_high" },
5162 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
5163 { vect_recog_widen_mult_pattern, "widen_mult" },
5164 { vect_recog_dot_prod_pattern, "dot_prod" },
5165 { vect_recog_sad_pattern, "sad" },
5166 { vect_recog_widen_sum_pattern, "widen_sum" },
5167 { vect_recog_pow_pattern, "pow" },
5168 { vect_recog_widen_shift_pattern, "widen_shift" },
5169 { vect_recog_rotate_pattern, "rotate" },
5170 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
5171 { vect_recog_divmod_pattern, "divmod" },
5172 { vect_recog_mult_pattern, "mult" },
5173 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
5174 { vect_recog_bool_pattern, "bool" },
5175 /* This must come before mask conversion, and includes the parts
5176 of mask conversion that are needed for gather and scatter
5177 internal functions. */
5178 { vect_recog_gather_scatter_pattern, "gather_scatter" },
5179 { vect_recog_mask_conversion_pattern, "mask_conversion" }
5180 };
5181
5182 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
5183
5184 /* Mark statements that are involved in a pattern. */
5185
5186 static inline void
5187 vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
5188 tree pattern_vectype)
5189 {
5190 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
5191 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5192
5193 gimple *orig_pattern_stmt = NULL;
5194 if (is_pattern_stmt_p (orig_stmt_info))
5195 {
5196 /* We're replacing a statement in an existing pattern definition
5197 sequence. */
5198 orig_pattern_stmt = orig_stmt_info->stmt;
5199 if (dump_enabled_p ())
5200 dump_printf_loc (MSG_NOTE, vect_location,
5201 "replacing earlier pattern %G", orig_pattern_stmt);
5202
5203 /* To keep the book-keeping simple, just swap the lhs of the
5204 old and new statements, so that the old one has a valid but
5205 unused lhs. */
5206 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
5207 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
5208 gimple_set_lhs (pattern_stmt, old_lhs);
5209
5210 if (dump_enabled_p ())
5211 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
5212
5213 /* Switch to the statement that ORIG replaces. */
5214 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
5215
5216 /* We shouldn't be replacing the main pattern statement. */
5217 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
5218 != orig_pattern_stmt);
5219 }
5220
5221 if (def_seq)
5222 for (gimple_stmt_iterator si = gsi_start (def_seq);
5223 !gsi_end_p (si); gsi_next (&si))
5224 {
5225 if (dump_enabled_p ())
5226 dump_printf_loc (MSG_NOTE, vect_location,
5227 "extra pattern stmt: %G", gsi_stmt (si));
5228 stmt_vec_info pattern_stmt_info
5229 = vect_init_pattern_stmt (gsi_stmt (si),
5230 orig_stmt_info, pattern_vectype);
5231 /* Stmts in the def sequence are not vectorizable cycle or
5232 induction defs; instead they should all be vect_internal_def
5233 feeding the main pattern stmt, which retains this def type. */
5234 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
5235 }
5236
5237 if (orig_pattern_stmt)
5238 {
5239 vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
5240
5241 /* Insert all the new pattern statements before the original one. */
5242 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5243 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
5244 orig_def_seq);
5245 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
5246 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
5247
5248 /* Remove the pattern statement that this new pattern replaces. */
5249 gsi_remove (&gsi, false);
5250 }
5251 else
5252 vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
5253
5254 /* Transfer reduction path info to the pattern. */
5255 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
5256 {
5257 vec_info *vinfo = orig_stmt_info_saved->vinfo;
5258 tree lookfor = gimple_op (orig_stmt_info_saved->stmt,
5259 1 + STMT_VINFO_REDUC_IDX (orig_stmt_info));
5260 /* Search the pattern def sequence and the main pattern stmt. Note that
5261 we may have inserted everything into a containing pattern def sequence,
5262 so the following is a bit awkward. */
5263 gimple_stmt_iterator si;
5264 gimple *s;
5265 if (def_seq)
5266 {
5267 si = gsi_start (def_seq);
5268 s = gsi_stmt (si);
5269 gsi_next (&si);
5270 }
5271 else
5272 {
5273 si = gsi_none ();
5274 s = pattern_stmt;
5275 }
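      /* Visit each def-sequence statement in turn and finish with the main
	 pattern statement, following the reduction operand LOOKFOR through
	 to each matching statement's lhs.  */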
5276 do
5277 {
5278 bool found = false;
5279 for (unsigned i = 1; i < gimple_num_ops (s); ++i)
5280 if (gimple_op (s, i) == lookfor)
5281 {
5282 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i - 1;
5283 lookfor = gimple_get_lhs (s);
5284 found = true;
5285 break;
5286 }
5287 if (s == pattern_stmt)
5288 {
5289 if (!found && dump_enabled_p ())
5290 dump_printf_loc (MSG_NOTE, vect_location,
5291 "failed to update reduction index.\n");
5292 break;
5293 }
5294 if (gsi_end_p (si))
5295 s = pattern_stmt;
5296 else
5297 {
5298 s = gsi_stmt (si);
5299 if (s == pattern_stmt)
5300 /* Found the end inside a bigger pattern def seq. */
5301 si = gsi_none ();
5302 else
5303 gsi_next (&si);
5304 }
5305 } while (1);
5306 }
5307 }
5308
5309 /* Function vect_pattern_recog_1
5310
5311 Input:
5312 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
5313 computation pattern.
5314 STMT_INFO: A stmt from which the pattern search should start.
5315
5316 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
5317 a sequence of statements that has the same functionality and can be
5318 used to replace STMT_INFO. It returns the last statement in the sequence
5319 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
5320 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
5321 statement, having first checked that the target supports the new operation
5322 in that type.
5323
5324 This function also does some bookkeeping, as explained in the documentation
5325 for vect_pattern_recog. */
5326
5327 static void
5328 vect_pattern_recog_1 (vect_recog_func *recog_func, stmt_vec_info stmt_info)
5329 {
5330 vec_info *vinfo = stmt_info->vinfo;
5331 gimple *pattern_stmt;
5332 loop_vec_info loop_vinfo;
5333 tree pattern_vectype;
5334
5335 /* If this statement has already been replaced with pattern statements,
5336 leave the original statement alone, since the first match wins.
5337 Instead try to match against the definition statements that feed
5338 the main pattern statement. */
5339 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5340 {
5341 gimple_stmt_iterator gsi;
5342 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5343 !gsi_end_p (gsi); gsi_next (&gsi))
5344 vect_pattern_recog_1 (recog_func, vinfo->lookup_stmt (gsi_stmt (gsi)));
5345 return;
5346 }
5347
5348 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5349 pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
5350 if (!pattern_stmt)
5351 {
5352 /* Clear any half-formed pattern definition sequence. */
5353 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
5354 return;
5355 }
5356
5357 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5358 gcc_assert (pattern_vectype);
5359
5360 /* Found a vectorizable pattern. */
5361 if (dump_enabled_p ())
5362 dump_printf_loc (MSG_NOTE, vect_location,
5363 "%s pattern recognized: %G",
5364 recog_func->name, pattern_stmt);
5365
5366 /* Mark the stmts that are involved in the pattern. */
5367 vect_mark_pattern_stmts (stmt_info, pattern_stmt, pattern_vectype);
5368
5369 /* Patterns cannot be vectorized using SLP, because they change the order of
5370 computation. */
5371 if (loop_vinfo)
5372 {
5373 unsigned ix, ix2;
5374 stmt_vec_info *elem_ptr;
5375 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
5376 elem_ptr, *elem_ptr == stmt_info);
5377 }
5378 }
5379
5380
5381 /* Function vect_pattern_recog
5382
5383 Input:
5384 VINFO - a vec_info describing the loop or basic-block region in which
5385 we want to look for computation idioms.
5386
5387 Output - for each computation idiom that is detected we create a new stmt
5388 that provides the same functionality and that can be vectorized. We
5389 also record some information in the stmt_vec_info of the relevant
5390 stmts, as explained below:
5391
5392 At the entry to this function we have the following stmts, with the
5393 following initial value in the STMT_VINFO fields:
5394
5395          stmt                      in_pattern_p  related_stmt  vec_stmt
5396          S1: a_i = ....                 -             -            -
5397          S2: a_2 = ..use(a_i)..         -             -            -
5398          S3: a_1 = ..use(a_2)..         -             -            -
5399          S4: a_0 = ..use(a_1)..         -             -            -
5400          S5: ... = ..use(a_0)..         -             -            -
5401
5402 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
5403 represented by a single stmt. We then:
5404 - create a new stmt S6 equivalent to the pattern (the stmt is not
5405 inserted into the code)
5406 - fill in the STMT_VINFO fields as follows:
5407
5408                                    in_pattern_p  related_stmt  vec_stmt
5409          S1: a_i = ....                 -             -            -
5410          S2: a_2 = ..use(a_i)..         -             -            -
5411          S3: a_1 = ..use(a_2)..         -             -            -
5412          S4: a_0 = ..use(a_1)..         true          S6           -
5413            '---> S6: a_new = ....       -             S4           -
5414          S5: ... = ..use(a_0)..         -             -            -
5415
5416 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
5417 to each other through the RELATED_STMT field).
5418
5419 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
5420 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
5421 remain irrelevant unless used by stmts other than S4.
5422
5423 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
5424 (because they are marked as irrelevant). It will vectorize S6, and record
5425 a pointer to the new vector stmt VS6 from S6 (as usual).
5426 S4 will be skipped, and S5 will be vectorized as usual:
5427
5428                                    in_pattern_p  related_stmt  vec_stmt
5429          S1: a_i = ....                 -             -            -
5430          S2: a_2 = ..use(a_i)..         -             -            -
5431          S3: a_1 = ..use(a_2)..         -             -            -
5432        > VS6: va_new = ....             -             -            -
5433          S4: a_0 = ..use(a_1)..         true          S6           VS6
5434            '---> S6: a_new = ....       -             S4           VS6
5435        > VS5: ... = ..vuse(va_new)..    -             -            -
5436          S5: ... = ..use(a_0)..         -             -            -
5437
5438 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
5439 elsewhere), and we'll end up with:
5440
5441 VS6: va_new = ....
5442 VS5: ... = ..vuse(va_new)..
5443
5444 In the case of more than one pattern statement, e.g., widen-mult with
5445 intermediate type:
5446
5447 S1 a_t = ;
5448 S2 a_T = (TYPE) a_t;
5449 '--> S3: a_it = (interm_type) a_t;
5450 S4 prod_T = a_T * CONST;
5451 '--> S5: prod_T' = a_it w* CONST;
5452
5453 there may be other users of a_T outside the pattern. In that case S2 will
5454 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5455 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
5456 be recorded in S3. */
5457
5458 void
5459 vect_pattern_recog (vec_info *vinfo)
5460 {
5461 class loop *loop;
5462 basic_block *bbs;
5463 unsigned int nbbs;
5464 gimple_stmt_iterator si;
5465 unsigned int i, j;
5466
5467 vect_determine_precisions (vinfo);
5468
5469 DUMP_VECT_SCOPE ("vect_pattern_recog");
5470
5471 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5472 {
5473 loop = LOOP_VINFO_LOOP (loop_vinfo);
5474 bbs = LOOP_VINFO_BBS (loop_vinfo);
5475 nbbs = loop->num_nodes;
5476
5477 /* Scan through the loop stmts, applying the pattern recognition
5478 functions starting at each stmt visited: */
5479 for (i = 0; i < nbbs; i++)
5480 {
5481 basic_block bb = bbs[i];
5482 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5483 {
5484 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
5485 /* Scan over all generic vect_recog_xxx_pattern functions. */
5486 for (j = 0; j < NUM_PATTERNS; j++)
5487 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j],
5488 stmt_info);
5489 }
5490 }
5491 }
5492 else
5493 {
5494 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5495 for (si = bb_vinfo->region_begin;
5496 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
5497 {
5498 gimple *stmt = gsi_stmt (si);
5499 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
5500 if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
5501 continue;
5502
5503 /* Scan over all generic vect_recog_xxx_pattern functions. */
5504 for (j = 0; j < NUM_PATTERNS; j++)
5505 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], stmt_info);
5506 }
5507 }
5508 }
5509