/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2019 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "fold-const-call.h"
#include "attribs.h"
#include "cgraph.h"
#include "omp-simd-clone.h"
#include "predict.h"

/* Return true if we have a useful VR_RANGE range for VAR, storing it
   in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */

static bool
vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
{
  value_range_kind vr_type = get_range_info (var, min_value, max_value);
  wide_int nonzero = get_nonzero_bits (var);
  signop sgn = TYPE_SIGN (TREE_TYPE (var));
  if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
                                         nonzero, sgn) == VR_RANGE)
    {
      if (dump_enabled_p ())
        {
          dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
          dump_printf (MSG_NOTE, " has range [");
          dump_hex (MSG_NOTE, *min_value);
          dump_printf (MSG_NOTE, ", ");
          dump_hex (MSG_NOTE, *max_value);
          dump_printf (MSG_NOTE, "]\n");
        }
      return true;
    }
  else
    {
      if (dump_enabled_p ())
        {
          dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
          dump_printf (MSG_NOTE, " has no range info\n");
        }
      return false;
    }
}
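
/* For example, if VAR is an unsigned char value with recorded range
   [0, 255] and get_nonzero_bits reports that only the low four bits
   can be nonzero, the intersection above tightens the range to
   [0, 0xf].  (Illustrative values only; the actual result depends on
   the range and nonzero-bits information recorded for VAR.)  */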

/* Report that we've found an instance of pattern NAME in statement
   STMT.  */

static void
vect_pattern_detected (const char *name, gimple *stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
}

/* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
   return the pattern statement's stmt_vec_info.  Set its vector type to
   VECTYPE if it doesn't have one already.  */

static stmt_vec_info
vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
                        tree vectype)
{
  vec_info *vinfo = orig_stmt_info->vinfo;
  stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));

  pattern_stmt_info->pattern_stmt_p = true;
  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
    STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
  return pattern_stmt_info;
}

/* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
   Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
   have one already.  */

static void
vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
                       tree vectype)
{
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info)
    = vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
}

/* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
   is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
   be different from the vector type of the final pattern statement.  */

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
                        tree vectype = NULL_TREE)
{
  vec_info *vinfo = stmt_info->vinfo;
  if (vectype)
    {
      stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
                                      new_stmt);
}

/* The caller wants to perform new operations on vect_external variable
   VAR, so that the result of the operations would also be vect_external.
   Return the edge on which the operations can be performed, if one exists.
   Return null if the operations should instead be treated as part of
   the pattern that needs them.  */

static edge
vect_get_external_def_edge (vec_info *vinfo, tree var)
{
  edge e = NULL;
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      e = loop_preheader_edge (loop_vinfo->loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (var))
        {
          basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
          if (bb == NULL
              || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
            e = NULL;
        }
    }
  return e;
}

/* Return true if the target supports a vector version of CODE,
   where CODE is known to map to a direct optab.  ITYPE specifies
   the type of (some of) the scalar inputs and OTYPE specifies the
   type of the scalar result.

   If CODE allows the inputs and outputs to have different types
   (such as for WIDEN_SUM_EXPR), it is the input mode rather
   than the output mode that determines the appropriate target pattern.
   Operand 0 of the target pattern then specifies the mode that the output
   must have.

   When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
   Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
   is nonnull.  */

static bool
vect_supportable_direct_optab_p (tree otype, tree_code code,
                                 tree itype, tree *vecotype_out,
                                 tree *vecitype_out = NULL)
{
  tree vecitype = get_vectype_for_scalar_type (itype);
  if (!vecitype)
    return false;

  tree vecotype = get_vectype_for_scalar_type (otype);
  if (!vecotype)
    return false;

  optab optab = optab_for_tree_code (code, vecitype, optab_default);
  if (!optab)
    return false;

  insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
  if (icode == CODE_FOR_nothing
      || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
    return false;

  *vecotype_out = vecotype;
  if (vecitype_out)
    *vecitype_out = vecitype;
  return true;
}
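
/* For example, a caller could check whether the target has a pattern
   for summing shorts into ints (illustrative use only):

     tree vecotype, vecitype;
     if (vect_supportable_direct_optab_p (integer_type_node, WIDEN_SUM_EXPR,
                                          short_integer_type_node,
                                          &vecotype, &vecitype))
       ...

   which succeeds only if the widening-sum pattern takes vectors of
   shorts and its operand 0 has the mode of a vector of ints.  */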

/* Round bit precision PRECISION up to a full element.  */

static unsigned int
vect_element_precision (unsigned int precision)
{
  precision = 1 << ceil_log2 (precision);
  return MAX (precision, BITS_PER_UNIT);
}
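
/* For example, on a target with BITS_PER_UNIT == 8:

     vect_element_precision (1)  == 8
     vect_element_precision (9)  == 16
     vect_element_precision (16) == 16
     vect_element_precision (17) == 32  */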

/* If OP is defined by a statement that's being considered for vectorization,
   return information about that statement, otherwise return NULL.  */

static stmt_vec_info
vect_get_internal_def (vec_info *vinfo, tree op)
{
  stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
  if (def_stmt_info
      && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
    return def_stmt_info;
  return NULL;
}

/* Check whether NAME, an ssa-name used in STMT_VINFO,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, stmt_vec_info stmt_vinfo, bool check_sign,
                   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vec_info def_stmt_info;
  if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, &def_stmt_info,
                           def_stmt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
    return false;

  return true;
}

/* Holds information about an input operand after some sign changes
   and type promotions have been peeled away.  */
struct vect_unpromoted_value {
  vect_unpromoted_value ();

  void set_op (tree, vect_def_type, stmt_vec_info = NULL);

  /* The value obtained after peeling away zero or more casts.  */
  tree op;

  /* The type of OP.  */
  tree type;

  /* The definition type of OP.  */
  vect_def_type dt;

  /* If OP is the result of peeling at least one cast, and if the cast
     of OP itself is a vectorizable statement, CASTER identifies that
     statement, otherwise it is null.  */
  stmt_vec_info caster;
};

inline vect_unpromoted_value::vect_unpromoted_value ()
  : op (NULL_TREE),
    type (NULL_TREE),
    dt (vect_uninitialized_def),
    caster (NULL)
{
}

/* Set the operand to OP_IN, its definition type to DT_IN, and the
   statement that casts it to CASTER_IN.  */

inline void
vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
                               stmt_vec_info caster_in)
{
  op = op_in;
  type = TREE_TYPE (op);
  dt = dt_in;
  caster = caster_in;
}

/* If OP is a vectorizable SSA name, strip a sequence of integer conversions
   to reach some vectorizable inner operand OP', continuing as long as it
   is possible to convert OP' back to OP using a possible sign change
   followed by a possible promotion P.  Return this OP', or null if OP is
   not a vectorizable SSA name.  If there is a promotion P, describe its
   input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
   is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
   have more than one user.

   A successful return means that it is possible to go from OP' to OP
   via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
   whereas the cast from UNPROM to OP might be a promotion, a sign
   change, or a nop.

   E.g. say we have:

       signed short *ptr = ...;
       signed short C = *ptr;
       unsigned short B = (unsigned short) C;    // sign change
       signed int A = (signed int) B;            // unsigned promotion
       ...possible other uses of A...
       unsigned int OP = (unsigned int) A;       // sign change

   In this case it's possible to go directly from C to OP using:

       OP = (unsigned int) (unsigned short) C;
            +------------+ +--------------+
               promotion      sign change

   so OP' would be C.  The input to the promotion is B, so UNPROM
   would describe B.  */

static tree
vect_look_through_possible_promotion (vec_info *vinfo, tree op,
                                      vect_unpromoted_value *unprom,
                                      bool *single_use_p = NULL)
{
  tree res = NULL_TREE;
  tree op_type = TREE_TYPE (op);
  unsigned int orig_precision = TYPE_PRECISION (op_type);
  unsigned int min_precision = orig_precision;
  stmt_vec_info caster = NULL;
  while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
    {
      /* See whether OP is simple enough to vectorize.  */
      stmt_vec_info def_stmt_info;
      gimple *def_stmt;
      vect_def_type dt;
      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
        break;

      /* If OP is the input of a demotion, skip over it to see whether
         OP is itself the result of a promotion.  If so, the combined
         effect of the promotion and the demotion might fit the required
         pattern, otherwise neither operation fits.

         This copes with cases such as the result of an arithmetic
         operation being truncated before being stored, and where that
         arithmetic operation has been recognized as an over-widened one.  */
      if (TYPE_PRECISION (op_type) <= min_precision)
        {
          /* Use OP as the UNPROM described above if we haven't yet
             found a promotion, or if using the new input preserves the
             sign of the previous promotion.  */
          if (!res
              || TYPE_PRECISION (unprom->type) == orig_precision
              || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
            {
              unprom->set_op (op, dt, caster);
              min_precision = TYPE_PRECISION (op_type);
            }
          /* Stop if we've already seen a promotion and if this
             conversion does more than change the sign.  */
          else if (TYPE_PRECISION (op_type)
                   != TYPE_PRECISION (unprom->type))
            break;

          /* The sequence now extends to OP.  */
          res = op;
        }

      /* See whether OP is defined by a cast.  Record it as CASTER if
         the cast is potentially vectorizable.  */
      if (!def_stmt)
        break;
      caster = def_stmt_info;

      /* Ignore pattern statements, since we don't link uses for them.  */
      if (caster
          && single_use_p
          && !STMT_VINFO_RELATED_STMT (caster)
          && !has_single_use (res))
        *single_use_p = false;

      gassign *assign = dyn_cast <gassign *> (def_stmt);
      if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
        break;

      /* Continue with the input to the cast.  */
      op = gimple_assign_rhs1 (def_stmt);
      op_type = TREE_TYPE (op);
    }
  return res;
}

/* OP is an integer operand to an operation that returns TYPE, and we
   want to treat the operation as a widening one.  So far we can treat
   it as widening from *COMMON_TYPE.

   Return true if OP is suitable for such a widening operation,
   either widening from *COMMON_TYPE or from some supertype of it.
   Update *COMMON_TYPE to the supertype in the latter case.

   SHIFT_P is true if OP is a shift amount.  */

static bool
vect_joust_widened_integer (tree type, bool shift_p, tree op,
                            tree *common_type)
{
  /* Calculate the minimum precision required by OP, without changing
     the sign of either operand.  */
  unsigned int precision;
  if (shift_p)
    {
      if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
        return false;
      precision = TREE_INT_CST_LOW (op);
    }
  else
    {
      precision = wi::min_precision (wi::to_widest (op),
                                     TYPE_SIGN (*common_type));
      if (precision * 2 > TYPE_PRECISION (type))
        return false;
    }

  /* If OP requires a wider type, switch to that type.  The checks
     above ensure that this is still narrower than the result.  */
  precision = vect_element_precision (precision);
  if (TYPE_PRECISION (*common_type) < precision)
    *common_type = build_nonstandard_integer_type
      (precision, TYPE_UNSIGNED (*common_type));
  return true;
}
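
/* For example, if TYPE is a 32-bit int, *COMMON_TYPE is a signed 8-bit
   type and OP is the constant 300, the constant needs 10 bits in a
   signed representation, which vect_element_precision rounds up to 16,
   so *COMMON_TYPE becomes a signed 16-bit type.  A constant needing
   17 or more bits would fail the precision check above, since the
   operation would no longer be a widening one.  */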

/* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
   is narrower than TYPE, storing the supertype in *COMMON_TYPE if so.  */

static bool
vect_joust_widened_type (tree type, tree new_type, tree *common_type)
{
  if (types_compatible_p (*common_type, new_type))
    return true;

  /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
  if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
      && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
    return true;

  /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
  if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
      && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
    {
      *common_type = new_type;
      return true;
    }

  /* We have mismatched signs, with the signed type being
     no wider than the unsigned type.  In this case we need
     a wider signed type.  */
  unsigned int precision = MAX (TYPE_PRECISION (*common_type),
                                TYPE_PRECISION (new_type));
  precision *= 2;
  if (precision * 2 > TYPE_PRECISION (type))
    return false;

  *common_type = build_nonstandard_integer_type (precision, false);
  return true;
}
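
/* For example, if *COMMON_TYPE is an unsigned 8-bit type and NEW_TYPE
   is a signed 8-bit type, neither can hold all values of the other, so
   the code above doubles the precision and makes a signed 16-bit type
   the common supertype.  That only succeeds if TYPE has at least
   32 bits, since the operation must still widen from *COMMON_TYPE.  */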

/* Check whether STMT_INFO can be viewed as a tree of integer operations
   in which each node either performs CODE or WIDENED_CODE, and where
   each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
   specifies the maximum number of leaf operands.  SHIFT_P says whether
   CODE and WIDENED_CODE are some sort of shift.

   If STMT_INFO is such a tree, return the number of leaf operands
   and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
   to a type that (a) is narrower than the result of STMT_INFO and
   (b) can hold all leaf operand values.

   Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
   exists.  */

static unsigned int
vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
                      tree_code widened_code, bool shift_p,
                      unsigned int max_nops,
                      vect_unpromoted_value *unprom, tree *common_type)
{
  /* Check for an integer operation with the right code.  */
  vec_info *vinfo = stmt_info->vinfo;
  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign)
    return 0;

  tree_code rhs_code = gimple_assign_rhs_code (assign);
  if (rhs_code != code && rhs_code != widened_code)
    return 0;

  tree type = gimple_expr_type (assign);
  if (!INTEGRAL_TYPE_P (type))
    return 0;

  /* Assume that both operands will be leaf operands.  */
  max_nops -= 2;

  /* Check the operands.  */
  unsigned int next_op = 0;
  for (unsigned int i = 0; i < 2; ++i)
    {
      vect_unpromoted_value *this_unprom = &unprom[next_op];
      unsigned int nops = 1;
      tree op = gimple_op (assign, i + 1);
      if (i == 1 && TREE_CODE (op) == INTEGER_CST)
        {
          /* We already have a common type from earlier operands.
             Update it to account for OP.  */
          this_unprom->set_op (op, vect_constant_def);
          if (!vect_joust_widened_integer (type, shift_p, op, common_type))
            return 0;
        }
      else
        {
          /* Only allow shifts by constants.  */
          if (shift_p && i == 1)
            return 0;

          if (!vect_look_through_possible_promotion (vinfo, op, this_unprom))
            return 0;

          if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
            {
              /* The operand isn't widened.  If STMT_INFO has the code
                 for an unwidened operation, recursively check whether
                 this operand is a node of the tree.  */
              if (rhs_code != code
                  || max_nops == 0
                  || this_unprom->dt != vect_internal_def)
                return 0;

              /* Give back the leaf slot allocated above now that we're
                 not treating this as a leaf operand.  */
              max_nops += 1;

              /* Recursively process the definition of the operand.  */
              stmt_vec_info def_stmt_info
                = vinfo->lookup_def (this_unprom->op);
              nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
                                           shift_p, max_nops, this_unprom,
                                           common_type);
              if (nops == 0)
                return 0;

              max_nops -= nops;
            }
          else
            {
              /* Make sure that the operand is narrower than the result.  */
              if (TYPE_PRECISION (this_unprom->type) * 2
                  > TYPE_PRECISION (type))
                return 0;

              /* Update COMMON_TYPE for the new operand.  */
              if (i == 0)
                *common_type = this_unprom->type;
              else if (!vect_joust_widened_type (type, this_unprom->type,
                                                 common_type))
                return 0;
            }
        }
      next_op += nops;
    }
  return next_op;
}

/* Return a new temporary SSA name of type TYPE for use in a pattern,
   with STMT as its defining statement.  If STMT is NULL, the caller
   must set SSA_NAME_DEF_STMT for the returned SSA name.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* STMT2_INFO describes a type conversion that could be split into STMT1
   followed by a version of STMT2_INFO that takes NEW_RHS as its first
   input.  Try to do this using pattern statements, returning true on
   success.  */

static bool
vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
                      gimple *stmt1, tree vectype)
{
  if (is_pattern_stmt_p (stmt2_info))
    {
      /* STMT2_INFO is part of a pattern.  Get the statement to which
         the pattern is attached.  */
      stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
      vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Splitting pattern statement: %G", stmt2_info->stmt);

      /* Since STMT2_INFO is a pattern statement, we can change it
         in-situ without worrying about changing the code for the
         containing block.  */
      gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
          dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
                           stmt2_info->stmt);
        }

      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
      if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
        /* STMT2_INFO is the actual pattern statement.  Add STMT1
           to the end of the definition sequence.  */
        gimple_seq_add_stmt_without_update (def_seq, stmt1);
      else
        {
          /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
             before it.  */
          gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
          gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
        }
      return true;
    }
  else
    {
      /* STMT2_INFO doesn't yet have a pattern.  Try to create a
         two-statement pattern now.  */
      gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
      tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
      tree lhs_vectype = get_vectype_for_scalar_type (lhs_type);
      if (!lhs_vectype)
        return false;

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Splitting statement: %G", stmt2_info->stmt);

      /* Add STMT1 as a singleton pattern definition sequence.  */
      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
      vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
      gimple_seq_add_stmt_without_update (def_seq, stmt1);

      /* Build the second of the two pattern statements.  */
      tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
      gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
      vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "into pattern statements: %G", stmt1);
          dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
        }

      return true;
    }
}
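
/* For example, a conversion such as:

     int_32 a = (int_32) c;   // c has an 8-bit type

   can be split at a 16-bit midpoint into:

     int_16 tmp = (int_16) c;
     int_32 a = (int_32) tmp;

   with STMT1 being the first statement and NEW_RHS being tmp.
   (Illustrative types; vect_convert_input below chooses the midpoint
   type based on the precision that the pattern actually needs.)  */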

/* Convert UNPROM to TYPE and return the result, adding new statements
   to STMT_INFO's pattern definition statements if no better way is
   available.  VECTYPE is the vector form of TYPE.  */

static tree
vect_convert_input (stmt_vec_info stmt_info, tree type,
                    vect_unpromoted_value *unprom, tree vectype)
{
  /* Check for a no-op conversion.  */
  if (types_compatible_p (type, TREE_TYPE (unprom->op)))
    return unprom->op;

  /* Allow the caller to create constant vect_unpromoted_values.  */
  if (TREE_CODE (unprom->op) == INTEGER_CST)
    return wide_int_to_tree (type, wi::to_widest (unprom->op));

  tree input = unprom->op;
  if (unprom->caster)
    {
      tree lhs = gimple_get_lhs (unprom->caster->stmt);
      tree lhs_type = TREE_TYPE (lhs);

      /* If the result of the existing cast is the right width, use it
         instead of the source of the cast.  */
      if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
        input = lhs;
      /* If the precision we want is between the source and result
         precisions of the existing cast, try splitting the cast into
         two and tapping into a mid-way point.  */
      else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
               && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
        {
          /* In order to preserve the semantics of the original cast,
             give the mid-way point the same signedness as the input value.

             It would be possible to use a signed type here instead if
             TYPE is signed and UNPROM->TYPE is unsigned, but that would
             make the sign of the midtype sensitive to the order in
             which we process the statements, since the signedness of
             TYPE is the signedness required by just one of possibly
             many users.  Also, unsigned promotions are usually as cheap
             as or cheaper than signed ones, so it's better to keep an
             unsigned promotion.  */
          tree midtype = build_nonstandard_integer_type
            (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
          tree vec_midtype = get_vectype_for_scalar_type (midtype);
          if (vec_midtype)
            {
              input = vect_recog_temp_ssa_var (midtype, NULL);
              gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
                                                       unprom->op);
              if (!vect_split_statement (unprom->caster, input, new_stmt,
                                         vec_midtype))
                append_pattern_def_seq (stmt_info, new_stmt, vec_midtype);
            }
        }

      /* See if we can reuse an existing result.  */
      if (types_compatible_p (type, TREE_TYPE (input)))
        return input;
    }

  /* We need a new conversion statement.  */
  tree new_op = vect_recog_temp_ssa_var (type, NULL);
  gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);

  /* If OP is an external value, see if we can insert the new statement
     on an incoming edge.  */
  if (input == unprom->op && unprom->dt == vect_external_def)
    if (edge e = vect_get_external_def_edge (stmt_info->vinfo, input))
      {
        basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
        gcc_assert (!new_bb);
        return new_op;
      }

  /* As a (common) last resort, add the statement to the pattern itself.  */
  append_pattern_def_seq (stmt_info, new_stmt, vectype);
  return new_op;
}

/* Invoke vect_convert_input for N elements of UNPROM and store the
   result in the corresponding elements of RESULT.  */

static void
vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
                     tree *result, tree type, vect_unpromoted_value *unprom,
                     tree vectype)
{
  for (unsigned int i = 0; i < n; ++i)
    {
      unsigned int j;
      for (j = 0; j < i; ++j)
        if (unprom[j].op == unprom[i].op)
          break;
      if (j < i)
        result[i] = result[j];
      else
        result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
    }
}

/* The caller has created a (possibly empty) sequence of pattern definition
   statements followed by a single statement PATTERN_STMT.  Cast the result
   of this final statement to TYPE.  If a new statement is needed, add
   PATTERN_STMT to the end of STMT_INFO's pattern definition statements
   and return the new statement, otherwise return PATTERN_STMT as-is.
   VECITYPE is the vector form of PATTERN_STMT's result type.  */

static gimple *
vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
                     tree vecitype)
{
  tree lhs = gimple_get_lhs (pattern_stmt);
  if (!types_compatible_p (type, TREE_TYPE (lhs)))
    {
      append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
      tree cast_var = vect_recog_temp_ssa_var (type, NULL);
      pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
    }
  return pattern_stmt;
}

/* Return true if STMT_VINFO describes a reduction for which reassociation
   is allowed.  If STMT_VINFO is part of a group, assume that it's part of
   a reduction chain and optimistically assume that all statements
   except the last allow reassociation.  */

static bool
vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
{
  return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
          ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
          : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
}

/* As above, but also require it to have code CODE and to be a reduction
   in the outermost loop.  When returning true, store the operands in
   *OP0_OUT and *OP1_OUT.  */

static bool
vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
                                tree *op0_out, tree *op1_out)
{
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  if (!loop_info)
    return false;

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || gimple_assign_rhs_code (assign) != code)
    return false;

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  if (loop && nested_in_vect_loop_p (loop, stmt_info))
    return false;

  if (!vect_reassociating_reduction_p (stmt_info))
    return false;

  *op0_out = gimple_assign_rhs1 (assign);
  *op1_out = gimple_assign_rhs2 (assign);
  return true;
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  tree oprnd0, oprnd1;
  gimple *last_stmt = stmt_vinfo->stmt;
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree type, half_type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
                                       &oprnd0, &oprnd1))
    return NULL;

  type = gimple_expr_type (last_stmt);

  vect_unpromoted_value unprom_mult;
  oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     It remains to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (!oprnd0)
    return NULL;

  stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
  if (!mult_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom0[2];
  if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
                             false, 2, unprom0, &half_type))
    return NULL;

  /* If there are two widening operations, make sure they agree on
     the sign of the extension.  */
  if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
      && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
                                        type_out, &half_vectype))
    return NULL;

  /* Get the inputs in the appropriate types.  */
  tree mult_oprnd[2];
  vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
                       unprom0, half_vectype);

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
                                      mult_oprnd[0], mult_oprnd[1], oprnd1);

  return pattern_stmt;
}
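
/* As a concrete source-level example, a loop such as (with signed char
   a[] and b[] and an int accumulator):

     int sum = 0;
     for (int i = 0; i < n; i++)
       sum += (int) a[i] * (int) b[i];

   matches the pattern above and can be vectorized with DOT_PROD_EXPR
   on targets that provide a dot-product instruction for those modes.  */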


/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
   */

static gimple *
vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree half_type;

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          [DAD = (TYPE2) DAD;  #optional]
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  tree plus_oprnd0, plus_oprnd1;
  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
                                       &plus_oprnd0, &plus_oprnd1))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Any non-truncating sequence of conversions is OK here, since
     with a successful match, the result of the ABS(U) is known to fit
     within the nonnegative range of the result type.  (It cannot be the
     negative of the minimum signed value due to the range of the widening
     MINUS_EXPR.)  */
  vect_unpromoted_value unprom_abs;
  plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
                                                      &unprom_abs);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (!plus_oprnd0)
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
  if (!abs_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
  if (!abs_stmt
      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
          && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  tree abs_type = TREE_TYPE (abs_oprnd);
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* Peel off conversions from the ABS input.  This can involve sign
     changes (e.g. from an unsigned subtraction to a signed ABS input)
     or signed promotion, but it can't include unsigned promotion.
     (Note that ABS of an unsigned promotion should have been folded
     away before now anyway.)  */
  vect_unpromoted_value unprom_diff;
  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
                                                    &unprom_diff);
  if (!abs_oprnd)
    return NULL;
  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
      && TYPE_UNSIGNED (unprom_diff.type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
  if (!diff_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom[2];
  if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
                             false, 2, unprom, &half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type,
                                        type_out, &half_vectype))
    return NULL;

  /* Get the inputs to the SAD_EXPR in the appropriate types.  */
  tree sad_oprnd[2];
  vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
                       unprom, half_vectype);

  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
                                              sad_oprnd[1], plus_oprnd1);

  return pattern_stmt;
}
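
/* As a concrete source-level example, a loop such as (with unsigned
   char a[] and b[]):

     int sum = 0;
     for (int i = 0; i < n; i++)
       sum += abs ((int) a[i] - (int) b[i]);

   matches the pattern above and can be vectorized with SAD_EXPR on
   targets that provide a sum-of-absolute-differences instruction.  */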

/* Recognize an operation that performs ORIG_CODE on widened inputs,
   so that it can be treated as though it had the form:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
    | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
    | RES_TYPE res = a_extend ORIG_CODE b_extend;

   Try to replace the pattern with:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
    | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op

   where EXT_TYPE is wider than HALF_TYPE but has the same signedness.

   SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
   name of the pattern being matched, for dump purposes.  */

static gimple *
vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
                             tree_code orig_code, tree_code wide_code,
                             bool shift_p, const char *name)
{
  gimple *last_stmt = last_stmt_info->stmt;

  vect_unpromoted_value unprom[2];
  tree half_type;
  if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
                             shift_p, 2, unprom, &half_type))
    return NULL;

  /* Pattern detected.  */
  vect_pattern_detected (name, last_stmt);

  tree type = gimple_expr_type (last_stmt);
  tree itype = type;
  if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
      || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
                                            TYPE_UNSIGNED (half_type));

  /* Check target support.  */
  tree vectype = get_vectype_for_scalar_type (half_type);
  tree vecitype = get_vectype_for_scalar_type (itype);
  enum tree_code dummy_code;
  int dummy_int;
  auto_vec<tree> dummy_vec;
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (wide_code, last_stmt_info,
                                          vecitype, vectype,
                                          &dummy_code, &dummy_code,
                                          &dummy_int, &dummy_vec))
    return NULL;

  *type_out = get_vectype_for_scalar_type (type);
  if (!*type_out)
    return NULL;

  tree oprnd[2];
  vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);

  tree var = vect_recog_temp_ssa_var (itype, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, wide_code,
                                              oprnd[0], oprnd[1]);

  return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
}

/* Try to detect multiplication on widened inputs, converting MULT_EXPR
   to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
{
  return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
                                      WIDEN_MULT_EXPR, false,
                                      "vect_recog_widen_mult_pattern");
}
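
/* For example, with short a and b:

     int res = (int) a * (int) b;

   is replaced by a WIDEN_MULT_EXPR pattern statement, so that targets
   with a widening-multiply instruction need not extend the inputs to
   full ints first.  */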

/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
     or
        x = sqrt (x)
   */

static gimple *
vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree base, exp;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  switch (gimple_call_combined_fn (last_stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      break;

    default:
      return NULL;
    }

  base = gimple_call_arg (last_stmt, 0);
  exp = gimple_call_arg (last_stmt, 1);
  if (TREE_CODE (exp) != REAL_CST
      && TREE_CODE (exp) != INTEGER_CST)
    {
      if (flag_unsafe_math_optimizations
          && TREE_CODE (base) == REAL_CST
          && !gimple_call_internal_p (last_stmt))
        {
          combined_fn log_cfn;
          built_in_function exp_bfn;
          switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
            {
            case BUILT_IN_POW:
              log_cfn = CFN_BUILT_IN_LOG;
              exp_bfn = BUILT_IN_EXP;
              break;
            case BUILT_IN_POWF:
              log_cfn = CFN_BUILT_IN_LOGF;
              exp_bfn = BUILT_IN_EXPF;
              break;
            case BUILT_IN_POWL:
              log_cfn = CFN_BUILT_IN_LOGL;
              exp_bfn = BUILT_IN_EXPL;
              break;
            default:
              return NULL;
            }
          tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
          tree exp_decl = builtin_decl_implicit (exp_bfn);
          /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
             does that, but if C is a power of 2, we want to use
             exp2 (log2 (C) * x) in the non-vectorized version, but for
             vectorization we don't have vectorized exp2.  */
          if (logc
              && TREE_CODE (logc) == REAL_CST
              && exp_decl
              && lookup_attribute ("omp declare simd",
                                   DECL_ATTRIBUTES (exp_decl)))
            {
              cgraph_node *node = cgraph_node::get_create (exp_decl);
              if (node->simd_clones == NULL)
                {
                  if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
                      || node->definition)
                    return NULL;
                  expand_simd_clones (node);
                  if (node->simd_clones == NULL)
                    return NULL;
                }
              *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
              if (!*type_out)
                return NULL;
              tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
              gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
              append_pattern_def_seq (stmt_vinfo, g);
              tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
              g = gimple_build_call (exp_decl, 1, def);
              gimple_call_set_lhs (g, res);
              return g;
            }
        }

      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
                                            TREE_TYPE (base), type_out))
        return NULL;

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_out
          && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
                                             OPTIMIZE_FOR_SPEED))
        {
          gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
          var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
          gimple_call_set_lhs (stmt, var);
          gimple_call_set_nothrow (stmt, true);
          return stmt;
        }
    }

  return NULL;
}
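
/* For example, constant exponents are handled directly: pow (x, 2.0)
   becomes x * x and pow (x, 0.5) becomes sqrt (x), when the target
   supports them.  With -funsafe-math-optimizations, a constant base
   such as:

     y = pow (2.5, x);

   is rewritten as exp (log (2.5) * x), i.e. a constant multiply
   followed by a call to exp, which can then be vectorized through
   exp's "omp declare simd" clone if one is available.  */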


/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S3, the pattern {S2,S3} will
   be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1;
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
                                       &oprnd0, &oprnd1))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     It remains to check that oprnd0 is defined by a cast from type 'type'
     to type 'TYPE'.  */

  vect_unpromoted_value unprom0;
  if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
      || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
    return NULL;

  vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);

  if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
                                        type_out))
    return NULL;

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);

  return pattern_stmt;
}
1482
1483 /* Recognize cases in which an operation is performed in one type WTYPE
1484 but could be done more efficiently in a narrower type NTYPE. For example,
1485 if we have:
1486
1487 ATYPE a; // narrower than NTYPE
1488 BTYPE b; // narrower than NTYPE
1489 WTYPE aw = (WTYPE) a;
1490 WTYPE bw = (WTYPE) b;
1491 WTYPE res = aw + bw; // only uses of aw and bw
1492
1493 then it would be more efficient to do:
1494
1495 NTYPE an = (NTYPE) a;
1496 NTYPE bn = (NTYPE) b;
1497 NTYPE resn = an + bn;
1498 WTYPE res = (WTYPE) resn;
1499
1500 Other situations include things like:
1501
1502 ATYPE a; // NTYPE or narrower
1503 WTYPE aw = (WTYPE) a;
1504 WTYPE res = aw + b;
1505
1506 when only "(NTYPE) res" is significant. In that case it's more efficient
1507 to truncate "b" and do the operation on NTYPE instead:
1508
1509 NTYPE an = (NTYPE) a;
1510 NTYPE bn = (NTYPE) b; // truncation
1511 NTYPE resn = an + bn;
1512 WTYPE res = (WTYPE) resn;
1513
1514 All users of "res" should then use "resn" instead, making the final
1515 statement dead (not marked as relevant). The final statement is still
1516 needed to maintain the type correctness of the IR.
1517
1518 vect_determine_precisions has already determined the minimum
1519 precison of the operation and the minimum precision required
1520 by users of the result. */
1521
1522 static gimple *
vect_recog_over_widening_pattern(stmt_vec_info last_stmt_info,tree * type_out)1523 vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1524 {
1525 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1526 if (!last_stmt)
1527 return NULL;
1528
1529 /* See whether we have found that this operation can be done on a
1530 narrower type without changing its semantics. */
1531 unsigned int new_precision = last_stmt_info->operation_precision;
1532 if (!new_precision)
1533 return NULL;
1534
1535 vec_info *vinfo = last_stmt_info->vinfo;
1536 tree lhs = gimple_assign_lhs (last_stmt);
1537 tree type = TREE_TYPE (lhs);
1538 tree_code code = gimple_assign_rhs_code (last_stmt);
1539
1540 /* Keep the first operand of a COND_EXPR as-is: only the other two
1541 operands are interesting. */
1542 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1543
1544 /* Check the operands. */
1545 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1546 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1547 unprom.quick_grow (nops);
1548 unsigned int min_precision = 0;
1549 bool single_use_p = false;
1550 for (unsigned int i = 0; i < nops; ++i)
1551 {
1552 tree op = gimple_op (last_stmt, first_op + i);
1553 if (TREE_CODE (op) == INTEGER_CST)
1554 unprom[i].set_op (op, vect_constant_def);
1555 else if (TREE_CODE (op) == SSA_NAME)
1556 {
1557 bool op_single_use_p = true;
1558 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1559 &op_single_use_p))
1560 return NULL;
1561 /* If:
1562
1563 (1) N bits of the result are needed;
1564 (2) all inputs are widened from M<N bits; and
1565 (3) one operand OP is a single-use SSA name
1566
1567 we can shift the M->N widening from OP to the output
1568 without changing the number or type of extensions involved.
1569 This then reduces the number of copies of STMT_INFO.
1570
1571 If instead of (3) more than one operand is a single-use SSA name,
1572 shifting the extension to the output is even more of a win.
1573
1574 If instead:
1575
1576 (1) N bits of the result are needed;
1577 (2) one operand OP2 is widened from M2<N bits;
1578 (3) another operand OP1 is widened from M1<M2 bits; and
1579 (4) both OP1 and OP2 are single-use
1580
1581 the choice is between:
1582
1583 (a) truncating OP2 to M1, doing the operation on M1,
1584 and then widening the result to N
1585
1586 (b) widening OP1 to M2, doing the operation on M2, and then
1587 widening the result to N
1588
1589 Both shift the M2->N widening of the inputs to the output.
1590 (a) additionally shifts the M1->M2 widening to the output;
1591 it requires fewer copies of STMT_INFO but requires an extra
1592 M2->M1 truncation.
1593
1594 Which is better will depend on the complexity and cost of
1595 STMT_INFO, which is hard to predict at this stage. However,
1596 a clear tie-breaker in favor of (b) is the fact that the
1597 truncation in (a) increases the length of the operation chain.
1598
1599 If instead of (4) only one of OP1 or OP2 is single-use,
1600 (b) is still a win over doing the operation in N bits:
1601 it still shifts the M2->N widening on the single-use operand
1602 to the output and reduces the number of STMT_INFO copies.
1603
1604 If neither operand is single-use then operating on fewer than
1605 N bits might lead to more extensions overall. Whether it does
1606 or not depends on global information about the vectorization
1607 region, and whether that's a good trade-off would again
1608 depend on the complexity and cost of the statements involved,
1609 as well as things like register pressure that are not normally
1610 modelled at this stage. We therefore ignore these cases
1611 and just optimize the clear single-use wins above.
1612
1613 Thus we take the maximum precision of the unpromoted operands
1614 and record whether any operand is single-use. */
1615 if (unprom[i].dt == vect_internal_def)
1616 {
1617 min_precision = MAX (min_precision,
1618 TYPE_PRECISION (unprom[i].type));
1619 single_use_p |= op_single_use_p;
1620 }
1621 }
1622 }
1623
1624 /* Although the operation could be done in operation_precision, we have
1625 to balance that against introducing extra truncations or extensions.
1626 Calculate the minimum precision that can be handled efficiently.
1627
1628 The loop above determined that the operation could be handled
1629 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1630 extension from the inputs to the output without introducing more
1631 instructions, and would reduce the number of instructions required
1632 for STMT_INFO itself.
1633
1634 vect_determine_precisions has also determined that the result only
1635 needs min_output_precision bits. Truncating by a factor of N times
1636 requires a tree of N - 1 instructions, so if TYPE is N times wider
1637 than min_output_precision, doing the operation in TYPE and truncating
1638 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1639 In contrast:
1640
1641 - truncating the input to a unary operation and doing the operation
1642 in the new type requires at most N - 1 + 1 = N instructions per
1643 output vector
1644
1645 - doing the same for a binary operation requires at most
1646 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1647
1648 Both unary and binary operations require fewer instructions than
1649 this if the operands were extended from a suitable truncated form.
1650 Thus there is usually nothing to lose by doing operations in
1651 min_output_precision bits, but there can be something to gain. */
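  /* For example, if TYPE is 32 bits and min_output_precision is 8 bits,
     then N = 4: doing the operation in TYPE costs 4 copies of STMT_INFO
     plus a 3-instruction pack tree = 7 = 2N - 1 instructions per output
     vector, whereas truncating the input to a unary operation and doing
     it in 8 bits costs at most 3 + 1 = 4.  */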
1652 if (!single_use_p)
1653 min_precision = last_stmt_info->min_output_precision;
1654 else
1655 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1656
1657 /* Apply the minimum efficient precision we just calculated. */
1658 if (new_precision < min_precision)
1659 new_precision = min_precision;
1660 new_precision = vect_element_precision (new_precision);
1661 if (new_precision >= TYPE_PRECISION (type))
1662 return NULL;
1663
1664 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1665
1666 *type_out = get_vectype_for_scalar_type (type);
1667 if (!*type_out)
1668 return NULL;
1669
1670 /* We've found a viable pattern. Get the new type of the operation. */
1671 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1672 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1673
1674 /* If we're truncating an operation, we need to make sure that we
1675 don't introduce new undefined overflow. The codes tested here are
1676 a subset of those accepted by vect_truncatable_operation_p. */
1677 tree op_type = new_type;
1678 if (TYPE_OVERFLOW_UNDEFINED (new_type)
1679 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
1680 op_type = build_nonstandard_integer_type (new_precision, true);
1681
1682 /* We specifically don't check here whether the target supports the
1683 new operation, since it might be something that a later pattern
1684 wants to rewrite anyway. If targets have a minimum element size
1685 for some optabs, we should pattern-match smaller ops to larger ops
1686 where beneficial. */
1687 tree new_vectype = get_vectype_for_scalar_type (new_type);
1688 tree op_vectype = get_vectype_for_scalar_type (op_type);
1689 if (!new_vectype || !op_vectype)
1690 return NULL;
1691
1692 if (dump_enabled_p ())
1693 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1694 type, new_type);
1695
1696 /* Calculate the rhs operands for an operation on OP_TYPE. */
1697 tree ops[3] = {};
1698 for (unsigned int i = 1; i < first_op; ++i)
1699 ops[i - 1] = gimple_op (last_stmt, i);
1700 vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
1701 op_type, &unprom[0], op_vectype);
1702
1703 /* Use the operation to produce a result of type OP_TYPE. */
1704 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
1705 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1706 ops[0], ops[1], ops[2]);
1707 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1708
1709 if (dump_enabled_p ())
1710 dump_printf_loc (MSG_NOTE, vect_location,
1711 "created pattern stmt: %G", pattern_stmt);
1712
1713 /* Convert back to the original signedness, if OP_TYPE is different
1714 from NEW_TYPE. */
1715 if (op_type != new_type)
1716 pattern_stmt = vect_convert_output (last_stmt_info, new_type,
1717 pattern_stmt, op_vectype);
1718
1719 /* Promote the result to the original type. */
1720 pattern_stmt = vect_convert_output (last_stmt_info, type,
1721 pattern_stmt, new_vectype);
1722
1723 return pattern_stmt;
1724 }
1725
1726 /* Recognize the patterns:
1727
1728 ATYPE a; // narrower than TYPE
1729 BTYPE b; // narrower than TYPE
1730 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1731 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1732
1733 where only the bottom half of avg is used. Try to transform them into:
1734
1735 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1736 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1737
1738 followed by:
1739
1740 TYPE avg = (TYPE) avg';
1741
1742 where NTYPE is no wider than half of TYPE. Since only the bottom half
1743 of avg is used, all or part of the cast of avg' should become redundant. */
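/* For instance, a minimal sketch of case (2) with TYPE = unsigned int
   and ATYPE = BTYPE = NTYPE = unsigned char:

     unsigned char a, b, c;
     c = (unsigned char) (((unsigned int) a + (unsigned int) b + 1) >> 1);

   becomes

     unsigned char avg' = .AVG_CEIL (a, b);
     c = avg';

   where the final cast costs nothing because only the low 8 bits of
   the 32-bit average are used.  */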
1744
1745 static gimple *
1746 vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1747 {
1748 /* Check for a shift right by one bit. */
1749 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1750 vec_info *vinfo = last_stmt_info->vinfo;
1751 if (!last_stmt
1752 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1753 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1754 return NULL;
1755
1756 /* Check that the shift result is wider than the users of the
1757 result need (i.e. that narrowing would be a natural choice). */
1758 tree lhs = gimple_assign_lhs (last_stmt);
1759 tree type = TREE_TYPE (lhs);
1760 unsigned int target_precision
1761 = vect_element_precision (last_stmt_info->min_output_precision);
1762 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1763 return NULL;
1764
1765 /* Look through any change in sign on the shift input. */
1766 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1767 vect_unpromoted_value unprom_plus;
1768 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
1769 &unprom_plus);
1770 if (!rshift_rhs
1771 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
1772 return NULL;
1773
1774 /* Get the definition of the shift input. */
1775 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1776 if (!plus_stmt_info)
1777 return NULL;
1778
1779 /* Check whether the shift input can be seen as a tree of additions on
1780 2 or 3 widened inputs.
1781
1782 Note that the pattern should be a win even if the result of one or
1783 more additions is reused elsewhere: if the pattern matches, we'd be
1784 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1785 internal_fn ifn = IFN_AVG_FLOOR;
1786 vect_unpromoted_value unprom[3];
1787 tree new_type;
1788 unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
1789 PLUS_EXPR, false, 3,
1790 unprom, &new_type);
1791 if (nops == 0)
1792 return NULL;
1793 if (nops == 3)
1794 {
1795 /* Check that one operand is 1. */
1796 unsigned int i;
1797 for (i = 0; i < 3; ++i)
1798 if (integer_onep (unprom[i].op))
1799 break;
1800 if (i == 3)
1801 return NULL;
1802 /* Throw away the 1 operand and keep the other two. */
1803 if (i < 2)
1804 unprom[i] = unprom[2];
1805 ifn = IFN_AVG_CEIL;
1806 }
1807
1808 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
1809
1810 /* We know that:
1811
1812 (a) the operation can be viewed as:
1813
1814 TYPE widened0 = (TYPE) UNPROM[0];
1815 TYPE widened1 = (TYPE) UNPROM[1];
1816 TYPE tmp1 = widened0 + widened1 {+ 1};
1817 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
1818
1819 (b) the first two statements are equivalent to:
1820
1821 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
1822 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
1823
1824 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
1825 where sensible;
1826
1827 (d) all the operations can be performed correctly at twice the width of
1828 NEW_TYPE, due to the nature of the average operation; and
1829
1830 (e) users of the result of the right shift need only TARGET_PRECISION
1831 bits, where TARGET_PRECISION is no more than half of TYPE's
1832 precision.
1833
1834 Under these circumstances, the only situation in which NEW_TYPE
1835 could be narrower than TARGET_PRECISION is if widened0, widened1
1836 and an addition result are all used more than once. Thus we can
1837 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
1838 as "free", whereas widening the result of the average instruction
1839 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
1840 therefore better not to go narrower than TARGET_PRECISION. */
1841 if (TYPE_PRECISION (new_type) < target_precision)
1842 new_type = build_nonstandard_integer_type (target_precision,
1843 TYPE_UNSIGNED (new_type));
1844
1845 /* Check for target support. */
1846 tree new_vectype = get_vectype_for_scalar_type (new_type);
1847 if (!new_vectype
1848 || !direct_internal_fn_supported_p (ifn, new_vectype,
1849 OPTIMIZE_FOR_SPEED))
1850 return NULL;
1851
1852 /* The IR requires a valid vector type for the cast result, even though
1853 it's likely to be discarded. */
1854 *type_out = get_vectype_for_scalar_type (type);
1855 if (!*type_out)
1856 return NULL;
1857
1858 /* Generate the IFN_AVG* call. */
1859 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1860 tree new_ops[2];
1861 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
1862 unprom, new_vectype);
1863 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
1864 new_ops[1]);
1865 gimple_call_set_lhs (average_stmt, new_var);
1866 gimple_set_location (average_stmt, gimple_location (last_stmt));
1867
1868 if (dump_enabled_p ())
1869 dump_printf_loc (MSG_NOTE, vect_location,
1870 "created pattern stmt: %G", average_stmt);
1871
1872 return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
1873 }
1874
1875 /* Recognize cases in which the input to a cast is wider than its
1876 output, and the input is fed by a widening operation. Fold this
1877 by removing the unnecessary intermediate widening. E.g.:
1878
1879 unsigned char a;
1880 unsigned int b = (unsigned int) a;
1881 unsigned short c = (unsigned short) b;
1882
1883 -->
1884
1885 unsigned short c = (unsigned short) a;
1886
1887 Although this is rare in input IR, it is an expected side-effect
1888 of the over-widening pattern above.
1889
1890 This is beneficial also for integer-to-float conversions, if the
1891 widened integer has more bits than the float, and if the unwidened
1892 input doesn't. */
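/* As an example of the float case: for "unsigned char a", the sequence
   "float f = (float) (unsigned int) a" can become "float f = (float) a",
   since float's 24-bit significand holds every 8-bit value exactly even
   though it cannot hold every 32-bit value.  */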
1893
1894 static gimple *
1895 vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1896 {
1897 /* Check for a cast, including an integer-to-float conversion. */
1898 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1899 if (!last_stmt)
1900 return NULL;
1901 tree_code code = gimple_assign_rhs_code (last_stmt);
1902 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
1903 return NULL;
1904
1905 /* Make sure that the rhs is a scalar with a natural bitsize. */
1906 tree lhs = gimple_assign_lhs (last_stmt);
1907 if (!lhs)
1908 return NULL;
1909 tree lhs_type = TREE_TYPE (lhs);
1910 scalar_mode lhs_mode;
1911 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
1912 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
1913 return NULL;
1914
1915 /* Check for a narrowing operation (from a vector point of view). */
1916 tree rhs = gimple_assign_rhs1 (last_stmt);
1917 tree rhs_type = TREE_TYPE (rhs);
1918 if (!INTEGRAL_TYPE_P (rhs_type)
1919 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
1920 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
1921 return NULL;
1922
1923 /* Try to find an unpromoted input. */
1924 vec_info *vinfo = last_stmt_info->vinfo;
1925 vect_unpromoted_value unprom;
1926 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
1927 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
1928 return NULL;
1929
1930 /* If the bits above RHS_TYPE matter, make sure that they're the
1931 same when extending from UNPROM as they are when extending from RHS. */
1932 if (!INTEGRAL_TYPE_P (lhs_type)
1933 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
1934 return NULL;
1935
1936 /* We can get the same result by casting UNPROM directly, to avoid
1937 the unnecessary widening and narrowing. */
1938 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
1939
1940 *type_out = get_vectype_for_scalar_type (lhs_type);
1941 if (!*type_out)
1942 return NULL;
1943
1944 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1945 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
1946 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1947
1948 return pattern_stmt;
1949 }
1950
1951 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
1952 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
1953
1954 static gimple *
1955 vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1956 {
1957 return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
1958 WIDEN_LSHIFT_EXPR, true,
1959 "vect_recog_widen_shift_pattern");
1960 }
1961
1962 /* Detect a rotate pattern that wouldn't be otherwise vectorized:
1963
1964 type a_t, b_t, c_t;
1965
1966 S0 a_t = b_t r<< c_t;
1967
1968 Input/Output:
1969
1970 * STMT_VINFO: The stmt from which the pattern search begins,
1971 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1972 with a sequence:
1973
1974 S1 d_t = -c_t;
1975 S2 e_t = d_t & (B - 1);
1976 S3 f_t = b_t << c_t;
1977 S4 g_t = b_t >> e_t;
1978 S0 a_t = f_t | g_t;
1979
1980 where B is the element bitsize of type.
1981
1982 Output:
1983
1984 * TYPE_OUT: The type of the output of this pattern.
1985
1986 * Return value: A new stmt that will be used to replace the rotate
1987 S0 stmt. */
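/* For example, with B = 32 and c_t = 5 the sequence computes
   e_t = (-5) & 31 = 27 and hence a_t = f_t | g_t
   = (b_t << 5) | (b_t >> 27), which equals b_t r<< 5 for any
   unsigned b_t.  */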
1988
1989 static gimple *
1990 vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1991 {
1992 gimple *last_stmt = stmt_vinfo->stmt;
1993 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1994 gimple *pattern_stmt, *def_stmt;
1995 enum tree_code rhs_code;
1996 vec_info *vinfo = stmt_vinfo->vinfo;
1997 enum vect_def_type dt;
1998 optab optab1, optab2;
1999 edge ext_def = NULL;
2000
2001 if (!is_gimple_assign (last_stmt))
2002 return NULL;
2003
2004 rhs_code = gimple_assign_rhs_code (last_stmt);
2005 switch (rhs_code)
2006 {
2007 case LROTATE_EXPR:
2008 case RROTATE_EXPR:
2009 break;
2010 default:
2011 return NULL;
2012 }
2013
2014 lhs = gimple_assign_lhs (last_stmt);
2015 oprnd0 = gimple_assign_rhs1 (last_stmt);
2016 type = TREE_TYPE (oprnd0);
2017 oprnd1 = gimple_assign_rhs2 (last_stmt);
2018 if (TREE_CODE (oprnd0) != SSA_NAME
2019 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2020 || !INTEGRAL_TYPE_P (type)
2021 || !TYPE_UNSIGNED (type))
2022 return NULL;
2023
2024 stmt_vec_info def_stmt_info;
2025 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2026 return NULL;
2027
2028 if (dt != vect_internal_def
2029 && dt != vect_constant_def
2030 && dt != vect_external_def)
2031 return NULL;
2032
2033 vectype = get_vectype_for_scalar_type (type);
2034 if (vectype == NULL_TREE)
2035 return NULL;
2036
2037 /* If vector/vector or vector/scalar rotate is supported by the target,
2038 don't do anything here. */
2039 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2040 if (optab1
2041 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2042 return NULL;
2043
2044 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2045 {
2046 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2047 if (optab2
2048 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2049 return NULL;
2050 }
2051
2052 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2053 don't do anything here either. */
2054 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2055 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2056 if (!optab1
2057 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2058 || !optab2
2059 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2060 {
2061 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2062 return NULL;
2063 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2064 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2065 if (!optab1
2066 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2067 || !optab2
2068 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2069 return NULL;
2070 }
2071
2072 *type_out = vectype;
2073
2074 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
2075 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2076
2077 def = NULL_TREE;
2078 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2079 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2080 def = oprnd1;
2081 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2082 {
2083 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2084 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2085 && TYPE_PRECISION (TREE_TYPE (rhs1))
2086 == TYPE_PRECISION (type))
2087 def = rhs1;
2088 }
2089
2090 if (def == NULL_TREE)
2091 {
2092 def = vect_recog_temp_ssa_var (type, NULL);
2093 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2094 append_pattern_def_seq (stmt_vinfo, def_stmt);
2095 }
2096 stype = TREE_TYPE (def);
2097
2098 if (TREE_CODE (def) == INTEGER_CST)
2099 {
2100 if (!tree_fits_uhwi_p (def)
2101 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2102 || integer_zerop (def))
2103 return NULL;
2104 def2 = build_int_cst (stype,
2105 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2106 }
2107 else
2108 {
2109 tree vecstype = get_vectype_for_scalar_type (stype);
2110
2111 if (vecstype == NULL_TREE)
2112 return NULL;
2113 def2 = vect_recog_temp_ssa_var (stype, NULL);
2114 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2115 if (ext_def)
2116 {
2117 basic_block new_bb
2118 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2119 gcc_assert (!new_bb);
2120 }
2121 else
2122 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2123
2124 def2 = vect_recog_temp_ssa_var (stype, NULL);
2125 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
2126 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2127 gimple_assign_lhs (def_stmt), mask);
2128 if (ext_def)
2129 {
2130 basic_block new_bb
2131 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2132 gcc_assert (!new_bb);
2133 }
2134 else
2135 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2136 }
2137
2138 var1 = vect_recog_temp_ssa_var (type, NULL);
2139 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2140 ? LSHIFT_EXPR : RSHIFT_EXPR,
2141 oprnd0, def);
2142 append_pattern_def_seq (stmt_vinfo, def_stmt);
2143
2144 var2 = vect_recog_temp_ssa_var (type, NULL);
2145 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2146 ? RSHIFT_EXPR : LSHIFT_EXPR,
2147 oprnd0, def2);
2148 append_pattern_def_seq (stmt_vinfo, def_stmt);
2149
2150 /* Pattern detected. */
2151 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2152
2153 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2154 var = vect_recog_temp_ssa_var (type, NULL);
2155 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2156
2157 return pattern_stmt;
2158 }
2159
2160 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2161 vectorized:
2162
2163 type a_t;
2164 TYPE b_T, res_T;
2165
2166 S1 a_t = ;
2167 S2 b_T = ;
2168 S3 res_T = b_T op a_t;
2169
2170 where type 'TYPE' is a type with different size than 'type',
2171 and op is <<, >> or rotate.
2172
2173 Also detect cases:
2174
2175 type a_t;
2176 TYPE b_T, c_T, res_T;
2177
2178 S0 c_T = ;
2179 S1 a_t = (type) c_T;
2180 S2 b_T = ;
2181 S3 res_T = b_T op a_t;
2182
2183 Input/Output:
2184
2185 * STMT_VINFO: The stmt from which the pattern search begins,
2186 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2187 with a shift/rotate which has the same type on both operands; in
2188 the second case just b_T op c_T, in the first case with a cast of
2189 a_t to TYPE added in STMT_VINFO_PATTERN_DEF_SEQ.
2190
2191 Output:
2192
2193 * TYPE_OUT: The type of the output of this pattern.
2194
2195 * Return value: A new stmt that will be used to replace the shift/rotate
2196 S3 stmt. */
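/* For instance, in the second case above with TYPE = int and
   type = char:

     int c_T = ...;
     char a_t = (char) c_T;
     int res_T = b_T << a_t;

   becomes "res_T = b_T << (c_T & 255)": masking with the low bits of
   the narrow type preserves the truncating cast's effect while giving
   both shift operands the same vector type.  */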
2197
2198 static gimple *
2199 vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
2200 tree *type_out)
2201 {
2202 gimple *last_stmt = stmt_vinfo->stmt;
2203 tree oprnd0, oprnd1, lhs, var;
2204 gimple *pattern_stmt;
2205 enum tree_code rhs_code;
2206 vec_info *vinfo = stmt_vinfo->vinfo;
2207
2208 if (!is_gimple_assign (last_stmt))
2209 return NULL;
2210
2211 rhs_code = gimple_assign_rhs_code (last_stmt);
2212 switch (rhs_code)
2213 {
2214 case LSHIFT_EXPR:
2215 case RSHIFT_EXPR:
2216 case LROTATE_EXPR:
2217 case RROTATE_EXPR:
2218 break;
2219 default:
2220 return NULL;
2221 }
2222
2223 lhs = gimple_assign_lhs (last_stmt);
2224 oprnd0 = gimple_assign_rhs1 (last_stmt);
2225 oprnd1 = gimple_assign_rhs2 (last_stmt);
2226 if (TREE_CODE (oprnd0) != SSA_NAME
2227 || TREE_CODE (oprnd1) != SSA_NAME
2228 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2229 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2230 || TYPE_PRECISION (TREE_TYPE (lhs))
2231 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2232 return NULL;
2233
2234 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2235 if (!def_vinfo)
2236 return NULL;
2237
2238 *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2239 if (*type_out == NULL_TREE)
2240 return NULL;
2241
2242 tree def = NULL_TREE;
2243 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2244 if (def_stmt && gimple_assign_cast_p (def_stmt))
2245 {
2246 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2247 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2248 && TYPE_PRECISION (TREE_TYPE (rhs1))
2249 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2250 {
2251 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2252 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2253 def = rhs1;
2254 else
2255 {
2256 tree mask
2257 = build_low_bits_mask (TREE_TYPE (rhs1),
2258 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2259 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2260 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2261 tree vecstype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2262 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2263 }
2264 }
2265 }
2266
2267 if (def == NULL_TREE)
2268 {
2269 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2270 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2271 append_pattern_def_seq (stmt_vinfo, def_stmt);
2272 }
2273
2274 /* Pattern detected. */
2275 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2276
2277 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2278 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2279 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2280
2281 return pattern_stmt;
2282 }
2283
2284 /* Return true iff the target has a vector optab implementing the operation
2285 CODE on type VECTYPE. */
2286
2287 static bool
2288 target_has_vecop_for_code (tree_code code, tree vectype)
2289 {
2290 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2291 return voptab
2292 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2293 }
2294
2295 /* Verify that the target has optabs of VECTYPE to perform all the steps
2296 needed by the multiplication-by-immediate synthesis algorithm described by
2297 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2298 present. Return true iff the target supports all the steps. */
2299
2300 static bool
2301 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2302 tree vectype, bool synth_shift_p)
2303 {
2304 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2305 return false;
2306
2307 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2308 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2309
2310 if (var == negate_variant
2311 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2312 return false;
2313
2314 /* If we must synthesize shifts with additions make sure that vector
2315 addition is available. */
2316 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2317 return false;
2318
2319 for (int i = 1; i < alg->ops; i++)
2320 {
2321 switch (alg->op[i])
2322 {
2323 case alg_shift:
2324 break;
2325 case alg_add_t_m2:
2326 case alg_add_t2_m:
2327 case alg_add_factor:
2328 if (!supports_vplus)
2329 return false;
2330 break;
2331 case alg_sub_t_m2:
2332 case alg_sub_t2_m:
2333 case alg_sub_factor:
2334 if (!supports_vminus)
2335 return false;
2336 break;
2337 case alg_unknown:
2338 case alg_m:
2339 case alg_zero:
2340 case alg_impossible:
2341 return false;
2342 default:
2343 gcc_unreachable ();
2344 }
2345 }
2346
2347 return true;
2348 }
2349
2350 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2351 putting the final result in DEST. Append all statements but the last into
2352 VINFO. Return the last statement. */
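/* For example, AMNT = 3 expands to three doublings:

     t1 = op + op;        (op << 1)
     t2 = t1 + t1;        (op << 2)
     dest = t2 + t2;      (op << 3)

   with the first two statements appended to VINFO and the last one
   returned.  */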
2353
2354 static gimple *
2355 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2356 stmt_vec_info vinfo)
2357 {
2358 HOST_WIDE_INT i;
2359 tree itype = TREE_TYPE (op);
2360 tree prev_res = op;
2361 gcc_assert (amnt >= 0);
2362 for (i = 0; i < amnt; i++)
2363 {
2364 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2365 : dest;
2366 gimple *stmt
2367 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2368 prev_res = tmp_var;
2369 if (i < amnt - 1)
2370 append_pattern_def_seq (vinfo, stmt);
2371 else
2372 return stmt;
2373 }
2374 gcc_unreachable ();
2375 return NULL;
2376 }
2377
2378 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2379 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2380 the process if necessary. Append the resulting assignment statements
2381 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2382 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2383 left shifts using additions. */
2384
2385 static tree
2386 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2387 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2388 {
2389 if (integer_zerop (op2)
2390 && (code == LSHIFT_EXPR
2391 || code == PLUS_EXPR))
2392 {
2393 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2394 return op1;
2395 }
2396
2397 gimple *stmt;
2398 tree itype = TREE_TYPE (op1);
2399 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2400
2401 if (code == LSHIFT_EXPR
2402 && synth_shift_p)
2403 {
2404 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2405 stmt_vinfo);
2406 append_pattern_def_seq (stmt_vinfo, stmt);
2407 return tmp_var;
2408 }
2409
2410 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2411 append_pattern_def_seq (stmt_vinfo, stmt);
2412 return tmp_var;
2413 }
2414
2415 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2416 and simple arithmetic operations to be vectorized. Record the statements
2417 produced in STMT_VINFO and return the last statement in the sequence or
2418 NULL if it's not possible to synthesize such a multiplication.
2419 This function mirrors the behavior of expand_mult_const in expmed.c but
2420 works on tree-ssa form. */
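/* For example, VAL = 9 can be synthesized as

     t1 = op << 3;
     accum = t1 + op;

   (an alg_shift step followed by an alg_add_t2_m step); whether this
   particular sequence is chosen is up to choose_mult_variant.  When the
   input type has undefined overflow, the sequence is instead emitted
   on the corresponding unsigned type and the result cast back.  */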
2421
2422 static gimple *
2423 vect_synth_mult_by_constant (tree op, tree val,
2424 stmt_vec_info stmt_vinfo)
2425 {
2426 tree itype = TREE_TYPE (op);
2427 machine_mode mode = TYPE_MODE (itype);
2428 struct algorithm alg;
2429 mult_variant variant;
2430 if (!tree_fits_shwi_p (val))
2431 return NULL;
2432
2433 /* Multiplication synthesis by shifts, adds and subs can introduce
2434 signed overflow where the original operation didn't. Perform the
2435 operations on an unsigned type and cast back to avoid this.
2436 In the future we may want to relax this for synthesis algorithms
2437 that we can prove do not cause unexpected overflow. */
2438 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2439
2440 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2441
2442 /* Targets that don't support vector shifts but support vector additions
2443 can synthesize shifts that way. */
2444 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2445
2446 HOST_WIDE_INT hwval = tree_to_shwi (val);
2447 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2448 The vectorizer's benefit analysis will decide whether it's beneficial
2449 to do this. */
2450 bool possible = choose_mult_variant (mode, hwval, &alg,
2451 &variant, MAX_COST);
2452 if (!possible)
2453 return NULL;
2454
2455 tree vectype = get_vectype_for_scalar_type (multtype);
2456
2457 if (!vectype
2458 || !target_supports_mult_synth_alg (&alg, variant,
2459 vectype, synth_shift_p))
2460 return NULL;
2461
2462 tree accumulator;
2463
2464 /* Clear out the sequence of statements so we can populate it below. */
2465 gimple *stmt = NULL;
2466
2467 if (cast_to_unsigned_p)
2468 {
2469 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2470 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2471 append_pattern_def_seq (stmt_vinfo, stmt);
2472 op = tmp_op;
2473 }
2474
2475 if (alg.op[0] == alg_zero)
2476 accumulator = build_int_cst (multtype, 0);
2477 else
2478 accumulator = op;
2479
2480 bool needs_fixup = (variant == negate_variant)
2481 || (variant == add_variant);
2482
2483 for (int i = 1; i < alg.ops; i++)
2484 {
2485 tree shft_log = build_int_cst (multtype, alg.log[i]);
2486 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2487 tree tmp_var = NULL_TREE;
2488
2489 switch (alg.op[i])
2490 {
2491 case alg_shift:
2492 if (synth_shift_p)
2493 stmt
2494 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2495 stmt_vinfo);
2496 else
2497 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2498 shft_log);
2499 break;
2500 case alg_add_t_m2:
2501 tmp_var
2502 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2503 stmt_vinfo, synth_shift_p);
2504 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2505 tmp_var);
2506 break;
2507 case alg_sub_t_m2:
2508 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2509 shft_log, stmt_vinfo,
2510 synth_shift_p);
2511 /* In some algorithms the first step involves zeroing the
2512 accumulator. If subtracting from such an accumulator
2513 just emit the negation directly. */
2514 if (integer_zerop (accumulator))
2515 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2516 else
2517 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2518 tmp_var);
2519 break;
2520 case alg_add_t2_m:
2521 tmp_var
2522 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2523 stmt_vinfo, synth_shift_p);
2524 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2525 break;
2526 case alg_sub_t2_m:
2527 tmp_var
2528 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2529 stmt_vinfo, synth_shift_p);
2530 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2531 break;
2532 case alg_add_factor:
2533 tmp_var
2534 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2535 stmt_vinfo, synth_shift_p);
2536 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2537 tmp_var);
2538 break;
2539 case alg_sub_factor:
2540 tmp_var
2541 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2542 stmt_vinfo, synth_shift_p);
2543 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2544 accumulator);
2545 break;
2546 default:
2547 gcc_unreachable ();
2548 }
2549 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2550 but rather return it directly. */
2551
2552 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2553 append_pattern_def_seq (stmt_vinfo, stmt);
2554 accumulator = accum_tmp;
2555 }
2556 if (variant == negate_variant)
2557 {
2558 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2559 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2560 accumulator = accum_tmp;
2561 if (cast_to_unsigned_p)
2562 append_pattern_def_seq (stmt_vinfo, stmt);
2563 }
2564 else if (variant == add_variant)
2565 {
2566 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2567 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2568 accumulator = accum_tmp;
2569 if (cast_to_unsigned_p)
2570 append_pattern_def_seq (stmt_vinfo, stmt);
2571 }
2572 /* Move back to a signed type if needed. */
2573 if (cast_to_unsigned_p)
2574 {
2575 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2576 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2577 }
2578
2579 return stmt;
2580 }
2581
2582 /* Detect multiplication by a constant and convert it into a sequence
2583 of shifts, additions, subtractions and negations. We reuse the
2584 choose_mult_variant algorithms from expmed.c.
2585
2586 Input/Output:
2587
2588 STMT_VINFO: The stmt from which the pattern search begins,
2589 i.e. the mult stmt.
2590
2591 Output:
2592
2593 * TYPE_OUT: The type of the output of this pattern.
2594
2595 * Return value: A new stmt that will be used to replace
2596 the multiplication. */
2597
2598 static gimple *
2599 vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2600 {
2601 gimple *last_stmt = stmt_vinfo->stmt;
2602 tree oprnd0, oprnd1, vectype, itype;
2603 gimple *pattern_stmt;
2604
2605 if (!is_gimple_assign (last_stmt))
2606 return NULL;
2607
2608 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2609 return NULL;
2610
2611 oprnd0 = gimple_assign_rhs1 (last_stmt);
2612 oprnd1 = gimple_assign_rhs2 (last_stmt);
2613 itype = TREE_TYPE (oprnd0);
2614
2615 if (TREE_CODE (oprnd0) != SSA_NAME
2616 || TREE_CODE (oprnd1) != INTEGER_CST
2617 || !INTEGRAL_TYPE_P (itype)
2618 || !type_has_mode_precision_p (itype))
2619 return NULL;
2620
2621 vectype = get_vectype_for_scalar_type (itype);
2622 if (vectype == NULL_TREE)
2623 return NULL;
2624
2625 /* If the target can handle vectorized multiplication natively,
2626 don't attempt to optimize this. */
2627 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2628 if (mul_optab != unknown_optab)
2629 {
2630 machine_mode vec_mode = TYPE_MODE (vectype);
2631 int icode = (int) optab_handler (mul_optab, vec_mode);
2632 if (icode != CODE_FOR_nothing)
2633 return NULL;
2634 }
2635
2636 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2637 if (!pattern_stmt)
2638 return NULL;
2639
2640 /* Pattern detected. */
2641 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
2642
2643 *type_out = vectype;
2644
2645 return pattern_stmt;
2646 }
2647
2648 /* Detect a signed division by a constant that wouldn't be
2649 otherwise vectorized:
2650
2651 type a_t, b_t;
2652
2653 S1 a_t = b_t / N;
2654
2655 where type 'type' is an integral type and N is a constant.
2656
2657 Similarly handle modulo by a constant:
2658
2659 S4 a_t = b_t % N;
2660
2661 Input/Output:
2662
2663 * STMT_VINFO: The stmt from which the pattern search begins,
2664 i.e. the division stmt. If N is a power of two constant
2665 and type is signed, S1 is replaced by:
2666 S3 y_t = b_t < 0 ? N - 1 : 0;
2667 S2 x_t = b_t + y_t;
2668 S1' a_t = x_t >> log2 (N);
2669
2670 If N is a power of two constant and type is signed,
2671 S4 is replaced by (where *_T temporaries have unsigned type):
2672 S9 y_T = b_t < 0 ? -1U : 0U;
2673 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2674 S7 z_t = (type) z_T;
2675 S6 w_t = b_t + z_t;
2676 S5 x_t = w_t & (N - 1);
2677 S4' a_t = x_t - z_t;
2678
2679 Output:
2680
2681 * TYPE_OUT: The type of the output of this pattern.
2682
2683 * Return value: A new stmt that will be used to replace the division
2684 S1 or modulo S4 stmt. */
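/* As a concrete check of the division sequence above, take N = 4 and
   b_t = -7: S3 gives y_t = 3, S2 gives x_t = -4 and S1' gives
   a_t = -4 >> 2 = -1, matching the truncating division -7 / 4 == -1
   (a plain arithmetic shift alone would yield the flooring result -2).  */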
2685
2686 static gimple *
2687 vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2688 {
2689 gimple *last_stmt = stmt_vinfo->stmt;
2690 tree oprnd0, oprnd1, vectype, itype, cond;
2691 gimple *pattern_stmt, *def_stmt;
2692 enum tree_code rhs_code;
2693 optab optab;
2694 tree q;
2695 int dummy_int, prec;
2696
2697 if (!is_gimple_assign (last_stmt))
2698 return NULL;
2699
2700 rhs_code = gimple_assign_rhs_code (last_stmt);
2701 switch (rhs_code)
2702 {
2703 case TRUNC_DIV_EXPR:
2704 case EXACT_DIV_EXPR:
2705 case TRUNC_MOD_EXPR:
2706 break;
2707 default:
2708 return NULL;
2709 }
2710
2711 oprnd0 = gimple_assign_rhs1 (last_stmt);
2712 oprnd1 = gimple_assign_rhs2 (last_stmt);
2713 itype = TREE_TYPE (oprnd0);
2714 if (TREE_CODE (oprnd0) != SSA_NAME
2715 || TREE_CODE (oprnd1) != INTEGER_CST
2716 || TREE_CODE (itype) != INTEGER_TYPE
2717 || !type_has_mode_precision_p (itype))
2718 return NULL;
2719
2720 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2721 vectype = get_vectype_for_scalar_type (itype);
2722 if (vectype == NULL_TREE)
2723 return NULL;
2724
2725 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
2726 {
2727 /* If the target can handle vectorized division or modulo natively,
2728 don't attempt to optimize this, since native division is likely
2729 to give smaller code. */
2730 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2731 if (optab != unknown_optab)
2732 {
2733 machine_mode vec_mode = TYPE_MODE (vectype);
2734 int icode = (int) optab_handler (optab, vec_mode);
2735 if (icode != CODE_FOR_nothing)
2736 return NULL;
2737 }
2738 }
2739
2740 prec = TYPE_PRECISION (itype);
2741 if (integer_pow2p (oprnd1))
2742 {
2743 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2744 return NULL;
2745
2746 /* Pattern detected. */
2747 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
2748
2749 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2750 build_int_cst (itype, 0));
2751 if (rhs_code == TRUNC_DIV_EXPR
2752 || rhs_code == EXACT_DIV_EXPR)
2753 {
2754 tree var = vect_recog_temp_ssa_var (itype, NULL);
2755 tree shift;
2756 def_stmt
2757 = gimple_build_assign (var, COND_EXPR, cond,
2758 fold_build2 (MINUS_EXPR, itype, oprnd1,
2759 build_int_cst (itype, 1)),
2760 build_int_cst (itype, 0));
2761 append_pattern_def_seq (stmt_vinfo, def_stmt);
2762 var = vect_recog_temp_ssa_var (itype, NULL);
2763 def_stmt
2764 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2765 gimple_assign_lhs (def_stmt));
2766 append_pattern_def_seq (stmt_vinfo, def_stmt);
2767
2768 shift = build_int_cst (itype, tree_log2 (oprnd1));
2769 pattern_stmt
2770 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2771 RSHIFT_EXPR, var, shift);
2772 }
2773 else
2774 {
2775 tree signmask;
2776 if (compare_tree_int (oprnd1, 2) == 0)
2777 {
2778 signmask = vect_recog_temp_ssa_var (itype, NULL);
2779 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2780 build_int_cst (itype, 1),
2781 build_int_cst (itype, 0));
2782 append_pattern_def_seq (stmt_vinfo, def_stmt);
2783 }
2784 else
2785 {
2786 tree utype
2787 = build_nonstandard_integer_type (prec, 1);
2788 tree vecutype = get_vectype_for_scalar_type (utype);
2789 tree shift
2790 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2791 - tree_log2 (oprnd1));
2792 tree var = vect_recog_temp_ssa_var (utype, NULL);
2793
2794 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2795 build_int_cst (utype, -1),
2796 build_int_cst (utype, 0));
2797 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
2798 var = vect_recog_temp_ssa_var (utype, NULL);
2799 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2800 gimple_assign_lhs (def_stmt),
2801 shift);
2802 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
2803 signmask = vect_recog_temp_ssa_var (itype, NULL);
2804 def_stmt
2805 = gimple_build_assign (signmask, NOP_EXPR, var);
2806 append_pattern_def_seq (stmt_vinfo, def_stmt);
2807 }
2808 def_stmt
2809 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2810 PLUS_EXPR, oprnd0, signmask);
2811 append_pattern_def_seq (stmt_vinfo, def_stmt);
2812 def_stmt
2813 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2814 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2815 fold_build2 (MINUS_EXPR, itype, oprnd1,
2816 build_int_cst (itype, 1)));
2817 append_pattern_def_seq (stmt_vinfo, def_stmt);
2818
2819 pattern_stmt
2820 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2821 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2822 signmask);
2823 }
2824
2825 *type_out = vectype;
2826 return pattern_stmt;
2827 }
2828
2829 if (prec > HOST_BITS_PER_WIDE_INT
2830 || integer_zerop (oprnd1))
2831 return NULL;
2832
2833 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2834 return NULL;
2835
2836 if (TYPE_UNSIGNED (itype))
2837 {
2838 unsigned HOST_WIDE_INT mh, ml;
2839 int pre_shift, post_shift;
2840 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2841 & GET_MODE_MASK (itype_mode));
2842 tree t1, t2, t3, t4;
2843
2844 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2845 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2846 return NULL;
2847
2848 /* Find a suitable multiplier and right shift count
2849 instead of multiplying with D. */
2850 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2851
2852 /* If the suggested multiplier is more than SIZE bits, we can do better
2853 for even divisors, using an initial right shift. */
2854 if (mh != 0 && (d & 1) == 0)
2855 {
2856 pre_shift = ctz_or_zero (d);
2857 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2858 &ml, &post_shift, &dummy_int);
2859 gcc_assert (!mh);
2860 }
2861 else
2862 pre_shift = 0;
2863
2864 if (mh != 0)
2865 {
2866 if (post_shift - 1 >= prec)
2867 return NULL;
2868
2869 /* t1 = oprnd0 h* ml;
2870 t2 = oprnd0 - t1;
2871 t3 = t2 >> 1;
2872 t4 = t1 + t3;
2873 q = t4 >> (post_shift - 1); */
2874 t1 = vect_recog_temp_ssa_var (itype, NULL);
2875 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2876 build_int_cst (itype, ml));
2877 append_pattern_def_seq (stmt_vinfo, def_stmt);
2878
2879 t2 = vect_recog_temp_ssa_var (itype, NULL);
2880 def_stmt
2881 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2882 append_pattern_def_seq (stmt_vinfo, def_stmt);
2883
2884 t3 = vect_recog_temp_ssa_var (itype, NULL);
2885 def_stmt
2886 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2887 append_pattern_def_seq (stmt_vinfo, def_stmt);
2888
2889 t4 = vect_recog_temp_ssa_var (itype, NULL);
2890 def_stmt
2891 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2892
2893 if (post_shift != 1)
2894 {
2895 append_pattern_def_seq (stmt_vinfo, def_stmt);
2896
2897 q = vect_recog_temp_ssa_var (itype, NULL);
2898 pattern_stmt
2899 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2900 build_int_cst (itype, post_shift - 1));
2901 }
2902 else
2903 {
2904 q = t4;
2905 pattern_stmt = def_stmt;
2906 }
2907 }
2908 else
2909 {
2910 if (pre_shift >= prec || post_shift >= prec)
2911 return NULL;
2912
2913 /* t1 = oprnd0 >> pre_shift;
2914 t2 = t1 h* ml;
2915 q = t2 >> post_shift; */
2916 if (pre_shift)
2917 {
2918 t1 = vect_recog_temp_ssa_var (itype, NULL);
2919 def_stmt
2920 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2921 build_int_cst (NULL, pre_shift));
2922 append_pattern_def_seq (stmt_vinfo, def_stmt);
2923 }
2924 else
2925 t1 = oprnd0;
2926
2927 t2 = vect_recog_temp_ssa_var (itype, NULL);
2928 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2929 build_int_cst (itype, ml));
2930
2931 if (post_shift)
2932 {
2933 append_pattern_def_seq (stmt_vinfo, def_stmt);
2934
2935 q = vect_recog_temp_ssa_var (itype, NULL);
2936 def_stmt
2937 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2938 build_int_cst (itype, post_shift));
2939 }
2940 else
2941 q = t2;
2942
2943 pattern_stmt = def_stmt;
2944 }
2945 }
2946 else
2947 {
2948 unsigned HOST_WIDE_INT ml;
2949 int post_shift;
2950 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2951 unsigned HOST_WIDE_INT abs_d;
2952 bool add = false;
2953 tree t1, t2, t3, t4;
2954
2955 /* Give up for -1. */
2956 if (d == -1)
2957 return NULL;
2958
2959 /* Since d might be INT_MIN, we have to cast to
2960 unsigned HOST_WIDE_INT before negating to avoid
2961 undefined signed overflow. */
2962 abs_d = (d >= 0
2963 ? (unsigned HOST_WIDE_INT) d
2964 : - (unsigned HOST_WIDE_INT) d);
2965
2966 /* n rem d = n rem -d */
2967 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2968 {
2969 d = abs_d;
2970 oprnd1 = build_int_cst (itype, abs_d);
2971 }
2972 if (HOST_BITS_PER_WIDE_INT >= prec
2973 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2974 /* This case is not handled correctly below. */
2975 return NULL;
2976
2977 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2978 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2979 {
2980 add = true;
2981 ml |= HOST_WIDE_INT_M1U << (prec - 1);
2982 }
2983 if (post_shift >= prec)
2984 return NULL;
2985
2986 /* t1 = oprnd0 h* ml; */
2987 t1 = vect_recog_temp_ssa_var (itype, NULL);
2988 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2989 build_int_cst (itype, ml));
2990
2991 if (add)
2992 {
2993 /* t2 = t1 + oprnd0; */
2994 append_pattern_def_seq (stmt_vinfo, def_stmt);
2995 t2 = vect_recog_temp_ssa_var (itype, NULL);
2996 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2997 }
2998 else
2999 t2 = t1;
3000
3001 if (post_shift)
3002 {
3003 /* t3 = t2 >> post_shift; */
3004 append_pattern_def_seq (stmt_vinfo, def_stmt);
3005 t3 = vect_recog_temp_ssa_var (itype, NULL);
3006 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3007 build_int_cst (itype, post_shift));
3008 }
3009 else
3010 t3 = t2;
3011
3012 wide_int oprnd0_min, oprnd0_max;
3013 int msb = 1;
3014 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
3015 {
3016 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
3017 msb = 0;
3018 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
3019 msb = -1;
3020 }
3021
3022 if (msb == 0 && d >= 0)
3023 {
3024 /* q = t3; */
3025 q = t3;
3026 pattern_stmt = def_stmt;
3027 }
3028 else
3029 {
3030 /* t4 = oprnd0 >> (prec - 1);
3031 or if we know from VRP that oprnd0 >= 0
3032 t4 = 0;
3033 or if we know from VRP that oprnd0 < 0
3034 t4 = -1; */
3035 append_pattern_def_seq (stmt_vinfo, def_stmt);
3036 t4 = vect_recog_temp_ssa_var (itype, NULL);
3037 if (msb != 1)
3038 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3039 build_int_cst (itype, msb));
3040 else
3041 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3042 build_int_cst (itype, prec - 1));
3043 append_pattern_def_seq (stmt_vinfo, def_stmt);
3044
3045 /* q = t3 - t4; or q = t4 - t3; */
3046 q = vect_recog_temp_ssa_var (itype, NULL);
3047 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3048 d < 0 ? t3 : t4);
3049 }
3050 }
3051
3052 if (rhs_code == TRUNC_MOD_EXPR)
3053 {
3054 tree r, t1;
3055
3056 /* We divided. Now finish by:
3057 t1 = q * oprnd1;
3058 r = oprnd0 - t1; */
3059 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3060
3061 t1 = vect_recog_temp_ssa_var (itype, NULL);
3062 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3063 append_pattern_def_seq (stmt_vinfo, def_stmt);
3064
3065 r = vect_recog_temp_ssa_var (itype, NULL);
3066 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3067 }
3068
3069 /* Pattern detected. */
3070 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3071
3072 *type_out = vectype;
3073 return pattern_stmt;
3074 }
3075
3076 /* Function vect_recog_mixed_size_cond_pattern
3077
3078 Try to find the following pattern:
3079
3080 type x_t, y_t;
3081 TYPE a_T, b_T, c_T;
3082 loop:
3083 S1 a_T = x_t CMP y_t ? b_T : c_T;
3084
3085 where type 'TYPE' is an integral type which has different size
3086 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3087 than 'type', the constants need to fit into an integer type
3088 with the same width as 'type') or results of conversion from 'type'.
3089
3090 Input:
3091
3092 * STMT_VINFO: The stmt from which the pattern search begins.
3093
3094 Output:
3095
3096 * TYPE_OUT: The type of the output of this pattern.
3097
3098 * Return value: A new stmt that will be used to replace the pattern.
3099 Additionally a def_stmt is added.
3100
3101 a_it = x_t CMP y_t ? b_it : c_it;
3102 a_T = (TYPE) a_it; */
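/* For example, with type = short and TYPE = int:

     short x_t, y_t;
     int a_T = x_t < y_t ? 17 : 42;

   can be computed entirely on shorts (both constants fit in 16 bits):

     short a_it = x_t < y_t ? 17 : 42;
     int a_T = (int) a_it;

   so the comparison and the selection use the same vector width.  */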
3103
3104 static gimple *
3105 vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3106 {
3107 gimple *last_stmt = stmt_vinfo->stmt;
3108 tree cond_expr, then_clause, else_clause;
3109 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3110 gimple *pattern_stmt, *def_stmt;
3111 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3112 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3113 bool promotion;
3114 tree comp_scalar_type;
3115
3116 if (!is_gimple_assign (last_stmt)
3117 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3118 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3119 return NULL;
3120
3121 cond_expr = gimple_assign_rhs1 (last_stmt);
3122 then_clause = gimple_assign_rhs2 (last_stmt);
3123 else_clause = gimple_assign_rhs3 (last_stmt);
3124
3125 if (!COMPARISON_CLASS_P (cond_expr))
3126 return NULL;
3127
3128 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3129 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3130 if (comp_vectype == NULL_TREE)
3131 return NULL;
3132
3133 type = gimple_expr_type (last_stmt);
3134 if (types_compatible_p (type, comp_scalar_type)
3135 || ((TREE_CODE (then_clause) != INTEGER_CST
3136 || TREE_CODE (else_clause) != INTEGER_CST)
3137 && !INTEGRAL_TYPE_P (comp_scalar_type))
3138 || !INTEGRAL_TYPE_P (type))
3139 return NULL;
3140
3141 if ((TREE_CODE (then_clause) != INTEGER_CST
3142 && !type_conversion_p (then_clause, stmt_vinfo, false, &orig_type0,
3143 &def_stmt0, &promotion))
3144 || (TREE_CODE (else_clause) != INTEGER_CST
3145 && !type_conversion_p (else_clause, stmt_vinfo, false, &orig_type1,
3146 &def_stmt1, &promotion)))
3147 return NULL;
3148
3149 if (orig_type0 && orig_type1
3150 && !types_compatible_p (orig_type0, orig_type1))
3151 return NULL;
3152
3153 if (orig_type0)
3154 {
3155 if (!types_compatible_p (orig_type0, comp_scalar_type))
3156 return NULL;
3157 then_clause = gimple_assign_rhs1 (def_stmt0);
3158 itype = orig_type0;
3159 }
3160
3161 if (orig_type1)
3162 {
3163 if (!types_compatible_p (orig_type1, comp_scalar_type))
3164 return NULL;
3165 else_clause = gimple_assign_rhs1 (def_stmt1);
3166 itype = orig_type1;
3167 }
3168
3169
3170 HOST_WIDE_INT cmp_mode_size
3171 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3172
3173 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3174 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3175 return NULL;
3176
3177 vectype = get_vectype_for_scalar_type (type);
3178 if (vectype == NULL_TREE)
3179 return NULL;
3180
3181 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3182 return NULL;
3183
3184 if (itype == NULL_TREE)
3185 itype = build_nonstandard_integer_type (cmp_mode_size,
3186 TYPE_UNSIGNED (type));
3187
3188 if (itype == NULL_TREE
3189 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3190 return NULL;
3191
3192 vecitype = get_vectype_for_scalar_type (itype);
3193 if (vecitype == NULL_TREE)
3194 return NULL;
3195
3196 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3197 return NULL;
3198
3199 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3200 {
3201 if ((TREE_CODE (then_clause) == INTEGER_CST
3202 && !int_fits_type_p (then_clause, itype))
3203 || (TREE_CODE (else_clause) == INTEGER_CST
3204 && !int_fits_type_p (else_clause, itype)))
3205 return NULL;
3206 }
3207
3208 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3209 COND_EXPR, unshare_expr (cond_expr),
3210 fold_convert (itype, then_clause),
3211 fold_convert (itype, else_clause));
3212 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3213 NOP_EXPR, gimple_assign_lhs (def_stmt));
3214
3215 append_pattern_def_seq (stmt_vinfo, def_stmt, vecitype);
3216 *type_out = vectype;
3217
3218 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3219
3220 return pattern_stmt;
3221 }
3222
3223
3224 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3225 true if bool VAR can and should be optimized that way. Assume it shouldn't
3226 in case it's a result of a comparison which can be directly vectorized into
3227 a vector comparison. Fills in STMTS with all stmts visited during the
3228 walk. */
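/* For instance, starting from c_b in

     bool a_b = x < y;
     bool b_b = p != q;
     bool c_b = a_b & b_b;

   the walk visits the defining statements of c_b, a_b and b_b and
   succeeds only if each comparison can be expressed as a COND_EXPR
   on a suitable integer vector type.  */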
3229
3230 static bool
3231 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3232 {
3233 tree rhs1;
3234 enum tree_code rhs_code;
3235
3236 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3237 if (!def_stmt_info)
3238 return false;
3239
3240 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3241 if (!def_stmt)
3242 return false;
3243
3244 if (stmts.contains (def_stmt))
3245 return true;
3246
3247 rhs1 = gimple_assign_rhs1 (def_stmt);
3248 rhs_code = gimple_assign_rhs_code (def_stmt);
3249 switch (rhs_code)
3250 {
3251 case SSA_NAME:
3252 if (! check_bool_pattern (rhs1, vinfo, stmts))
3253 return false;
3254 break;
3255
3256 CASE_CONVERT:
3257 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3258 return false;
3259 if (! check_bool_pattern (rhs1, vinfo, stmts))
3260 return false;
3261 break;
3262
3263 case BIT_NOT_EXPR:
3264 if (! check_bool_pattern (rhs1, vinfo, stmts))
3265 return false;
3266 break;
3267
3268 case BIT_AND_EXPR:
3269 case BIT_IOR_EXPR:
3270 case BIT_XOR_EXPR:
3271 if (! check_bool_pattern (rhs1, vinfo, stmts)
3272 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3273 return false;
3274 break;
3275
3276 default:
3277 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3278 {
3279 tree vecitype, comp_vectype;
3280
3281 /* If the comparison can throw, then is_gimple_condexpr will be
3282 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3283 if (stmt_could_throw_p (cfun, def_stmt))
3284 return false;
3285
3286 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3287 if (comp_vectype == NULL_TREE)
3288 return false;
3289
3290 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3291 if (mask_type
3292 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3293 return false;
3294
3295 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3296 {
3297 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3298 tree itype
3299 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3300 vecitype = get_vectype_for_scalar_type (itype);
3301 if (vecitype == NULL_TREE)
3302 return false;
3303 }
3304 else
3305 vecitype = comp_vectype;
3306 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3307 return false;
3308 }
3309 else
3310 return false;
3311 break;
3312 }
3313
3314 bool res = stmts.add (def_stmt);
3315 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3316 gcc_assert (!res);
3317
3318 return true;
3319 }
3320
3321
3322 /* Helper function of adjust_bool_pattern. Cast VAR to TYPE by appending
3323 a conversion statement to STMT_INFO's pattern definition sequence, and
3324 return the result of the cast. */
3325
3326 static tree
3327 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3328 {
3329 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3330 NOP_EXPR, var);
3331 append_pattern_def_seq (stmt_info, cast_stmt,
3332 get_vectype_for_scalar_type (type));
3333 return gimple_assign_lhs (cast_stmt);
3334 }
3335
3336 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3337 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3338 type, OUT_TYPE is the desired final integer type of the whole pattern.
3339 STMT_INFO is the info of the pattern root, with which the pattern
3340 stmts should be associated. DEFS is a map of pattern defs. */
3341
3342 static void
3343 adjust_bool_pattern (tree var, tree out_type,
3344 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3345 {
3346 gimple *stmt = SSA_NAME_DEF_STMT (var);
3347 enum tree_code rhs_code, def_rhs_code;
3348 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3349 location_t loc;
3350 gimple *pattern_stmt, *def_stmt;
3351 tree trueval = NULL_TREE;
3352
3353 rhs1 = gimple_assign_rhs1 (stmt);
3354 rhs2 = gimple_assign_rhs2 (stmt);
3355 rhs_code = gimple_assign_rhs_code (stmt);
3356 loc = gimple_location (stmt);
3357 switch (rhs_code)
3358 {
3359 case SSA_NAME:
3360 CASE_CONVERT:
3361 irhs1 = *defs.get (rhs1);
3362 itype = TREE_TYPE (irhs1);
3363 pattern_stmt
3364 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3365 SSA_NAME, irhs1);
3366 break;
3367
3368 case BIT_NOT_EXPR:
3369 irhs1 = *defs.get (rhs1);
3370 itype = TREE_TYPE (irhs1);
3371 pattern_stmt
3372 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3373 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3374 break;
3375
3376 case BIT_AND_EXPR:
3377 /* Try to optimize x = y & (a < b ? 1 : 0); into
3378 x = (a < b ? y : 0);
3379
3380 E.g. for:
3381 bool a_b, b_b, c_b;
3382 TYPE d_T;
3383
3384 S1 a_b = x1 CMP1 y1;
3385 S2 b_b = x2 CMP2 y2;
3386 S3 c_b = a_b & b_b;
3387 S4 d_T = (TYPE) c_b;
3388
3389 we would normally emit:
3390
3391 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3392 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3393 S3' c_T = a_T & b_T;
3394 S4' d_T = c_T;
3395
3396 but we can save one stmt by using the
3397 result of one of the COND_EXPRs in the other COND_EXPR and leave
3398 BIT_AND_EXPR stmt out:
3399
3400 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3401 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3402 S4' f_T = c_T;
3403
3404 At least when VEC_COND_EXPR is implemented using masks,
3405 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3406 computes the comparison mask and ANDs it, in one case with an
3407 all-ones vector, in the other case with a vector register.
3408 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
3409 often more expensive. */
3410 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3411 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3412 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3413 {
3414 irhs1 = *defs.get (rhs1);
3415 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3416 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3417 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3418 {
3419 rhs_code = def_rhs_code;
3420 rhs1 = def_rhs1;
3421 rhs2 = gimple_assign_rhs2 (def_stmt);
3422 trueval = irhs1;
3423 goto do_compare;
3424 }
3425 else
3426 irhs2 = *defs.get (rhs2);
3427 goto and_ior_xor;
3428 }
3429 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3430 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3431 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3432 {
3433 irhs2 = *defs.get (rhs2);
3434 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3435 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3436 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3437 {
3438 rhs_code = def_rhs_code;
3439 rhs1 = def_rhs1;
3440 rhs2 = gimple_assign_rhs2 (def_stmt);
3441 trueval = irhs2;
3442 goto do_compare;
3443 }
3444 else
3445 irhs1 = *defs.get (rhs1);
3446 goto and_ior_xor;
3447 }
3448 /* FALLTHRU */
3449 case BIT_IOR_EXPR:
3450 case BIT_XOR_EXPR:
3451 irhs1 = *defs.get (rhs1);
3452 irhs2 = *defs.get (rhs2);
3453 and_ior_xor:
3454 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3455 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3456 {
3457 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3458 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3459 int out_prec = TYPE_PRECISION (out_type);
3460 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3461 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3462 stmt_info);
3463 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3464 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3465 stmt_info);
3466 else
3467 {
3468 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3469 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3470 }
3471 }
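/* E.g. (illustrative): with OUT_TYPE int, IRHS1 of type short and
   IRHS2 of type char, |32 - 16| < |32 - 8|, so IRHS2 is cast to short
   above and the bitwise operation is then done in short. */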
3472 itype = TREE_TYPE (irhs1);
3473 pattern_stmt
3474 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3475 rhs_code, irhs1, irhs2);
3476 break;
3477
3478 default:
3479 do_compare:
3480 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3481 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3482 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3483 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3484 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3485 {
3486 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3487 itype
3488 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3489 }
3490 else
3491 itype = TREE_TYPE (rhs1);
3492 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3493 if (trueval == NULL_TREE)
3494 trueval = build_int_cst (itype, 1);
3495 else
3496 gcc_checking_assert (useless_type_conversion_p (itype,
3497 TREE_TYPE (trueval)));
3498 pattern_stmt
3499 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3500 COND_EXPR, cond_expr, trueval,
3501 build_int_cst (itype, 0));
3502 break;
3503 }
3504
3505 gimple_set_location (pattern_stmt, loc);
3506 append_pattern_def_seq (stmt_info, pattern_stmt,
3507 get_vectype_for_scalar_type (itype));
3508 defs.put (var, gimple_assign_lhs (pattern_stmt));
3509 }
3510
3511 /* Comparison function to qsort a vector of gimple stmts by UID. */
3512
3513 static int
3514 sort_after_uid (const void *p1, const void *p2)
3515 {
3516 const gimple *stmt1 = *(const gimple * const *)p1;
3517 const gimple *stmt2 = *(const gimple * const *)p2;
3518 return gimple_uid (stmt1) - gimple_uid (stmt2);
3519 }
3520
3521 /* Create pattern stmts for all stmts participating in the bool pattern
3522 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3523 OUT_TYPE. Return the def of the pattern root. */
3524
3525 static tree
3526 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3527 tree out_type, stmt_vec_info stmt_info)
3528 {
3529 /* Gather original stmts in the bool pattern in their order of appearance
3530 in the IL. */
3531 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3532 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3533 i != bool_stmt_set.end (); ++i)
3534 bool_stmts.quick_push (*i);
3535 bool_stmts.qsort (sort_after_uid);
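/* UIDs increase in order of appearance in the IL, so sorting by UID
   processes each definition before its uses; the DEFS map lookups in
   adjust_bool_pattern can therefore always succeed. */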
3536
3537 /* Now process them in that order, producing pattern stmts. */
3538 hash_map <tree, tree> defs;
3539 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3540 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3541 out_type, stmt_info, defs);
3542
3543 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3544 gimple *pattern_stmt
3545 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
3546 return gimple_assign_lhs (pattern_stmt);
3547 }
3548
3549 /* Helper for search_type_for_mask. */
3550
3551 static tree
3552 search_type_for_mask_1 (tree var, vec_info *vinfo,
3553 hash_map<gimple *, tree> &cache)
3554 {
3555 tree rhs1;
3556 enum tree_code rhs_code;
3557 tree res = NULL_TREE, res2;
3558
3559 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3560 return NULL_TREE;
3561
3562 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3563 if (!def_stmt_info)
3564 return NULL_TREE;
3565
3566 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3567 if (!def_stmt)
3568 return NULL_TREE;
3569
3570 tree *c = cache.get (def_stmt);
3571 if (c)
3572 return *c;
3573
3574 rhs_code = gimple_assign_rhs_code (def_stmt);
3575 rhs1 = gimple_assign_rhs1 (def_stmt);
3576
3577 switch (rhs_code)
3578 {
3579 case SSA_NAME:
3580 case BIT_NOT_EXPR:
3581 CASE_CONVERT:
3582 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3583 break;
3584
3585 case BIT_AND_EXPR:
3586 case BIT_IOR_EXPR:
3587 case BIT_XOR_EXPR:
3588 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3589 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3590 cache);
3591 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3592 res = res2;
3593 break;
3594
3595 default:
3596 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3597 {
3598 tree comp_vectype, mask_type;
3599
3600 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3601 {
3602 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3603 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3604 vinfo, cache);
3605 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3606 res = res2;
3607 break;
3608 }
3609
3610 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3611 if (comp_vectype == NULL_TREE)
3612 {
3613 res = NULL_TREE;
3614 break;
3615 }
3616
3617 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3618 if (!mask_type
3619 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3620 {
3621 res = NULL_TREE;
3622 break;
3623 }
3624
3625 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3626 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3627 {
3628 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3629 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3630 }
3631 else
3632 res = TREE_TYPE (rhs1);
3633 }
3634 }
3635
3636 cache.put (def_stmt, res);
3637 return res;
3638 }
3639
3640 /* Return the proper type for converting bool VAR into
3641 an integer value or NULL_TREE if no such type exists.
3642 The type is chosen so that the converted value has the
3643 same number of elements as VAR's vector type. */
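
/* For example (illustrative), if VAR is ultimately defined by a
   comparison of two 32-bit ints, the chosen type is a 32-bit unsigned
   integer type: a vector of that type has as many elements as the mask
   produced by the comparison. */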
3644
3645 static tree
3646 search_type_for_mask (tree var, vec_info *vinfo)
3647 {
3648 hash_map<gimple *, tree> cache;
3649 return search_type_for_mask_1 (var, vinfo, cache);
3650 }
3651
3652 /* Function vect_recog_bool_pattern
3653
3654 Try to find pattern like following:
3655
3656 bool a_b, b_b, c_b, d_b, e_b;
3657 TYPE f_T;
3658 loop:
3659 S1 a_b = x1 CMP1 y1;
3660 S2 b_b = x2 CMP2 y2;
3661 S3 c_b = a_b & b_b;
3662 S4 d_b = x3 CMP3 y3;
3663 S5 e_b = c_b | d_b;
3664 S6 f_T = (TYPE) e_b;
3665
3666 where type 'TYPE' is an integral type. Or a similar pattern
3667 ending in
3668
3669 S6 f_Y = e_b ? r_Y : s_Y;
3670
3671 as results from if-conversion of a complex condition.
3672
3673 Input:
3674
3675 * STMT_VINFO: The stmt at the end from which the pattern
3676 search begins, i.e. cast of a bool to
3677 an integer type.
3678
3679 Output:
3680
3681 * TYPE_OUT: The type of the output of this pattern.
3682
3683 * Return value: A new stmt that will be used to replace the pattern.
3684
3685 Assuming the size of TYPE is the same as the size of all comparisons
3686 (otherwise some casts would be added where needed), for the above
3687 sequence we create the following related pattern stmts:
3688 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3689 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3690 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3691 S5' e_T = c_T | d_T;
3692 S6' f_T = e_T;
3693
3694 Instead of the above S3' we could emit:
3695 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3696 S3' c_T = a_T | b_T;
3697 but the above is more efficient. */
3698
3699 static gimple *
3700 vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3701 {
3702 gimple *last_stmt = stmt_vinfo->stmt;
3703 enum tree_code rhs_code;
3704 tree var, lhs, rhs, vectype;
3705 vec_info *vinfo = stmt_vinfo->vinfo;
3706 gimple *pattern_stmt;
3707
3708 if (!is_gimple_assign (last_stmt))
3709 return NULL;
3710
3711 var = gimple_assign_rhs1 (last_stmt);
3712 lhs = gimple_assign_lhs (last_stmt);
3713 rhs_code = gimple_assign_rhs_code (last_stmt);
3714
3715 if (rhs_code == VIEW_CONVERT_EXPR)
3716 var = TREE_OPERAND (var, 0);
3717
3718 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3719 return NULL;
3720
3721 hash_set<gimple *> bool_stmts;
3722
3723 if (CONVERT_EXPR_CODE_P (rhs_code)
3724 || rhs_code == VIEW_CONVERT_EXPR)
3725 {
3726 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3727 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3728 return NULL;
3729 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3730 if (vectype == NULL_TREE)
3731 return NULL;
3732
3733 if (check_bool_pattern (var, vinfo, bool_stmts))
3734 {
3735 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), stmt_vinfo);
3736 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3737 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3738 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3739 else
3740 pattern_stmt
3741 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3742 }
3743 else
3744 {
3745 tree type = search_type_for_mask (var, vinfo);
3746 tree cst0, cst1, tmp;
3747
3748 if (!type)
3749 return NULL;
3750
3751 /* We can use the cond directly with the narrowed type, avoiding
3752 multiple cond exprs followed by packing of the results, and
3753 perform a single cond with a packed mask instead. In the
3754 widening case it is better to do the cond first and then
3755 extract the results. */
3756 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3757 type = TREE_TYPE (lhs);
3758
3759 cst0 = build_int_cst (type, 0);
3760 cst1 = build_int_cst (type, 1);
3761 tmp = vect_recog_temp_ssa_var (type, NULL);
3762 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3763
3764 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3765 {
3766 tree new_vectype = get_vectype_for_scalar_type (type);
3767 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
3768
3769 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3770 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3771 }
3772 }
3773
3774 *type_out = vectype;
3775 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3776
3777 return pattern_stmt;
3778 }
3779 else if (rhs_code == COND_EXPR
3780 && TREE_CODE (var) == SSA_NAME)
3781 {
3782 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3783 if (vectype == NULL_TREE)
3784 return NULL;
3785
3786 /* Build a scalar type for the boolean result that when
3787 vectorized matches the vector type of the result in
3788 size and number of elements. */
3789 unsigned prec
3790 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3791 TYPE_VECTOR_SUBPARTS (vectype));
3792
3793 tree type
3794 = build_nonstandard_integer_type (prec,
3795 TYPE_UNSIGNED (TREE_TYPE (var)));
3796 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3797 return NULL;
3798
3799 if (!check_bool_pattern (var, vinfo, bool_stmts))
3800 return NULL;
3801
3802 rhs = adjust_bool_stmts (bool_stmts, type, stmt_vinfo);
3803
3804 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3805 pattern_stmt
3806 = gimple_build_assign (lhs, COND_EXPR,
3807 build2 (NE_EXPR, boolean_type_node,
3808 rhs, build_int_cst (type, 0)),
3809 gimple_assign_rhs2 (last_stmt),
3810 gimple_assign_rhs3 (last_stmt));
3811 *type_out = vectype;
3812 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3813
3814 return pattern_stmt;
3815 }
3816 else if (rhs_code == SSA_NAME
3817 && STMT_VINFO_DATA_REF (stmt_vinfo))
3818 {
3819 stmt_vec_info pattern_stmt_info;
3820 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3821 gcc_assert (vectype != NULL_TREE);
3822 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3823 return NULL;
3824
3825 if (check_bool_pattern (var, vinfo, bool_stmts))
3826 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), stmt_vinfo);
3827 else
3828 {
3829 tree type = search_type_for_mask (var, vinfo);
3830 tree cst0, cst1, new_vectype;
3831
3832 if (!type)
3833 return NULL;
3834
3835 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3836 type = TREE_TYPE (vectype);
3837
3838 cst0 = build_int_cst (type, 0);
3839 cst1 = build_int_cst (type, 1);
3840 new_vectype = get_vectype_for_scalar_type (type);
3841
3842 rhs = vect_recog_temp_ssa_var (type, NULL);
3843 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3844 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
3845 }
3846
3847 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3848 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3849 {
3850 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3851 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3852 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3853 rhs = rhs2;
3854 }
3855 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3856 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
3857 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
3858 *type_out = vectype;
3859 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3860
3861 return pattern_stmt;
3862 }
3863 else
3864 return NULL;
3865 }
3866
3867
3868 /* A helper for vect_recog_mask_conversion_pattern. Build a
3869 conversion of MASK to a type suitable for masking VECTYPE.
3870 The built statement gets the required vectype and is appended
3871 to STMT_VINFO's pattern definition sequence.
3872 
3873 Return the converted mask. */
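
/* For example (illustrative), a mask produced by comparing chars has
   more elements per vector than a vector of ints; converting it with
   this helper yields a mask whose element count matches VECTYPE, with
   the unpacking done when the conversion stmt is vectorized. */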
3874
3875 static tree
3876 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
3877 {
3878 gimple *stmt;
3879 tree masktype, tmp;
3880
3881 masktype = build_same_sized_truth_vector_type (vectype);
3882 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3883 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3884 append_pattern_def_seq (stmt_vinfo, stmt, masktype);
3885
3886 return tmp;
3887 }
3888
3889
3890 /* Function vect_recog_mask_conversion_pattern
3891
3892 Try to find statements which require boolean type
3893 conversion. Additional conversion statements are
3894 added to handle such cases. For example:
3895
3896 bool m_1, m_2, m_3;
3897 int i_4, i_5;
3898 double d_6, d_7;
3899 char c_1, c_2, c_3;
3900
3901 S1 m_1 = i_4 > i_5;
3902 S2 m_2 = d_6 < d_7;
3903 S3 m_3 = m_1 & m_2;
3904 S4 c_1 = m_3 ? c_2 : c_3;
3905
3906 Will be transformed into:
3907
3908 S1 m_1 = i_4 > i_5;
3909 S2 m_2 = d_6 < d_7;
3910 S3'' m_2' = (_Bool[bitsize=32])m_2;
3911 S3' m_3' = m_1 & m_2';
3912 S4'' m_3'' = (_Bool[bitsize=8])m_3';
3913 S4' c_1' = m_3'' ? c_2 : c_3; */
3914
3915 static gimple *
3916 vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3917 {
3918 gimple *last_stmt = stmt_vinfo->stmt;
3919 enum tree_code rhs_code;
3920 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3921 tree vectype1, vectype2;
3922 stmt_vec_info pattern_stmt_info;
3923 vec_info *vinfo = stmt_vinfo->vinfo;
3924
3925 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3926 if (is_gimple_call (last_stmt)
3927 && gimple_call_internal_p (last_stmt))
3928 {
3929 gcall *pattern_stmt;
3930
3931 internal_fn ifn = gimple_call_internal_fn (last_stmt);
3932 int mask_argno = internal_fn_mask_index (ifn);
3933 if (mask_argno < 0)
3934 return NULL;
3935
3936 bool store_p = internal_store_fn_p (ifn);
3937 if (store_p)
3938 {
3939 int rhs_index = internal_fn_stored_value_index (ifn);
3940 tree rhs = gimple_call_arg (last_stmt, rhs_index);
3941 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs));
3942 }
3943 else
3944 {
3945 lhs = gimple_call_lhs (last_stmt);
3946 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3947 }
3948
3949 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
3950 tree mask_arg_type = search_type_for_mask (mask_arg, vinfo);
3951 if (!mask_arg_type)
3952 return NULL;
3953 vectype2 = get_mask_type_for_scalar_type (mask_arg_type);
3954
3955 if (!vectype1 || !vectype2
3956 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3957 TYPE_VECTOR_SUBPARTS (vectype2)))
3958 return NULL;
3959
3960 tmp = build_mask_conversion (mask_arg, vectype1, stmt_vinfo);
3961
3962 auto_vec<tree, 8> args;
3963 unsigned int nargs = gimple_call_num_args (last_stmt);
3964 args.safe_grow (nargs);
3965 for (unsigned int i = 0; i < nargs; ++i)
3966 args[i] = ((int) i == mask_argno
3967 ? tmp
3968 : gimple_call_arg (last_stmt, i));
3969 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
3970
3971 if (!store_p)
3972 {
3973 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3974 gimple_call_set_lhs (pattern_stmt, lhs);
3975 }
3976 gimple_call_set_nothrow (pattern_stmt, true);
3977
3978 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
3979 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3980 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
3981
3982 *type_out = vectype1;
3983 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
3984
3985 return pattern_stmt;
3986 }
3987
3988 if (!is_gimple_assign (last_stmt))
3989 return NULL;
3990
3991 gimple *pattern_stmt;
3992 lhs = gimple_assign_lhs (last_stmt);
3993 rhs1 = gimple_assign_rhs1 (last_stmt);
3994 rhs_code = gimple_assign_rhs_code (last_stmt);
3995
3996 /* Check for cond expression requiring mask conversion. */
3997 if (rhs_code == COND_EXPR)
3998 {
3999 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
4000
4001 if (TREE_CODE (rhs1) == SSA_NAME)
4002 {
4003 rhs1_type = search_type_for_mask (rhs1, vinfo);
4004 if (!rhs1_type)
4005 return NULL;
4006 }
4007 else if (COMPARISON_CLASS_P (rhs1))
4008 {
4009 /* Check whether we're comparing scalar booleans and (if so)
4010 whether a better mask type exists than the mask associated
4011 with boolean-sized elements. This avoids unnecessary packs
4012 and unpacks if the booleans are set from comparisons of
4013 wider types. E.g. in:
4014
4015 int x1, x2, x3, x4, y1, y2;
4016 ...
4017 bool b1 = (x1 == x2);
4018 bool b2 = (x3 == x4);
4019 ... = b1 == b2 ? y1 : y2;
4020
4021 it is better for b1 and b2 to use the mask type associated
4022 with int elements rather than bool (byte) elements. */
4023 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4024 if (!rhs1_type)
4025 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4026 }
4027 else
4028 return NULL;
4029
4030 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4031
4032 if (!vectype1 || !vectype2)
4033 return NULL;
4034
4035 /* Continue if a conversion is needed. Also continue if we have
4036 a comparison whose vector type would normally be different from
4037 VECTYPE2 when considered in isolation. In that case we'll
4038 replace the comparison with an SSA name (so that we can record
4039 its vector type) and behave as though the comparison was an SSA
4040 name from the outset. */
4041 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4042 TYPE_VECTOR_SUBPARTS (vectype2))
4043 && (TREE_CODE (rhs1) == SSA_NAME
4044 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4045 return NULL;
4046
4047 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4048 in place; we can handle it in vectorizable_condition. This avoids
4049 unnecessary promotion stmts and an increased vectorization factor. */
4050 if (COMPARISON_CLASS_P (rhs1)
4051 && INTEGRAL_TYPE_P (rhs1_type)
4052 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4053 TYPE_VECTOR_SUBPARTS (vectype2)))
4054 {
4055 enum vect_def_type dt;
4056 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4057 && dt == vect_external_def
4058 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4059 && (dt == vect_external_def
4060 || dt == vect_constant_def))
4061 {
4062 tree wide_scalar_type = build_nonstandard_integer_type
4063 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4064 TYPE_UNSIGNED (rhs1_type));
4065 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4066 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4067 return NULL;
4068 }
4069 }
4070
4071 /* If rhs1 is a comparison we need to move it into a
4072 separate statement. */
4073 if (TREE_CODE (rhs1) != SSA_NAME)
4074 {
4075 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4076 pattern_stmt = gimple_build_assign (tmp, rhs1);
4077 rhs1 = tmp;
4078 append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2);
4079 }
4080
4081 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4082 TYPE_VECTOR_SUBPARTS (vectype2)))
4083 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4084 else
4085 tmp = rhs1;
4086
4087 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4088 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4089 gimple_assign_rhs2 (last_stmt),
4090 gimple_assign_rhs3 (last_stmt));
4091
4092 *type_out = vectype1;
4093 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4094
4095 return pattern_stmt;
4096 }
4097
4098 /* Now check for binary boolean operations requiring conversion for
4099 one of operands. */
4100 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4101 return NULL;
4102
4103 if (rhs_code != BIT_IOR_EXPR
4104 && rhs_code != BIT_XOR_EXPR
4105 && rhs_code != BIT_AND_EXPR
4106 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4107 return NULL;
4108
4109 rhs2 = gimple_assign_rhs2 (last_stmt);
4110
4111 rhs1_type = search_type_for_mask (rhs1, vinfo);
4112 rhs2_type = search_type_for_mask (rhs2, vinfo);
4113
4114 if (!rhs1_type || !rhs2_type
4115 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4116 return NULL;
4117
4118 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4119 {
4120 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4121 if (!vectype1)
4122 return NULL;
4123 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
4124 }
4125 else
4126 {
4127 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4128 if (!vectype1)
4129 return NULL;
4130 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4131 }
4132
4133 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4134 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4135
4136 *type_out = vectype1;
4137 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4138
4139 return pattern_stmt;
4140 }
4141
4142 /* STMT_INFO is a load or store. If the load or store is conditional, return
4143 the boolean condition under which it occurs, otherwise return null. */
4144
4145 static tree
4146 vect_get_load_store_mask (stmt_vec_info stmt_info)
4147 {
4148 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4149 {
4150 gcc_assert (gimple_assign_single_p (def_assign));
4151 return NULL_TREE;
4152 }
4153
4154 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4155 {
4156 internal_fn ifn = gimple_call_internal_fn (def_call);
4157 int mask_index = internal_fn_mask_index (ifn);
4158 return gimple_call_arg (def_call, mask_index);
4159 }
4160
4161 gcc_unreachable ();
4162 }
4163
4164 /* Return the scalar offset type that an internal gather/scatter function
4165 should use. GS_INFO describes the gather/scatter operation. */
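
/* E.g. (illustrative), for a gather of 64-bit elements indexed by a
   32-bit int offset, this returns a 64-bit signed integer type, so that
   the offset vector can be sign-extended to the element width. */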
4166
4167 static tree
4168 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4169 {
4170 tree offset_type = TREE_TYPE (gs_info->offset);
4171 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4172
4173 /* Enforced by vect_check_gather_scatter. */
4174 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4175 gcc_assert (element_bits >= offset_bits);
4176
4177 /* If the offset is narrower than the elements, extend it according
4178 to its sign. */
4179 if (element_bits > offset_bits)
4180 return build_nonstandard_integer_type (element_bits,
4181 TYPE_UNSIGNED (offset_type));
4182
4183 return offset_type;
4184 }
4185
4186 /* Return MASK if MASK is suitable for masking an operation on vectors
4187 of type VECTYPE, otherwise convert it into such a form and return
4188 the result. Associate any conversion statements with STMT_INFO's
4189 pattern. */
4190
4191 static tree
4192 vect_convert_mask_for_vectype (tree mask, tree vectype,
4193 stmt_vec_info stmt_info, vec_info *vinfo)
4194 {
4195 tree mask_type = search_type_for_mask (mask, vinfo);
4196 if (mask_type)
4197 {
4198 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4199 if (mask_vectype
4200 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4201 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4202 mask = build_mask_conversion (mask, vectype, stmt_info);
4203 }
4204 return mask;
4205 }
4206
4207 /* Return the equivalent of:
4208
4209 fold_convert (TYPE, VALUE)
4210
4211 with the expectation that the operation will be vectorized.
4212 If new statements are needed, add them as pattern statements
4213 to STMT_INFO. */
4214
4215 static tree
4216 vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
4217 {
4218 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4219 return value;
4220
4221 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4222 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4223 append_pattern_def_seq (stmt_info, conversion,
4224 get_vectype_for_scalar_type (type));
4225 return new_value;
4226 }
4227
4228 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4229 internal function. Return the final statement on success and set
4230 *TYPE_OUT to the vector type being loaded or stored.
4231
4232 This function only handles gathers and scatters that were recognized
4233 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
4234
4235 static gimple *
4236 vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
4237 {
4238 /* Currently we only support this for loop vectorization. */
4239 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4240 if (!loop_vinfo)
4241 return NULL;
4242
4243 /* Make sure that we're looking at a gather load or scatter store. */
4244 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4245 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4246 return NULL;
4247
4248 /* Get the boolean that controls whether the load or store happens.
4249 This is null if the operation is unconditional. */
4250 tree mask = vect_get_load_store_mask (stmt_info);
4251
4252 /* Make sure that the target supports an appropriate internal
4253 function for the gather/scatter operation. */
4254 gather_scatter_info gs_info;
4255 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4256 || gs_info.decl)
4257 return NULL;
4258
4259 /* Convert the mask to the right form. */
4260 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4261 if (mask)
4262 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4263 loop_vinfo);
4264
4265 /* Get the invariant base and non-invariant offset, converting the
4266 latter to the same width as the vector elements. */
4267 tree base = gs_info.base;
4268 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4269 tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4270 stmt_info);
4271
4272 /* Build the new pattern statement. */
4273 tree scale = size_int (gs_info.scale);
4274 gcall *pattern_stmt;
4275 if (DR_IS_READ (dr))
4276 {
4277 if (mask != NULL)
4278 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4279 offset, scale, mask);
4280 else
4281 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4282 offset, scale);
4283 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4284 gimple_call_set_lhs (pattern_stmt, load_lhs);
4285 }
4286 else
4287 {
4288 tree rhs = vect_get_store_rhs (stmt_info);
4289 if (mask != NULL)
4290 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4291 base, offset, scale, rhs,
4292 mask);
4293 else
4294 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4295 base, offset, scale, rhs);
4296 }
4297 gimple_call_set_nothrow (pattern_stmt, true);
4298
4299 /* Copy across relevant vectorization info and associate DR with the
4300 new pattern statement instead of the original statement. */
4301 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4302 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4303
4304 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4305 *type_out = vectype;
4306 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4307
4308 return pattern_stmt;
4309 }
4310
4311 /* Return true if TYPE is a non-boolean integer type. These are the types
4312 that we want to consider for narrowing. */
4313
4314 static bool
4315 vect_narrowable_type_p (tree type)
4316 {
4317 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4318 }
4319
4320 /* Return true if the operation given by CODE can be truncated to N bits
4321 when only N bits of the output are needed. This is only true if bit N+1
4322 of the inputs has no effect on the low N bits of the result. */
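
/* E.g. (illustrative), PLUS_EXPR qualifies because (x + y) mod 2^N
   depends only on x mod 2^N and y mod 2^N, whereas RSHIFT_EXPR does
   not: bit N of the input affects bit N-1 of x >> 1. */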
4323
4324 static bool
4325 vect_truncatable_operation_p (tree_code code)
4326 {
4327 switch (code)
4328 {
4329 case PLUS_EXPR:
4330 case MINUS_EXPR:
4331 case MULT_EXPR:
4332 case BIT_AND_EXPR:
4333 case BIT_IOR_EXPR:
4334 case BIT_XOR_EXPR:
4335 case COND_EXPR:
4336 return true;
4337
4338 default:
4339 return false;
4340 }
4341 }
4342
4343 /* Record that STMT_INFO could be changed from operating on TYPE to
4344 operating on a type with the precision and sign given by PRECISION
4345 and SIGN respectively. PRECISION is an arbitrary bit precision;
4346 it might not be a whole number of bytes. */
4347
4348 static void
4349 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4350 unsigned int precision, signop sign)
4351 {
4352 /* Round the precision up to a whole number of bytes. */
4353 precision = vect_element_precision (precision);
4354 if (precision < TYPE_PRECISION (type)
4355 && (!stmt_info->operation_precision
4356 || stmt_info->operation_precision > precision))
4357 {
4358 stmt_info->operation_precision = precision;
4359 stmt_info->operation_sign = sign;
4360 }
4361 }
4362
4363 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4364 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4365 is an arbitrary bit precision; it might not be a whole number of bytes. */
4366
4367 static void
4368 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4369 unsigned int min_input_precision)
4370 {
4371 /* This operation in isolation only requires the inputs to have
4372 MIN_INPUT_PRECISION bits of precision. However, that doesn't mean
4373 that MIN_INPUT_PRECISION is a natural precision for the chain
4374 as a whole. E.g. consider something like:
4375
4376 unsigned short *x, *y;
4377 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4378
4379 The right shift can be done on unsigned chars, and only requires the
4380 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4381 approach would mean turning a natural chain of single-vector unsigned
4382 short operations into one that truncates "*x" and then extends
4383 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4384 operation and one vector for each unsigned char operation.
4385 This would be a significant pessimization.
4386
4387 Instead only propagate the maximum of this precision and the precision
4388 required by the users of the result. This means that we don't pessimize
4389 the case above but continue to optimize things like:
4390
4391 unsigned char *y;
4392 unsigned short *x;
4393 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4394
4395 Here we would truncate two vectors of *x to a single vector of
4396 unsigned chars and use single-vector unsigned char operations for
4397 everything else, rather than doing two unsigned short copies of
4398 "(*x & 0xf0) >> 4" and then truncating the result. */
4399 min_input_precision = MAX (min_input_precision,
4400 stmt_info->min_output_precision);
4401
4402 if (min_input_precision < TYPE_PRECISION (type)
4403 && (!stmt_info->min_input_precision
4404 || stmt_info->min_input_precision > min_input_precision))
4405 stmt_info->min_input_precision = min_input_precision;
4406 }
4407
4408 /* Subroutine of vect_determine_min_output_precision. Return true if
4409 we can calculate a reduced number of output bits for STMT_INFO,
4410 whose result is LHS. */
4411
4412 static bool
4413 vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
4414 {
4415 /* Take the maximum precision required by users of the result. */
4416 vec_info *vinfo = stmt_info->vinfo;
4417 unsigned int precision = 0;
4418 imm_use_iterator iter;
4419 use_operand_p use;
4420 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4421 {
4422 gimple *use_stmt = USE_STMT (use);
4423 if (is_gimple_debug (use_stmt))
4424 continue;
4425 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4426 if (!use_stmt_info || !use_stmt_info->min_input_precision)
4427 return false;
4428 /* The input precision recorded for COND_EXPRs applies only to the
4429 "then" and "else" values. */
4430 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
4431 if (assign
4432 && gimple_assign_rhs_code (assign) == COND_EXPR
4433 && use->use != gimple_assign_rhs2_ptr (assign)
4434 && use->use != gimple_assign_rhs3_ptr (assign))
4435 return false;
4436 precision = MAX (precision, use_stmt_info->min_input_precision);
4437 }
4438
4439 if (dump_enabled_p ())
4440 dump_printf_loc (MSG_NOTE, vect_location,
4441 "only the low %d bits of %T are significant\n",
4442 precision, lhs);
4443 stmt_info->min_output_precision = precision;
4444 return true;
4445 }
4446
4447 /* Calculate min_output_precision for STMT_INFO. */
4448
4449 static void
4450 vect_determine_min_output_precision (stmt_vec_info stmt_info)
4451 {
4452 /* We're only interested in statements with a narrowable result. */
4453 tree lhs = gimple_get_lhs (stmt_info->stmt);
4454 if (!lhs
4455 || TREE_CODE (lhs) != SSA_NAME
4456 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4457 return;
4458
4459 if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
4460 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4461 }
4462
4463 /* Use range information to decide whether STMT (described by STMT_INFO)
4464 could be done in a narrower type. This is effectively a forward
4465 propagation, since it uses context-independent information that applies
4466 to all users of an SSA name. */
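
/* E.g. (illustrative), if range info shows that the result of an int
   addition is always in [0, 200], only the low 8 bits of the result
   are significant, and since PLUS_EXPR is truncatable the addition
   itself can be recorded as doable in 8 unsigned bits. */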
4467
4468 static void
4469 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4470 {
4471 tree lhs = gimple_assign_lhs (stmt);
4472 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4473 return;
4474
4475 tree type = TREE_TYPE (lhs);
4476 if (!vect_narrowable_type_p (type))
4477 return;
4478
4479 /* First see whether we have any useful range information for the result. */
4480 unsigned int precision = TYPE_PRECISION (type);
4481 signop sign = TYPE_SIGN (type);
4482 wide_int min_value, max_value;
4483 if (!vect_get_range_info (lhs, &min_value, &max_value))
4484 return;
4485
4486 tree_code code = gimple_assign_rhs_code (stmt);
4487 unsigned int nops = gimple_num_ops (stmt);
4488
4489 if (!vect_truncatable_operation_p (code))
4490 /* Check that all relevant input operands are compatible, and update
4491 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4492 for (unsigned int i = 1; i < nops; ++i)
4493 {
4494 tree op = gimple_op (stmt, i);
4495 if (TREE_CODE (op) == INTEGER_CST)
4496 {
4497 /* Don't require the integer to have RHS_TYPE (which it might
4498 not for things like shift amounts, etc.), but do require it
4499 to fit the type. */
4500 if (!int_fits_type_p (op, type))
4501 return;
4502
4503 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4504 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4505 }
4506 else if (TREE_CODE (op) == SSA_NAME)
4507 {
4508 /* Ignore codes that don't take uniform arguments. */
4509 if (!types_compatible_p (TREE_TYPE (op), type))
4510 return;
4511
4512 wide_int op_min_value, op_max_value;
4513 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4514 return;
4515
4516 min_value = wi::min (min_value, op_min_value, sign);
4517 max_value = wi::max (max_value, op_max_value, sign);
4518 }
4519 else
4520 return;
4521 }
4522
4523 /* Try to switch signed types for unsigned types if we can.
4524 This is better for two reasons. First, unsigned ops tend
4525 to be cheaper than signed ops. Second, it means that we can
4526 handle things like:
4527
4528 signed char c;
4529 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4530
4531 as:
4532
4533 signed char c;
4534 unsigned short res_1 = (unsigned short) c & 0xff00;
4535 int res = (int) res_1;
4536
4537 where the intermediate result res_1 has unsigned rather than
4538 signed type. */
4539 if (sign == SIGNED && !wi::neg_p (min_value))
4540 sign = UNSIGNED;
4541
4542 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4543 unsigned int precision1 = wi::min_precision (min_value, sign);
4544 unsigned int precision2 = wi::min_precision (max_value, sign);
4545 unsigned int value_precision = MAX (precision1, precision2);
4546 if (value_precision >= precision)
4547 return;
4548
4549 if (dump_enabled_p ())
4550 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4551 " without loss of precision: %G",
4552 sign == SIGNED ? "signed" : "unsigned",
4553 value_precision, stmt);
4554
4555 vect_set_operation_type (stmt_info, type, value_precision, sign);
4556 vect_set_min_input_precision (stmt_info, type, value_precision);
4557 }
4558
4559 /* Use information about the users of STMT's result to decide whether
4560 STMT (described by STMT_INFO) could be done in a narrower type.
4561 This is effectively a backward propagation. */
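
/* E.g. (illustrative), in

     unsigned short s = ...;
     unsigned char c = s >> 8;

   only 8 bits of output are needed, so the RSHIFT_EXPR arm below asks
   for 8 + 8 = 16 bits of operation and input precision and the shift
   stays in unsigned short. */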
4562
4563 static void
4564 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4565 {
4566 tree_code code = gimple_assign_rhs_code (stmt);
4567 unsigned int opno = (code == COND_EXPR ? 2 : 1);
4568 tree type = TREE_TYPE (gimple_op (stmt, opno));
4569 if (!vect_narrowable_type_p (type))
4570 return;
4571
4572 unsigned int precision = TYPE_PRECISION (type);
4573 unsigned int operation_precision, min_input_precision;
4574 switch (code)
4575 {
4576 CASE_CONVERT:
4577 /* Only the bits that contribute to the output matter. Don't change
4578 the precision of the operation itself. */
4579 operation_precision = precision;
4580 min_input_precision = stmt_info->min_output_precision;
4581 break;
4582
4583 case LSHIFT_EXPR:
4584 case RSHIFT_EXPR:
4585 {
4586 tree shift = gimple_assign_rhs2 (stmt);
4587 if (TREE_CODE (shift) != INTEGER_CST
4588 || !wi::ltu_p (wi::to_widest (shift), precision))
4589 return;
4590 unsigned int const_shift = TREE_INT_CST_LOW (shift);
4591 if (code == LSHIFT_EXPR)
4592 {
4593 /* Avoid creating an undefined shift.
4594
4595 ??? We could instead use min_output_precision as-is and
4596 optimize out-of-range shifts to zero. However, only
4597 degenerate testcases shift away all their useful input data,
4598 and it isn't natural to drop input operations in the middle
4599 of vectorization. This sort of thing should really be
4600 handled before vectorization. */
4601 operation_precision = MAX (stmt_info->min_output_precision,
4602 const_shift + 1);
4603 /* We need CONST_SHIFT fewer bits of the input. */
4604 min_input_precision = (MAX (operation_precision, const_shift)
4605 - const_shift);
4606 }
4607 else
4608 {
4609 /* We need CONST_SHIFT extra bits to do the operation. */
4610 operation_precision = (stmt_info->min_output_precision
4611 + const_shift);
4612 min_input_precision = operation_precision;
4613 }
4614 break;
4615 }
4616
4617 default:
4618 if (vect_truncatable_operation_p (code))
4619 {
4620 /* Input bit N has no effect on output bits N-1 and lower. */
4621 operation_precision = stmt_info->min_output_precision;
4622 min_input_precision = operation_precision;
4623 break;
4624 }
4625 return;
4626 }
4627
4628 if (operation_precision < precision)
4629 {
4630 if (dump_enabled_p ())
4631 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4632 " without affecting users: %G",
4633 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
4634 operation_precision, stmt);
4635 vect_set_operation_type (stmt_info, type, operation_precision,
4636 TYPE_SIGN (type));
4637 }
4638 vect_set_min_input_precision (stmt_info, type, min_input_precision);
4639 }
4640
4641 /* Handle vect_determine_precisions for STMT_INFO, given that we
4642 have already done so for the users of its result. */
4643
4644 void
4645 vect_determine_stmt_precisions (stmt_vec_info stmt_info)
4646 {
4647 vect_determine_min_output_precision (stmt_info);
4648 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
4649 {
4650 vect_determine_precisions_from_range (stmt_info, stmt);
4651 vect_determine_precisions_from_users (stmt_info, stmt);
4652 }
4653 }
4654
4655 /* Walk backwards through the vectorizable region to determine the
4656 values of these fields:
4657
4658 - min_output_precision
4659 - min_input_precision
4660 - operation_precision
4661 - operation_sign. */
4662
4663 void
4664 vect_determine_precisions (vec_info *vinfo)
4665 {
4666 DUMP_VECT_SCOPE ("vect_determine_precisions");
4667
4668 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4669 {
4670 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4671 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4672 unsigned int nbbs = loop->num_nodes;
4673
4674 for (unsigned int i = 0; i < nbbs; i++)
4675 {
4676 basic_block bb = bbs[nbbs - i - 1];
4677 for (gimple_stmt_iterator si = gsi_last_bb (bb);
4678 !gsi_end_p (si); gsi_prev (&si))
4679 vect_determine_stmt_precisions
4680 (vinfo->lookup_stmt (gsi_stmt (si)));
4681 }
4682 }
4683 else
4684 {
4685 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4686 gimple_stmt_iterator si = bb_vinfo->region_end;
4687 gimple *stmt;
4688 do
4689 {
4690 if (!gsi_stmt (si))
4691 si = gsi_last_bb (bb_vinfo->bb);
4692 else
4693 gsi_prev (&si);
4694 stmt = gsi_stmt (si);
4695 stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
4696 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
4697 vect_determine_stmt_precisions (stmt_info);
4698 }
4699 while (stmt != gsi_stmt (bb_vinfo->region_begin));
4700 }
4701 }
4702
4703 typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);
4704
4705 struct vect_recog_func
4706 {
4707 vect_recog_func_ptr fn;
4708 const char *name;
4709 };
4710
4711 /* Note that ordering matters - the first pattern matching on a stmt is
4712 taken, which means the more complex patterns usually need to precede
4713 the less complex ones (widen_sum only after dot_prod or sad, for example). */
4714 static vect_recog_func vect_vect_recog_func_ptrs[] = {
4715 { vect_recog_over_widening_pattern, "over_widening" },
4716 /* Must come after over_widening, which narrows the shift as much as
4717 possible beforehand. */
4718 { vect_recog_average_pattern, "average" },
4719 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
4720 { vect_recog_widen_mult_pattern, "widen_mult" },
4721 { vect_recog_dot_prod_pattern, "dot_prod" },
4722 { vect_recog_sad_pattern, "sad" },
4723 { vect_recog_widen_sum_pattern, "widen_sum" },
4724 { vect_recog_pow_pattern, "pow" },
4725 { vect_recog_widen_shift_pattern, "widen_shift" },
4726 { vect_recog_rotate_pattern, "rotate" },
4727 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
4728 { vect_recog_divmod_pattern, "divmod" },
4729 { vect_recog_mult_pattern, "mult" },
4730 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
4731 { vect_recog_bool_pattern, "bool" },
4732 /* This must come before mask conversion, and includes the parts
4733 of mask conversion that are needed for gather and scatter
4734 internal functions. */
4735 { vect_recog_gather_scatter_pattern, "gather_scatter" },
4736 { vect_recog_mask_conversion_pattern, "mask_conversion" }
4737 };
4738
4739 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
4740
4741 /* Mark statements that are involved in a pattern. */
4742
4743 static inline void
4744 vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
4745 tree pattern_vectype)
4746 {
4747 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4748
4749 gimple *orig_pattern_stmt = NULL;
4750 if (is_pattern_stmt_p (orig_stmt_info))
4751 {
4752 /* We're replacing a statement in an existing pattern definition
4753 sequence. */
4754 orig_pattern_stmt = orig_stmt_info->stmt;
4755 if (dump_enabled_p ())
4756 dump_printf_loc (MSG_NOTE, vect_location,
4757 "replacing earlier pattern %G", orig_pattern_stmt);
4758
4759 /* To keep the book-keeping simple, just swap the lhs of the
4760 old and new statements, so that the old one has a valid but
4761 unused lhs. */
4762 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
4763 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
4764 gimple_set_lhs (pattern_stmt, old_lhs);
4765
4766 if (dump_enabled_p ())
4767 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
4768
4769 /* Switch to the statement that ORIG replaces. */
4770 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
4771
4772 /* We shouldn't be replacing the main pattern statement. */
4773 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
4774 != orig_pattern_stmt);
4775 }
4776
4777 if (def_seq)
4778 for (gimple_stmt_iterator si = gsi_start (def_seq);
4779 !gsi_end_p (si); gsi_next (&si))
4780 {
4781 stmt_vec_info pattern_stmt_info
4782 = vect_init_pattern_stmt (gsi_stmt (si),
4783 orig_stmt_info, pattern_vectype);
4784 /* Stmts in the def sequence are not vectorizable cycle or
4785 induction defs, instead they should all be vect_internal_def
4786 feeding the main pattern stmt which retains this def type. */
4787 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
4788 }
4789
4790 if (orig_pattern_stmt)
4791 {
4792 vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4793
4794 /* Insert all the new pattern statements before the original one. */
4795 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4796 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
4797 orig_def_seq);
4798 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
4799 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
4800
4801 /* Remove the pattern statement that this new pattern replaces. */
4802 gsi_remove (&gsi, false);
4803 }
4804 else
4805 vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4806 }
4807
4808 /* Function vect_pattern_recog_1
4809
4810 Input:
4811 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4812 computation pattern.
4813 STMT_INFO: A stmt from which the pattern search should start.
4814
4815 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
4816 a sequence of statements that has the same functionality and can be
4817 used to replace STMT_INFO. It returns the last statement in the sequence
4818 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
4819 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
4820 statement, having first checked that the target supports the new operation
4821 in that type.
4822
4823 This function also does some bookkeeping, as explained in the documentation
4824 for vect_pattern_recog. */
4825
4826 static void
4827 vect_pattern_recog_1 (vect_recog_func *recog_func, stmt_vec_info stmt_info)
4828 {
4829 vec_info *vinfo = stmt_info->vinfo;
4830 gimple *pattern_stmt;
4831 loop_vec_info loop_vinfo;
4832 tree pattern_vectype;
4833
4834 /* If this statement has already been replaced with pattern statements,
4835 leave the original statement alone, since the first match wins.
4836 Instead try to match against the definition statements that feed
4837 the main pattern statement. */
4838 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
4839 {
4840 gimple_stmt_iterator gsi;
4841 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4842 !gsi_end_p (gsi); gsi_next (&gsi))
4843 vect_pattern_recog_1 (recog_func, vinfo->lookup_stmt (gsi_stmt (gsi)));
4844 return;
4845 }
4846
4847 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4848 pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
4849 if (!pattern_stmt)
4850 {
4851 /* Clear any half-formed pattern definition sequence. */
4852 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
4853 return;
4854 }
4855
4856 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4857 gcc_assert (pattern_vectype);
4858
4859 /* Found a vectorizable pattern. */
4860 if (dump_enabled_p ())
4861 dump_printf_loc (MSG_NOTE, vect_location,
4862 "%s pattern recognized: %G",
4863 recog_func->name, pattern_stmt);
4864
4865 /* Mark the stmts that are involved in the pattern. */
4866 vect_mark_pattern_stmts (stmt_info, pattern_stmt, pattern_vectype);
4867
4868 /* Patterns cannot be vectorized using SLP, because they change the order of
4869 computation. */
4870 if (loop_vinfo)
4871 {
4872 unsigned ix, ix2;
4873 stmt_vec_info *elem_ptr;
4874 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
4875 elem_ptr, *elem_ptr == stmt_info);
4876 }
4877 }
4878
4879
4880 /* Function vect_pattern_recog
4881
4882 Input:
4883 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
4884 computation idioms.
4885
4886 Output - for each computation idiom that is detected we create a new stmt
4887 that provides the same functionality and that can be vectorized. We
4888 also record some information in the struct_stmt_info of the relevant
4889 stmts, as explained below:
4890
4891 At the entry to this function we have the following stmts, with the
4892 following initial value in the STMT_VINFO fields:
4893
4894 stmt in_pattern_p related_stmt vec_stmt
4895 S1: a_i = .... - - -
4896 S2: a_2 = ..use(a_i).. - - -
4897 S3: a_1 = ..use(a_2).. - - -
4898 S4: a_0 = ..use(a_1).. - - -
4899 S5: ... = ..use(a_0).. - - -
4900
4901 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4902 represented by a single stmt. We then:
4903 - create a new stmt S6 equivalent to the pattern (the stmt is not
4904 inserted into the code)
4905 - fill in the STMT_VINFO fields as follows:
4906
4907 in_pattern_p related_stmt vec_stmt
4908 S1: a_i = .... - - -
4909 S2: a_2 = ..use(a_i).. - - -
4910 S3: a_1 = ..use(a_2).. - - -
4911 S4: a_0 = ..use(a_1).. true S6 -
4912 '---> S6: a_new = .... - S4 -
4913 S5: ... = ..use(a_0).. - - -
4914
4915 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4916 to each other through the RELATED_STMT field).
4917
4918 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4919 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4920 remain irrelevant unless used by stmts other than S4.
4921
4922 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4923 (because they are marked as irrelevant). It will vectorize S6, and record
4924 a pointer to the new vector stmt VS6 from S6 (as usual).
4925 S4 will be skipped, and S5 will be vectorized as usual:
4926
4927 in_pattern_p related_stmt vec_stmt
4928 S1: a_i = .... - - -
4929 S2: a_2 = ..use(a_i).. - - -
4930 S3: a_1 = ..use(a_2).. - - -
4931 > VS6: va_new = .... - - -
4932 S4: a_0 = ..use(a_1).. true S6 VS6
4933 '---> S6: a_new = .... - S4 VS6
4934 > VS5: ... = ..vuse(va_new).. - - -
4935 S5: ... = ..use(a_0).. - - -
4936
4937 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4938 elsewhere), and we'll end up with:
4939
4940 VS6: va_new = ....
4941 VS5: ... = ..vuse(va_new)..
4942
4943 In case of more than one pattern statements, e.g., widen-mult with
4944 intermediate type:
4945
4946 S1 a_t = ;
4947 S2 a_T = (TYPE) a_t;
4948 '--> S3: a_it = (interm_type) a_t;
4949 S4 prod_T = a_T * CONST;
4950 '--> S5: prod_T' = a_it w* CONST;
4951
4952 there may be other users of a_T outside the pattern. In that case S2 will
4953 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4954 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4955 be recorded in S3. */
4956
4957 void
4958 vect_pattern_recog (vec_info *vinfo)
4959 {
4960 struct loop *loop;
4961 basic_block *bbs;
4962 unsigned int nbbs;
4963 gimple_stmt_iterator si;
4964 unsigned int i, j;
4965
4966 vect_determine_precisions (vinfo);
4967
4968 DUMP_VECT_SCOPE ("vect_pattern_recog");
4969
4970 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4971 {
4972 loop = LOOP_VINFO_LOOP (loop_vinfo);
4973 bbs = LOOP_VINFO_BBS (loop_vinfo);
4974 nbbs = loop->num_nodes;
4975
4976 /* Scan through the loop stmts, applying the pattern recognition
4977 functions starting at each stmt visited: */
4978 for (i = 0; i < nbbs; i++)
4979 {
4980 basic_block bb = bbs[i];
4981 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4982 {
4983 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
4984 /* Scan over all generic vect_recog_xxx_pattern functions. */
4985 for (j = 0; j < NUM_PATTERNS; j++)
4986 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j],
4987 stmt_info);
4988 }
4989 }
4990 }
4991 else
4992 {
4993 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4994 for (si = bb_vinfo->region_begin;
4995 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4996 {
4997 gimple *stmt = gsi_stmt (si);
4998 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
4999 if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
5000 continue;
5001
5002 /* Scan over all generic vect_recog_xxx_pattern functions. */
5003 for (j = 0; j < NUM_PATTERNS; j++)
5004 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], stmt_info);
5005 }
5006 }
5007 }
5008