1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48
49 /* Pattern recognition functions */
50 static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
51 tree *);
52 static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
53 tree *);
54 static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
55 tree *);
56 static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
57 tree *);
58 static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
59 static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
60 tree *);
61 static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
62 tree *, tree *);
63 static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
64 static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
65 tree *, tree *);
66 static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
67 tree *, tree *);
68
69 static gimple *vect_recog_mult_pattern (vec<gimple *> *,
70 tree *, tree *);
71
72 static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
73 tree *, tree *);
74 static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
75 static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *, tree *);
76 static gimple *vect_recog_gather_scatter_pattern (vec<gimple *> *, tree *,
77 tree *);
78
79 struct vect_recog_func
80 {
81 vect_recog_func_ptr fn;
82 const char *name;
83 };
84
85 /* Note that ordering matters - the first pattern matching on a stmt
86    is taken, which means the more complex pattern usually needs to precede
87    the less complex ones (widen_sum only after dot_prod or sad, for example).  */
88 static vect_recog_func vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
89 { vect_recog_widen_mult_pattern, "widen_mult" },
90 { vect_recog_dot_prod_pattern, "dot_prod" },
91 { vect_recog_sad_pattern, "sad" },
92 { vect_recog_widen_sum_pattern, "widen_sum" },
93 { vect_recog_pow_pattern, "pow" },
94 { vect_recog_widen_shift_pattern, "widen_shift" },
95 { vect_recog_over_widening_pattern, "over_widening" },
96 { vect_recog_rotate_pattern, "rotate" },
97 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
98 { vect_recog_divmod_pattern, "divmod" },
99 { vect_recog_mult_pattern, "mult" },
100 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
101 { vect_recog_bool_pattern, "bool" },
102 /* This must come before mask conversion, and includes the parts
103 of mask conversion that are needed for gather and scatter
104 internal functions. */
105 { vect_recog_gather_scatter_pattern, "gather_scatter" },
106 { vect_recog_mask_conversion_pattern, "mask_conversion" }
107 };
108
109 static inline void
110 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
111 {
112 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
113 stmt);
114 }
115
116 static inline void
117 new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
118 {
119 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
120 append_pattern_def_seq (stmt_info, stmt);
121 }
122
123 /* Check whether STMT2 is in the same loop or basic block as STMT1.
124 Which of the two applies depends on whether we're currently doing
125 loop-based or basic-block-based vectorization, as determined by
126 the vinfo_for_stmt for STMT1 (which must be defined).
127
128 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
129 to be defined as well. */
130
131 static bool
132 vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
133 {
134 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
135 return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
136 }
137
138 /* If the LHS of DEF_STMT has a single use, and that statement is
139 in the same loop or basic block, return it. */
140
141 static gimple *
142 vect_single_imm_use (gimple *def_stmt)
143 {
144 tree lhs = gimple_assign_lhs (def_stmt);
145 use_operand_p use_p;
146 gimple *use_stmt;
147
148 if (!single_imm_use (lhs, &use_p, &use_stmt))
149 return NULL;
150
151 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
152 return NULL;
153
154 return use_stmt;
155 }
156
157 /* Check whether NAME, an ssa-name used in USE_STMT,
158 is a result of a type promotion, such that:
159 DEF_STMT: NAME = NOP (name0)
160 If CHECK_SIGN is TRUE, check that either both types are signed or both are
161 unsigned. */
162
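/* Illustrative note (not part of GCC): for a use such as

       unsigned char c;
       int i = c;

   DEF_STMT would be the conversion, *ORIG_TYPE unsigned char, and
   *PROMOTION true because int has at least twice its precision.
   With CHECK_SIGN true this particular case would be rejected, since
   the signedness of the two types differs.  */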
163 static bool
164 type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
165 tree *orig_type, gimple **def_stmt, bool *promotion)
166 {
167 gimple *dummy_gimple;
168 stmt_vec_info stmt_vinfo;
169 tree type = TREE_TYPE (name);
170 tree oprnd0;
171 enum vect_def_type dt;
172
173 stmt_vinfo = vinfo_for_stmt (use_stmt);
174 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
175 return false;
176
177 if (dt != vect_internal_def
178 && dt != vect_external_def && dt != vect_constant_def)
179 return false;
180
181 if (!*def_stmt)
182 return false;
183
184 if (dt == vect_internal_def)
185 {
186 stmt_vec_info def_vinfo = vinfo_for_stmt (*def_stmt);
187 if (STMT_VINFO_IN_PATTERN_P (def_vinfo))
188 return false;
189 }
190
191 if (!is_gimple_assign (*def_stmt))
192 return false;
193
194 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
195 return false;
196
197 oprnd0 = gimple_assign_rhs1 (*def_stmt);
198
199 *orig_type = TREE_TYPE (oprnd0);
200 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
201 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
202 return false;
203
204 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
205 *promotion = true;
206 else
207 *promotion = false;
208
209 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
210 return false;
211
212 return true;
213 }
214
215 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
216 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
217
218 static tree
219 vect_recog_temp_ssa_var (tree type, gimple *stmt)
220 {
221 return make_temp_ssa_name (type, stmt, "patt");
222 }
223
224 /* Return true if STMT_VINFO describes a reduction for which reassociation
225 is allowed. If STMT_INFO is part of a group, assume that it's part of
226 a reduction chain and optimistically assume that all statements
227 except the last allow reassociation. */
228
229 static bool
230 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
231 {
232 return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
233 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
234 : GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
235 }
236
237 /* Function vect_recog_dot_prod_pattern
238
239 Try to find the following pattern:
240
241 type x_t, y_t;
242 TYPE1 prod;
243 TYPE2 sum = init;
244 loop:
245 sum_0 = phi <init, sum_1>
246 S1 x_t = ...
247 S2 y_t = ...
248 S3 x_T = (TYPE1) x_t;
249 S4 y_T = (TYPE1) y_t;
250 S5 prod = x_T * y_T;
251 [S6 prod = (TYPE2) prod; #optional]
252 S7 sum_1 = prod + sum_0;
253
254 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
255    same size as 'TYPE1' or bigger.  This is a special case of a reduction
256 computation.
257
258 Input:
259
260 * STMTS: Contains a stmt from which the pattern search begins. In the
261 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
262 will be detected.
263
264 Output:
265
266 * TYPE_IN: The type of the input arguments to the pattern.
267
268 * TYPE_OUT: The type of the output of this pattern.
269
270 * Return value: A new stmt that will be used to replace the sequence of
271 stmts that constitute the pattern. In this case it will be:
272 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
273
274 Note: The dot-prod idiom is a widening reduction pattern that is
275 vectorized without preserving all the intermediate results. It
276 produces only N/2 (widened) results (by summing up pairs of
277 intermediate results) rather than all N results. Therefore, we
278 cannot allow this pattern when we want to get all the results and in
279 the correct order (as is the case when this computation is in an
280    inner-loop nested in an outer-loop that is being vectorized).  */
281
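/* Illustrative sketch (not part of GCC): a scalar source loop of roughly
   this shape is the kind of code this recognizer aims to match; the
   variable names are made up for the example.

       short *x, *y;
       int sum = 0;
       for (int i = 0; i < n; i++)
	 sum += (int) x[i] * (int) y[i];

   The narrow elements are widened, multiplied, and accumulated into a
   wider sum, which maps to DOT_PROD_EXPR when the target supports it.  */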
282 static gimple *
283 vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
284 tree *type_out)
285 {
286 gimple *stmt, *last_stmt = (*stmts)[0];
287 tree oprnd0, oprnd1;
288 tree oprnd00, oprnd01;
289 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
290 tree type, half_type;
291 gimple *pattern_stmt;
292 tree prod_type;
293 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
294 struct loop *loop;
295 tree var;
296 bool promotion;
297
298 if (!loop_info)
299 return NULL;
300
301 loop = LOOP_VINFO_LOOP (loop_info);
302
303 /* We don't allow changing the order of the computation in the inner-loop
304 when doing outer-loop vectorization. */
305 if (loop && nested_in_vect_loop_p (loop, last_stmt))
306 return NULL;
307
308 if (!is_gimple_assign (last_stmt))
309 return NULL;
310
311 type = gimple_expr_type (last_stmt);
312
313 /* Look for the following pattern
314 DX = (TYPE1) X;
315 DY = (TYPE1) Y;
316 DPROD = DX * DY;
317 DDPROD = (TYPE2) DPROD;
318 sum_1 = DDPROD + sum_0;
319 In which
320 - DX is double the size of X
321 - DY is double the size of Y
322 - DX, DY, DPROD all have the same type
323          - sum is the same size as DPROD or bigger
324 - sum has been recognized as a reduction variable.
325
326 This is equivalent to:
327 DPROD = X w* Y; #widen mult
328 sum_1 = DPROD w+ sum_0; #widen summation
329 or
330 DPROD = X w* Y; #widen mult
331 sum_1 = DPROD + sum_0; #summation
332 */
333
334 /* Starting from LAST_STMT, follow the defs of its uses in search
335 of the above pattern. */
336
337 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
338 return NULL;
339
340 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
341 {
342 /* Has been detected as widening-summation? */
343
344 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
345 type = gimple_expr_type (stmt);
346 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
347 return NULL;
348 oprnd0 = gimple_assign_rhs1 (stmt);
349 oprnd1 = gimple_assign_rhs2 (stmt);
350 half_type = TREE_TYPE (oprnd0);
351 }
352 else
353 {
354 gimple *def_stmt;
355
356 if (!vect_reassociating_reduction_p (stmt_vinfo))
357 return NULL;
358 oprnd0 = gimple_assign_rhs1 (last_stmt);
359 oprnd1 = gimple_assign_rhs2 (last_stmt);
360 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
361 || !types_compatible_p (TREE_TYPE (oprnd1), type))
362 return NULL;
363 stmt = last_stmt;
364
365 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
366 &promotion)
367 && promotion)
368 {
369 stmt = def_stmt;
370 oprnd0 = gimple_assign_rhs1 (stmt);
371 }
372 else
373 half_type = type;
374 }
375
376 /* So far so good. Since last_stmt was detected as a (summation) reduction,
377 we know that oprnd1 is the reduction variable (defined by a loop-header
378 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
379 Left to check that oprnd0 is defined by a (widen_)mult_expr */
380 if (TREE_CODE (oprnd0) != SSA_NAME)
381 return NULL;
382
383 prod_type = half_type;
384 stmt = SSA_NAME_DEF_STMT (oprnd0);
385
386 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
387 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
388 return NULL;
389
390   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
391 inside the loop (in case we are analyzing an outer-loop). */
392 if (!is_gimple_assign (stmt))
393 return NULL;
394 stmt_vinfo = vinfo_for_stmt (stmt);
395 gcc_assert (stmt_vinfo);
396 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
397 return NULL;
398 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
399 return NULL;
400 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
401 {
402 /* Has been detected as a widening multiplication? */
403
404 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
405 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
406 return NULL;
407 stmt_vinfo = vinfo_for_stmt (stmt);
408 gcc_assert (stmt_vinfo);
409 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
410 oprnd00 = gimple_assign_rhs1 (stmt);
411 oprnd01 = gimple_assign_rhs2 (stmt);
412 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
413 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
414 }
415 else
416 {
417 tree half_type0, half_type1;
418 gimple *def_stmt;
419 tree oprnd0, oprnd1;
420
421 oprnd0 = gimple_assign_rhs1 (stmt);
422 oprnd1 = gimple_assign_rhs2 (stmt);
423 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
424 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
425 return NULL;
426 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
427 &promotion)
428 || !promotion)
429 return NULL;
430 oprnd00 = gimple_assign_rhs1 (def_stmt);
431 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
432 &promotion)
433 || !promotion)
434 return NULL;
435 oprnd01 = gimple_assign_rhs1 (def_stmt);
436 if (!types_compatible_p (half_type0, half_type1))
437 return NULL;
438 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
439 return NULL;
440 }
441
442 half_type = TREE_TYPE (oprnd00);
443 *type_in = half_type;
444 *type_out = type;
445
446 /* Pattern detected. Create a stmt to be used to replace the pattern: */
447 var = vect_recog_temp_ssa_var (type, NULL);
448 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
449 oprnd00, oprnd01, oprnd1);
450
451 if (dump_enabled_p ())
452 {
453 dump_printf_loc (MSG_NOTE, vect_location,
454 "vect_recog_dot_prod_pattern: detected: ");
455 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
456 }
457
458 return pattern_stmt;
459 }
460
461
462 /* Function vect_recog_sad_pattern
463
464 Try to find the following Sum of Absolute Difference (SAD) pattern:
465
466 type x_t, y_t;
467 signed TYPE1 diff, abs_diff;
468 TYPE2 sum = init;
469 loop:
470 sum_0 = phi <init, sum_1>
471 S1 x_t = ...
472 S2 y_t = ...
473 S3 x_T = (TYPE1) x_t;
474 S4 y_T = (TYPE1) y_t;
475 S5 diff = x_T - y_T;
476 S6 abs_diff = ABS_EXPR <diff>;
477 [S7 abs_diff = (TYPE2) abs_diff; #optional]
478 S8 sum_1 = abs_diff + sum_0;
479
480 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
481    same size as 'TYPE1' or bigger.  This is a special case of a reduction
482 computation.
483
484 Input:
485
486 * STMTS: Contains a stmt from which the pattern search begins. In the
487 example, when this function is called with S8, the pattern
488 {S3,S4,S5,S6,S7,S8} will be detected.
489
490 Output:
491
492 * TYPE_IN: The type of the input arguments to the pattern.
493
494 * TYPE_OUT: The type of the output of this pattern.
495
496 * Return value: A new stmt that will be used to replace the sequence of
497 stmts that constitute the pattern. In this case it will be:
498 SAD_EXPR <x_t, y_t, sum_0>
499 */
500
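/* Illustrative sketch (not part of GCC): a typical scalar loop this
   recognizer aims to match, with made-up names:

       unsigned char *x, *y;
       int sum = 0;
       for (int i = 0; i < n; i++)
	 sum += abs ((int) x[i] - (int) y[i]);

   The absolute differences of the narrow elements are accumulated into
   a wider sum, which maps to SAD_EXPR when the target supports it.  */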
501 static gimple *
502 vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
503 tree *type_out)
504 {
505 gimple *last_stmt = (*stmts)[0];
506 tree sad_oprnd0, sad_oprnd1;
507 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
508 tree half_type;
509 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
510 struct loop *loop;
511 bool promotion;
512
513 if (!loop_info)
514 return NULL;
515
516 loop = LOOP_VINFO_LOOP (loop_info);
517
518 /* We don't allow changing the order of the computation in the inner-loop
519 when doing outer-loop vectorization. */
520 if (loop && nested_in_vect_loop_p (loop, last_stmt))
521 return NULL;
522
523 if (!is_gimple_assign (last_stmt))
524 return NULL;
525
526 tree sum_type = gimple_expr_type (last_stmt);
527
528 /* Look for the following pattern
529 DX = (TYPE1) X;
530 DY = (TYPE1) Y;
531 DDIFF = DX - DY;
532 DAD = ABS_EXPR <DDIFF>;
533          DAD = (TYPE2) DAD;       #optional
534 sum_1 = DAD + sum_0;
535 In which
536 - DX is at least double the size of X
537 - DY is at least double the size of Y
538 - DX, DY, DDIFF, DAD all have the same type
539          - sum is the same size as DAD or bigger
540 - sum has been recognized as a reduction variable.
541
542 This is equivalent to:
543 DDIFF = X w- Y; #widen sub
544 DAD = ABS_EXPR <DDIFF>;
545 sum_1 = DAD w+ sum_0; #widen summation
546 or
547 DDIFF = X w- Y; #widen sub
548 DAD = ABS_EXPR <DDIFF>;
549 sum_1 = DAD + sum_0; #summation
550 */
551
552 /* Starting from LAST_STMT, follow the defs of its uses in search
553 of the above pattern. */
554
555 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
556 return NULL;
557
558 tree plus_oprnd0, plus_oprnd1;
559
560 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
561 {
562 /* Has been detected as widening-summation? */
563
564 gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
565 sum_type = gimple_expr_type (stmt);
566 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
567 return NULL;
568 plus_oprnd0 = gimple_assign_rhs1 (stmt);
569 plus_oprnd1 = gimple_assign_rhs2 (stmt);
570 half_type = TREE_TYPE (plus_oprnd0);
571 }
572 else
573 {
574 gimple *def_stmt;
575
576 if (!vect_reassociating_reduction_p (stmt_vinfo))
577 return NULL;
578 plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
579 plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
580 if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
581 || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
582 return NULL;
583
584 /* The type conversion could be promotion, demotion,
585 or just signed -> unsigned. */
586 if (type_conversion_p (plus_oprnd0, last_stmt, false,
587 &half_type, &def_stmt, &promotion))
588 plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
589 else
590 half_type = sum_type;
591 }
592
593 /* So far so good. Since last_stmt was detected as a (summation) reduction,
594 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
595 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
596 Then check that plus_oprnd0 is defined by an abs_expr. */
597
598 if (TREE_CODE (plus_oprnd0) != SSA_NAME)
599 return NULL;
600
601 tree abs_type = half_type;
602 gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
603
604 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
605 if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
606 return NULL;
607
608   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
609 inside the loop (in case we are analyzing an outer-loop). */
610 if (!is_gimple_assign (abs_stmt))
611 return NULL;
612
613 stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
614 gcc_assert (abs_stmt_vinfo);
615 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
616 return NULL;
617 if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
618 return NULL;
619
620 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
621 if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
622 return NULL;
623 if (TYPE_UNSIGNED (abs_type))
624 return NULL;
625
626 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
627
628 if (TREE_CODE (abs_oprnd) != SSA_NAME)
629 return NULL;
630
631 gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
632
633 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
634 if (!gimple_bb (diff_stmt)
635 || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
636 return NULL;
637
638   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
639 inside the loop (in case we are analyzing an outer-loop). */
640 if (!is_gimple_assign (diff_stmt))
641 return NULL;
642
643 stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
644 gcc_assert (diff_stmt_vinfo);
645 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
646 return NULL;
647 if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
648 return NULL;
649
650 tree half_type0, half_type1;
651 gimple *def_stmt;
652
653 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
654 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
655
656 if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
657 || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
658 return NULL;
659 if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
660 &half_type0, &def_stmt, &promotion)
661 || !promotion)
662 return NULL;
663 sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
664
665 if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
666 &half_type1, &def_stmt, &promotion)
667 || !promotion)
668 return NULL;
669 sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
670
671 if (!types_compatible_p (half_type0, half_type1))
672 return NULL;
673 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
674 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
675 return NULL;
676
677 *type_in = TREE_TYPE (sad_oprnd0);
678 *type_out = sum_type;
679
680 /* Pattern detected. Create a stmt to be used to replace the pattern: */
681 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
682 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
683 sad_oprnd1, plus_oprnd1);
684
685 if (dump_enabled_p ())
686 {
687 dump_printf_loc (MSG_NOTE, vect_location,
688 "vect_recog_sad_pattern: detected: ");
689 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
690 }
691
692 return pattern_stmt;
693 }
694
695
696 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
697 and LSHIFT_EXPR.
698
699 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
700 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
701
702 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
703 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
704    that satisfies the above restrictions, we can perform a widening operation
705 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
706 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
707
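/* Illustrative sketch (not part of GCC), with made-up names: given

       unsigned char a;
       unsigned int r = (unsigned int) a * 300;

   the constant 300 does not fit HALF_TYPE (unsigned char), but TYPE
   (unsigned int) is four times wider, and 300 does fit the intermediate
   16-bit type, so the promotion is retargeted to that intermediate type
   and the widening multiplication is done from there.  */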
708 static bool
709 vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
710 tree const_oprnd, tree *oprnd,
711 gimple **wstmt, tree type,
712 tree *half_type, gimple *def_stmt)
713 {
714 tree new_type, new_oprnd;
715
716 if (code != MULT_EXPR && code != LSHIFT_EXPR)
717 return false;
718
719 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
720 || (code == LSHIFT_EXPR
721 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
722 != 1))
723 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
724 {
725 /* CONST_OPRND is a constant of HALF_TYPE. */
726 *oprnd = gimple_assign_rhs1 (def_stmt);
727 return true;
728 }
729
730 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
731 return false;
732
733 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
734 return false;
735
736 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
737 a type 2 times bigger than HALF_TYPE. */
738 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
739 TYPE_UNSIGNED (type));
740 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
741 || (code == LSHIFT_EXPR
742 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
743 return false;
744
745 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
746 *oprnd = gimple_assign_rhs1 (def_stmt);
747 new_oprnd = make_ssa_name (new_type);
748 *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
749 *oprnd = new_oprnd;
750
751 *half_type = new_type;
752 return true;
753 }
754
755
756 /* Function vect_recog_widen_mult_pattern
757
758 Try to find the following pattern:
759
760 type1 a_t;
761 type2 b_t;
762 TYPE a_T, b_T, prod_T;
763
764 S1 a_t = ;
765 S2 b_t = ;
766 S3 a_T = (TYPE) a_t;
767 S4 b_T = (TYPE) b_t;
768 S5 prod_T = a_T * b_T;
769
770 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
771
772 Also detect unsigned cases:
773
774 unsigned type1 a_t;
775 unsigned type2 b_t;
776 unsigned TYPE u_prod_T;
777 TYPE a_T, b_T, prod_T;
778
779 S1 a_t = ;
780 S2 b_t = ;
781 S3 a_T = (TYPE) a_t;
782 S4 b_T = (TYPE) b_t;
783 S5 prod_T = a_T * b_T;
784 S6 u_prod_T = (unsigned TYPE) prod_T;
785
786 and multiplication by constants:
787
788 type a_t;
789 TYPE a_T, prod_T;
790
791 S1 a_t = ;
792 S3 a_T = (TYPE) a_t;
793 S5 prod_T = a_T * CONST;
794
795 A special case of multiplication by constants is when 'TYPE' is 4 times
796 bigger than 'type', but CONST fits an intermediate type 2 times smaller
797 than 'TYPE'. In that case we create an additional pattern stmt for S3
798 to create a variable of the intermediate type, and perform widen-mult
799 on the intermediate type as well:
800
801 type a_t;
802 interm_type a_it;
803 TYPE a_T, prod_T, prod_T';
804
805 S1 a_t = ;
806 S3 a_T = (TYPE) a_t;
807 '--> a_it = (interm_type) a_t;
808 S5 prod_T = a_T * CONST;
809 '--> prod_T' = a_it w* CONST;
810
811 Input/Output:
812
813 * STMTS: Contains a stmt from which the pattern search begins. In the
814 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
815 is detected. In case of unsigned widen-mult, the original stmt (S5) is
816 replaced with S6 in STMTS. In case of multiplication by a constant
817 of an intermediate type (the last case above), STMTS also contains S3
818 (inserted before S5).
819
820 Output:
821
822 * TYPE_IN: The type of the input arguments to the pattern.
823
824 * TYPE_OUT: The type of the output of this pattern.
825
826 * Return value: A new stmt that will be used to replace the sequence of
827 stmts that constitute the pattern. In this case it will be:
828 WIDEN_MULT <a_t, b_t>
829 If the result of WIDEN_MULT needs to be converted to a larger type, the
830 returned stmt will be this type conversion stmt.
831 */
832
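/* Illustrative sketch (not part of GCC): the simplest scalar loop this
   recognizer aims to match, with made-up names:

       short *a, *b;
       int *c;
       for (int i = 0; i < n; i++)
	 c[i] = (int) a[i] * (int) b[i];

   Both operands are widened before the multiplication, which maps to
   WIDEN_MULT_EXPR when the target supports a widening multiply.  */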
833 static gimple *
834 vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
835 tree *type_in, tree *type_out)
836 {
837 gimple *last_stmt = stmts->pop ();
838 gimple *def_stmt0, *def_stmt1;
839 tree oprnd0, oprnd1;
840 tree type, half_type0, half_type1;
841 gimple *new_stmt = NULL, *pattern_stmt = NULL;
842 tree vectype, vecitype;
843 tree var;
844 enum tree_code dummy_code;
845 int dummy_int;
846 vec<tree> dummy_vec;
847 bool op1_ok;
848 bool promotion;
849
850 if (!is_gimple_assign (last_stmt))
851 return NULL;
852
853 type = gimple_expr_type (last_stmt);
854
855 /* Starting from LAST_STMT, follow the defs of its uses in search
856 of the above pattern. */
857
858 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
859 return NULL;
860
861 oprnd0 = gimple_assign_rhs1 (last_stmt);
862 oprnd1 = gimple_assign_rhs2 (last_stmt);
863 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
864 || !types_compatible_p (TREE_TYPE (oprnd1), type))
865 return NULL;
866
867 /* Check argument 0. */
868 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
869 &promotion)
870 || !promotion)
871 return NULL;
872 /* Check argument 1. */
873 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
874 &def_stmt1, &promotion);
875
876 if (op1_ok && promotion)
877 {
878 oprnd0 = gimple_assign_rhs1 (def_stmt0);
879 oprnd1 = gimple_assign_rhs1 (def_stmt1);
880 }
881 else
882 {
883 if (TREE_CODE (oprnd1) == INTEGER_CST
884 && TREE_CODE (half_type0) == INTEGER_TYPE
885 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
886 &oprnd0, &new_stmt, type,
887 &half_type0, def_stmt0))
888 {
889 half_type1 = half_type0;
890 oprnd1 = fold_convert (half_type1, oprnd1);
891 }
892 else
893 return NULL;
894 }
895
896 /* If the two arguments have different sizes, convert the one with
897 the smaller type into the larger type. */
898 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
899 {
900 /* If we already used up the single-stmt slot give up. */
901 if (new_stmt)
902 return NULL;
903
904 tree* oprnd = NULL;
905 gimple *def_stmt = NULL;
906
907 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
908 {
909 def_stmt = def_stmt0;
910 half_type0 = half_type1;
911 oprnd = &oprnd0;
912 }
913 else
914 {
915 def_stmt = def_stmt1;
916 half_type1 = half_type0;
917 oprnd = &oprnd1;
918 }
919
920 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
921 tree new_oprnd = make_ssa_name (half_type0);
922 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
923 *oprnd = new_oprnd;
924 }
925
926 /* Handle unsigned case. Look for
927 S6 u_prod_T = (unsigned TYPE) prod_T;
928 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
929 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
930 {
931 gimple *use_stmt;
932 tree use_lhs;
933 tree use_type;
934
935 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
936 return NULL;
937
938 use_stmt = vect_single_imm_use (last_stmt);
939 if (!use_stmt || !is_gimple_assign (use_stmt)
940 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
941 return NULL;
942
943 use_lhs = gimple_assign_lhs (use_stmt);
944 use_type = TREE_TYPE (use_lhs);
945 if (!INTEGRAL_TYPE_P (use_type)
946 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
947 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
948 return NULL;
949
950 type = use_type;
951 last_stmt = use_stmt;
952 }
953
954 if (!types_compatible_p (half_type0, half_type1))
955 return NULL;
956
957   /* If TYPE is more than twice as large as HALF_TYPE, we use WIDEN_MULT
958 to get an intermediate result of type ITYPE. In this case we need
959 to build a statement to convert this intermediate result to type TYPE. */
960 tree itype = type;
961 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
962 itype = build_nonstandard_integer_type
963 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (half_type0)) * 2,
964 TYPE_UNSIGNED (type));
965
966 /* Pattern detected. */
967 if (dump_enabled_p ())
968 dump_printf_loc (MSG_NOTE, vect_location,
969 "vect_recog_widen_mult_pattern: detected:\n");
970
971 /* Check target support */
972 vectype = get_vectype_for_scalar_type (half_type0);
973 vecitype = get_vectype_for_scalar_type (itype);
974 if (!vectype
975 || !vecitype
976 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
977 vecitype, vectype,
978 &dummy_code, &dummy_code,
979 &dummy_int, &dummy_vec))
980 return NULL;
981
982 *type_in = vectype;
983 *type_out = get_vectype_for_scalar_type (type);
984
985 /* Pattern supported. Create a stmt to be used to replace the pattern: */
986 var = vect_recog_temp_ssa_var (itype, NULL);
987 pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);
988
989 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
990 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
991
992 /* If the original two operands have different sizes, we may need to convert
993      the smaller one into the larger type.  If this is the case, at this point
994 the new stmt is already built. */
995 if (new_stmt)
996 {
997 append_pattern_def_seq (stmt_vinfo, new_stmt);
998 stmt_vec_info new_stmt_info
999 = new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
1000 set_vinfo_for_stmt (new_stmt, new_stmt_info);
1001 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1002 }
1003
1004   /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
1005 the result of the widen-mult operation into type TYPE. */
1006 if (itype != type)
1007 {
1008 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
1009 stmt_vec_info pattern_stmt_info
1010 = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
1011 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
1012 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
1013 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
1014 NOP_EXPR,
1015 gimple_assign_lhs (pattern_stmt));
1016 }
1017
1018 if (dump_enabled_p ())
1019 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1020
1021 stmts->safe_push (last_stmt);
1022 return pattern_stmt;
1023 }
1024
1025
1026 /* Function vect_recog_pow_pattern
1027
1028 Try to find the following pattern:
1029
1030 x = POW (y, N);
1031
1032 with POW being one of pow, powf, powi, powif and N being
1033 either 2 or 0.5.
1034
1035 Input:
1036
1037 * LAST_STMT: A stmt from which the pattern search begins.
1038
1039 Output:
1040
1041 * TYPE_IN: The type of the input arguments to the pattern.
1042
1043 * TYPE_OUT: The type of the output of this pattern.
1044
1045 * Return value: A new stmt that will be used to replace the sequence of
1046 stmts that constitute the pattern. In this case it will be:
1047 x = x * x
1048 or
1049 x = sqrt (x)
1050 */
1051
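/* Illustrative sketch (not part of GCC): calls of the form

       y = pow (x, 2.0);
       z = pow (x, 0.5);

   are rewritten below as y = x * x and z = sqrt (x) respectively,
   the latter only if the target supports a vector IFN_SQRT.  */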
1052 static gimple *
1053 vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
1054 tree *type_out)
1055 {
1056 gimple *last_stmt = (*stmts)[0];
1057 tree base, exp;
1058 gimple *stmt;
1059 tree var;
1060
1061 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1062 return NULL;
1063
1064 switch (gimple_call_combined_fn (last_stmt))
1065 {
1066 CASE_CFN_POW:
1067 CASE_CFN_POWI:
1068 break;
1069
1070 default:
1071 return NULL;
1072 }
1073
1074 base = gimple_call_arg (last_stmt, 0);
1075 exp = gimple_call_arg (last_stmt, 1);
1076 if (TREE_CODE (exp) != REAL_CST
1077 && TREE_CODE (exp) != INTEGER_CST)
1078 {
1079 if (flag_unsafe_math_optimizations
1080 && TREE_CODE (base) == REAL_CST
1081 && !gimple_call_internal_p (last_stmt))
1082 {
1083 combined_fn log_cfn;
1084 built_in_function exp_bfn;
1085 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1086 {
1087 case BUILT_IN_POW:
1088 log_cfn = CFN_BUILT_IN_LOG;
1089 exp_bfn = BUILT_IN_EXP;
1090 break;
1091 case BUILT_IN_POWF:
1092 log_cfn = CFN_BUILT_IN_LOGF;
1093 exp_bfn = BUILT_IN_EXPF;
1094 break;
1095 case BUILT_IN_POWL:
1096 log_cfn = CFN_BUILT_IN_LOGL;
1097 exp_bfn = BUILT_IN_EXPL;
1098 break;
1099 default:
1100 return NULL;
1101 }
1102 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1103 tree exp_decl = builtin_decl_implicit (exp_bfn);
1104 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1105 does that, but if C is a power of 2, we want to use
1106 exp2 (log2 (C) * x) in the non-vectorized version, but for
1107 vectorization we don't have vectorized exp2. */
1108 if (logc
1109 && TREE_CODE (logc) == REAL_CST
1110 && exp_decl
1111 && lookup_attribute ("omp declare simd",
1112 DECL_ATTRIBUTES (exp_decl)))
1113 {
1114 cgraph_node *node = cgraph_node::get_create (exp_decl);
1115 if (node->simd_clones == NULL)
1116 {
1117 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1118 || node->definition)
1119 return NULL;
1120 expand_simd_clones (node);
1121 if (node->simd_clones == NULL)
1122 return NULL;
1123 }
1124 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1125 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1126 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1127 new_pattern_def_seq (stmt_vinfo, g);
1128 *type_in = TREE_TYPE (base);
1129 *type_out = NULL_TREE;
1130 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1131 g = gimple_build_call (exp_decl, 1, def);
1132 gimple_call_set_lhs (g, res);
1133 return g;
1134 }
1135 }
1136
1137 return NULL;
1138 }
1139
1140 /* We now have a pow or powi builtin function call with a constant
1141 exponent. */
1142
1143 *type_out = NULL_TREE;
1144
1145 /* Catch squaring. */
1146 if ((tree_fits_shwi_p (exp)
1147 && tree_to_shwi (exp) == 2)
1148 || (TREE_CODE (exp) == REAL_CST
1149 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1150 {
1151 *type_in = TREE_TYPE (base);
1152
1153 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1154 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1155 return stmt;
1156 }
1157
1158 /* Catch square root. */
1159 if (TREE_CODE (exp) == REAL_CST
1160 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1161 {
1162 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
1163 if (*type_in
1164 && direct_internal_fn_supported_p (IFN_SQRT, *type_in,
1165 OPTIMIZE_FOR_SPEED))
1166 {
1167 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1168 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1169 gimple_call_set_lhs (stmt, var);
1170 gimple_call_set_nothrow (stmt, true);
1171 return stmt;
1172 }
1173 }
1174
1175 return NULL;
1176 }
1177
1178
1179 /* Function vect_recog_widen_sum_pattern
1180
1181 Try to find the following pattern:
1182
1183 type x_t;
1184 TYPE x_T, sum = init;
1185 loop:
1186 sum_0 = phi <init, sum_1>
1187 S1 x_t = *p;
1188 S2 x_T = (TYPE) x_t;
1189 S3 sum_1 = x_T + sum_0;
1190
1191 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1192 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1193 a special case of a reduction computation.
1194
1195 Input:
1196
1197 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1198 when this function is called with S3, the pattern {S2,S3} will be detected.
1199
1200 Output:
1201
1202 * TYPE_IN: The type of the input arguments to the pattern.
1203
1204 * TYPE_OUT: The type of the output of this pattern.
1205
1206 * Return value: A new stmt that will be used to replace the sequence of
1207 stmts that constitute the pattern. In this case it will be:
1208 WIDEN_SUM <x_t, sum_0>
1209
1210 Note: The widening-sum idiom is a widening reduction pattern that is
1211 vectorized without preserving all the intermediate results. It
1212 produces only N/2 (widened) results (by summing up pairs of
1213 intermediate results) rather than all N results. Therefore, we
1214 cannot allow this pattern when we want to get all the results and in
1215 the correct order (as is the case when this computation is in an
1216    inner-loop nested in an outer-loop that is being vectorized).  */
1217
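/* Illustrative sketch (not part of GCC): a scalar loop this recognizer
   aims to match, with made-up names:

       short *x;
       int sum = 0;
       for (int i = 0; i < n; i++)
	 sum += (int) x[i];

   Narrow elements are summed into an accumulator of (at least) twice
   their width, which maps to WIDEN_SUM_EXPR when supported.  */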
1218 static gimple *
1219 vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
1220 tree *type_out)
1221 {
1222 gimple *stmt, *last_stmt = (*stmts)[0];
1223 tree oprnd0, oprnd1;
1224 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1225 tree type, half_type;
1226 gimple *pattern_stmt;
1227 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1228 struct loop *loop;
1229 tree var;
1230 bool promotion;
1231
1232 if (!loop_info)
1233 return NULL;
1234
1235 loop = LOOP_VINFO_LOOP (loop_info);
1236
1237 /* We don't allow changing the order of the computation in the inner-loop
1238 when doing outer-loop vectorization. */
1239 if (loop && nested_in_vect_loop_p (loop, last_stmt))
1240 return NULL;
1241
1242 if (!is_gimple_assign (last_stmt))
1243 return NULL;
1244
1245 type = gimple_expr_type (last_stmt);
1246
1247 /* Look for the following pattern
1248 DX = (TYPE) X;
1249 sum_1 = DX + sum_0;
1250 In which DX is at least double the size of X, and sum_1 has been
1251 recognized as a reduction variable.
1252 */
1253
1254 /* Starting from LAST_STMT, follow the defs of its uses in search
1255 of the above pattern. */
1256
1257 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
1258 return NULL;
1259
1260 if (!vect_reassociating_reduction_p (stmt_vinfo))
1261 return NULL;
1262
1263 oprnd0 = gimple_assign_rhs1 (last_stmt);
1264 oprnd1 = gimple_assign_rhs2 (last_stmt);
1265 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
1266 || !types_compatible_p (TREE_TYPE (oprnd1), type))
1267 return NULL;
1268
1269 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1270 we know that oprnd1 is the reduction variable (defined by a loop-header
1271 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1272 Left to check that oprnd0 is defined by a cast from type 'type' to type
1273 'TYPE'. */
1274
1275 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1276 &promotion)
1277 || !promotion)
1278 return NULL;
1279
1280 oprnd0 = gimple_assign_rhs1 (stmt);
1281 *type_in = half_type;
1282 *type_out = type;
1283
1284 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1285 var = vect_recog_temp_ssa_var (type, NULL);
1286 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
1287
1288 if (dump_enabled_p ())
1289 {
1290 dump_printf_loc (MSG_NOTE, vect_location,
1291 "vect_recog_widen_sum_pattern: detected: ");
1292 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1293 }
1294
1295 return pattern_stmt;
1296 }
1297
1298
1299 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1300
1301 Input:
1302 STMT - a statement to check.
1303 DEF - we support operations with two operands, one of which is constant.
1304 The other operand can be defined by a demotion operation, or by a
1305 previous statement in a sequence of over-promoted operations. In the
1306          latter case DEF is used to replace that operand.  (It is defined by a
1307 pattern statement we created for the previous statement in the
1308 sequence).
1309
1310 Input/output:
1311 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1312 NULL, it's the type of DEF.
1313 STMTS - additional pattern statements. If a pattern statement (type
1314 conversion) is created in this function, its original statement is
1315 added to STMTS.
1316
1317 Output:
1318 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1319 operands to use in the new pattern statement for STMT (will be created
1320 in vect_recog_over_widening_pattern ()).
1321 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1322 statements for STMT: the first one is a type promotion and the second
1323 one is the operation itself. We return the type promotion statement
1324 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1325 the second pattern statement. */
1326
1327 static bool
1328 vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
1329 tree *op0, tree *op1, gimple **new_def_stmt,
1330 vec<gimple *> *stmts)
1331 {
1332 enum tree_code code;
1333 tree const_oprnd, oprnd;
1334 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1335 gimple *def_stmt, *new_stmt;
1336 bool first = false;
1337 bool promotion;
1338
1339 *op0 = NULL_TREE;
1340 *op1 = NULL_TREE;
1341 *new_def_stmt = NULL;
1342
1343 if (!is_gimple_assign (stmt))
1344 return false;
1345
1346 code = gimple_assign_rhs_code (stmt);
1347 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1348 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1349 return false;
1350
1351 oprnd = gimple_assign_rhs1 (stmt);
1352 const_oprnd = gimple_assign_rhs2 (stmt);
1353 type = gimple_expr_type (stmt);
1354
1355 if (TREE_CODE (oprnd) != SSA_NAME
1356 || TREE_CODE (const_oprnd) != INTEGER_CST)
1357 return false;
1358
1359 /* If oprnd has other uses besides that in stmt we cannot mark it
1360 as being part of a pattern only. */
1361 if (!has_single_use (oprnd))
1362 return false;
1363
1364 /* If we are in the middle of a sequence, we use DEF from a previous
1365 statement. Otherwise, OPRND has to be a result of type promotion. */
1366 if (*new_type)
1367 {
1368 half_type = *new_type;
1369 oprnd = def;
1370 }
1371 else
1372 {
1373 first = true;
1374 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1375 &promotion)
1376 || !promotion
1377 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1378 return false;
1379 }
1380
1381 /* Can we perform the operation on a smaller type? */
1382 switch (code)
1383 {
1384 case BIT_IOR_EXPR:
1385 case BIT_XOR_EXPR:
1386 case BIT_AND_EXPR:
1387 if (!int_fits_type_p (const_oprnd, half_type))
1388 {
1389 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1390 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1391 return false;
1392
1393 interm_type = build_nonstandard_integer_type (
1394 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1395 if (!int_fits_type_p (const_oprnd, interm_type))
1396 return false;
1397 }
1398
1399 break;
1400
1401 case LSHIFT_EXPR:
1402 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1403 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1404 return false;
1405
1406 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1407 (e.g., if the original value was char, the shift amount is at most 8
1408 if we want to use short). */
1409 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1410 return false;
1411
1412 interm_type = build_nonstandard_integer_type (
1413 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1414
1415 if (!vect_supportable_shift (code, interm_type))
1416 return false;
1417
1418 break;
1419
1420 case RSHIFT_EXPR:
1421 if (vect_supportable_shift (code, half_type))
1422 break;
1423
1424 /* Try intermediate type - HALF_TYPE is not supported. */
1425 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1426 return false;
1427
1428 interm_type = build_nonstandard_integer_type (
1429 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1430
1431 if (!vect_supportable_shift (code, interm_type))
1432 return false;
1433
1434 break;
1435
1436 default:
1437 gcc_unreachable ();
1438 }
1439
1440 /* There are four possible cases:
1441 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1442 the first statement in the sequence)
1443 a. The original, HALF_TYPE, is not enough - we replace the promotion
1444 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1445 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1446 promotion.
1447 2. OPRND is defined by a pattern statement we created.
1448 a. Its type is not sufficient for the operation, we create a new stmt:
1449 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1450 this statement in NEW_DEF_STMT, and it is later put in
1451 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1452 b. OPRND is good to use in the new statement. */
1453 if (first)
1454 {
1455 if (interm_type)
1456 {
1457 /* Replace the original type conversion HALF_TYPE->TYPE with
1458 HALF_TYPE->INTERM_TYPE. */
1459 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1460 {
1461 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1462 /* Check if the already created pattern stmt is what we need. */
1463 if (!is_gimple_assign (new_stmt)
1464 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1465 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1466 return false;
1467
1468 stmts->safe_push (def_stmt);
1469 oprnd = gimple_assign_lhs (new_stmt);
1470 }
1471 else
1472 {
1473 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1474 oprnd = gimple_assign_rhs1 (def_stmt);
1475 new_oprnd = make_ssa_name (interm_type);
1476 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1477 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1478 stmts->safe_push (def_stmt);
1479 oprnd = new_oprnd;
1480 }
1481 }
1482 else
1483 {
1484 /* Retrieve the operand before the type promotion. */
1485 oprnd = gimple_assign_rhs1 (def_stmt);
1486 }
1487 }
1488 else
1489 {
1490 if (interm_type)
1491 {
1492 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1493 new_oprnd = make_ssa_name (interm_type);
1494 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1495 oprnd = new_oprnd;
1496 *new_def_stmt = new_stmt;
1497 }
1498
1499 /* Otherwise, OPRND is already set. */
1500 }
1501
1502 if (interm_type)
1503 *new_type = interm_type;
1504 else
1505 *new_type = half_type;
1506
1507 *op0 = oprnd;
1508 *op1 = fold_convert (*new_type, const_oprnd);
1509
1510 return true;
1511 }
1512
1513
1514 /* Try to find a statement or a sequence of statements that can be performed
1515 on a smaller type:
1516
1517 type x_t;
1518 TYPE x_T, res0_T, res1_T;
1519 loop:
1520 S1 x_t = *p;
1521 S2 x_T = (TYPE) x_t;
1522 S3 res0_T = op (x_T, C0);
1523 S4 res1_T = op (res0_T, C1);
1524 S5 ... = () res1_T; - type demotion
1525
1526 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1527 constants.
1528 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1529 be 'type' or some intermediate type. For now, we expect S5 to be a type
1530 demotion operation. We also check that S3 and S4 have only one use. */
1531
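/* Illustrative sketch (not part of GCC), with made-up names: in

       unsigned char *in, *out;
       for (int i = 0; i < n; i++)
	 out[i] = (in[i] << 1) | 1;

   the C integer promotions widen the computation to int even though the
   final result is demoted back to unsigned char, so the shift and the
   bitwise OR can be carried out in a narrower type.  */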
1532 static gimple *
1533 vect_recog_over_widening_pattern (vec<gimple *> *stmts,
1534 tree *type_in, tree *type_out)
1535 {
1536 gimple *stmt = stmts->pop ();
1537 gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
1538 *use_stmt = NULL;
1539 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1540 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1541 bool first;
1542 tree type = NULL;
1543
1544 first = true;
1545 while (1)
1546 {
1547 if (!vinfo_for_stmt (stmt)
1548 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1549 return NULL;
1550
1551 new_def_stmt = NULL;
1552 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1553 &op0, &op1, &new_def_stmt,
1554 stmts))
1555 {
1556 if (first)
1557 return NULL;
1558 else
1559 break;
1560 }
1561
1562 /* STMT can be performed on a smaller type. Check its uses. */
1563 use_stmt = vect_single_imm_use (stmt);
1564 if (!use_stmt || !is_gimple_assign (use_stmt))
1565 return NULL;
1566
1567 /* Create pattern statement for STMT. */
1568 vectype = get_vectype_for_scalar_type (new_type);
1569 if (!vectype)
1570 return NULL;
1571
1572 /* We want to collect all the statements for which we create pattern
1573          statements, except for the case when the last statement in the
1574          sequence doesn't have a corresponding pattern statement.  In such a
1575 case we associate the last pattern statement with the last statement
1576 in the sequence. Therefore, we only add the original statement to
1577 the list if we know that it is not the last. */
1578 if (prev_stmt)
1579 stmts->safe_push (prev_stmt);
1580
1581 var = vect_recog_temp_ssa_var (new_type, NULL);
1582 pattern_stmt
1583 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1584 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1585 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1586
1587 if (dump_enabled_p ())
1588 {
1589 dump_printf_loc (MSG_NOTE, vect_location,
1590 "created pattern stmt: ");
1591 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1592 }
1593
1594 type = gimple_expr_type (stmt);
1595 prev_stmt = stmt;
1596 stmt = use_stmt;
1597
1598 first = false;
1599 }
1600
1601 /* We got a sequence. We expect it to end with a type demotion operation.
1602 Otherwise, we quit (for now). There are three possible cases: the
1603 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1604 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1605 NEW_TYPE differs (we create a new conversion statement). */
1606 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1607 {
1608 use_lhs = gimple_assign_lhs (use_stmt);
1609 use_type = TREE_TYPE (use_lhs);
1610       /* Support only type demotion or signedness change.  */
1611 if (!INTEGRAL_TYPE_P (use_type)
1612 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1613 return NULL;
1614
1615 /* Check that NEW_TYPE is not bigger than the conversion result. */
1616 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1617 return NULL;
1618
1619 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1620 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1621 {
1622 /* Create NEW_TYPE->USE_TYPE conversion. */
1623 new_oprnd = make_ssa_name (use_type);
1624 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1625 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1626
1627 *type_in = get_vectype_for_scalar_type (new_type);
1628 *type_out = get_vectype_for_scalar_type (use_type);
1629
1630 /* We created a pattern statement for the last statement in the
1631 sequence, so we don't need to associate it with the pattern
1632 statement created for PREV_STMT. Therefore, we add PREV_STMT
1633 to the list in order to mark it later in vect_pattern_recog_1. */
1634 if (prev_stmt)
1635 stmts->safe_push (prev_stmt);
1636 }
1637 else
1638 {
1639 if (prev_stmt)
1640 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1641 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1642
1643 *type_in = vectype;
1644 *type_out = NULL_TREE;
1645 }
1646
1647 stmts->safe_push (use_stmt);
1648 }
1649 else
1650 /* TODO: support general case, create a conversion to the correct type. */
1651 return NULL;
1652
1653 /* Pattern detected. */
1654 if (dump_enabled_p ())
1655 {
1656 dump_printf_loc (MSG_NOTE, vect_location,
1657 "vect_recog_over_widening_pattern: detected: ");
1658 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1659 }
1660
1661 return pattern_stmt;
1662 }
1663
1664 /* Detect widening shift pattern:
1665
1666 type a_t;
1667 TYPE a_T, res_T;
1668
1669 S1 a_t = ;
1670 S2 a_T = (TYPE) a_t;
1671 S3 res_T = a_T << CONST;
1672
1673 where type 'TYPE' is at least double the size of type 'type'.
1674
1675 Also detect cases where the shift result is immediately converted
1676 to another type 'result_type' that is no larger in size than 'TYPE'.
1677 In those cases we perform a widen-shift that directly results in
1678 'result_type', to avoid a possible over-widening situation:
1679
1680 type a_t;
1681 TYPE a_T, res_T;
1682 result_type res_result;
1683
1684 S1 a_t = ;
1685 S2 a_T = (TYPE) a_t;
1686 S3 res_T = a_T << CONST;
1687 S4 res_result = (result_type) res_T;
1688 '--> res_result' = a_t w<< CONST;
1689
1690    We also detect the case where 'TYPE' is 4 times bigger than 'type'.  In that case we
1691 create an additional pattern stmt for S2 to create a variable of an
1692 intermediate type, and perform widen-shift on the intermediate type:
1693
1694 type a_t;
1695 interm_type a_it;
1696 TYPE a_T, res_T, res_T';
1697
1698 S1 a_t = ;
1699 S2 a_T = (TYPE) a_t;
1700 '--> a_it = (interm_type) a_t;
1701 S3 res_T = a_T << CONST;
1702 '--> res_T' = a_it <<* CONST;
1703
1704 Input/Output:
1705
1706 * STMTS: Contains a stmt from which the pattern search begins.
1707 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1708 in STMTS. When an intermediate type is used and a pattern statement is
1709 created for S2, we also put S2 here (before S3).
1710
1711 Output:
1712
1713 * TYPE_IN: The type of the input arguments to the pattern.
1714
1715 * TYPE_OUT: The type of the output of this pattern.
1716
1717 * Return value: A new stmt that will be used to replace the sequence of
1718 stmts that constitute the pattern. In this case it will be:
1719 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1720
1721 static gimple *
1722 vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
1723 tree *type_in, tree *type_out)
1724 {
1725 gimple *last_stmt = stmts->pop ();
1726 gimple *def_stmt0;
1727 tree oprnd0, oprnd1;
1728 tree type, half_type0;
1729 gimple *pattern_stmt;
1730 tree vectype, vectype_out = NULL_TREE;
1731 tree var;
1732 enum tree_code dummy_code;
1733 int dummy_int;
1734 vec<tree> dummy_vec;
1735 gimple *use_stmt;
1736 bool promotion;
1737
1738 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1739 return NULL;
1740
1741 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1742 return NULL;
1743
1744 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1745 return NULL;
1746
1747 oprnd0 = gimple_assign_rhs1 (last_stmt);
1748 oprnd1 = gimple_assign_rhs2 (last_stmt);
1749 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1750 return NULL;
1751
1752 /* Check operand 0: it has to be defined by a type promotion. */
1753 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1754 &promotion)
1755 || !promotion)
1756 return NULL;
1757
1758 /* Check operand 1: has to be positive. We check that it fits the type
1759 in vect_handle_widen_op_by_const (). */
1760 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1761 return NULL;
1762
1763 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1764 type = gimple_expr_type (last_stmt);
1765
1766 /* Check for subsequent conversion to another type. */
1767 use_stmt = vect_single_imm_use (last_stmt);
1768 if (use_stmt && is_gimple_assign (use_stmt)
1769 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1770 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1771 {
1772 tree use_lhs = gimple_assign_lhs (use_stmt);
1773 tree use_type = TREE_TYPE (use_lhs);
1774
1775 if (INTEGRAL_TYPE_P (use_type)
1776 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1777 {
1778 last_stmt = use_stmt;
1779 type = use_type;
1780 }
1781 }
1782
1783   /* Check if this is a widening operation.  */
1784 gimple *wstmt = NULL;
1785 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1786 &oprnd0, &wstmt,
1787 type, &half_type0, def_stmt0))
1788 return NULL;
1789
1790 /* Pattern detected. */
1791 if (dump_enabled_p ())
1792 dump_printf_loc (MSG_NOTE, vect_location,
1793 "vect_recog_widen_shift_pattern: detected:\n");
1794
1795 /* Check target support. */
1796 vectype = get_vectype_for_scalar_type (half_type0);
1797 vectype_out = get_vectype_for_scalar_type (type);
1798
1799 if (!vectype
1800 || !vectype_out
1801 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1802 vectype_out, vectype,
1803 &dummy_code, &dummy_code,
1804 &dummy_int, &dummy_vec))
1805 return NULL;
1806
1807 *type_in = vectype;
1808 *type_out = vectype_out;
1809
1810 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1811 var = vect_recog_temp_ssa_var (type, NULL);
1812 pattern_stmt
1813 = gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
1814 if (wstmt)
1815 {
1816 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1817 new_pattern_def_seq (stmt_vinfo, wstmt);
1818 stmt_vec_info new_stmt_info
1819 = new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
1820 set_vinfo_for_stmt (wstmt, new_stmt_info);
1821 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1822 }
1823
1824 if (dump_enabled_p ())
1825 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1826
1827 stmts->safe_push (last_stmt);
1828 return pattern_stmt;
1829 }
1830
1831 /* Detect a rotate pattern that wouldn't be otherwise vectorized:
1832
1833 type a_t, b_t, c_t;
1834
1835 S0 a_t = b_t r<< c_t;
1836
1837 Input/Output:
1838
1839 * STMTS: Contains a stmt from which the pattern search begins,
1840 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1841 with a sequence:
1842
1843 S1 d_t = -c_t;
1844 S2 e_t = d_t & (B - 1);
1845 S3 f_t = b_t << c_t;
1846 S4 g_t = b_t >> e_t;
1847 S0 a_t = f_t | g_t;
1848
1849 where B is element bitsize of type.
1850
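   For example (illustrative): for a 32-bit unsigned left rotate this
   computes a_t = (b_t << c_t) | (b_t >> (-c_t & 31)).
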
1851 Output:
1852
1853 * TYPE_IN: The type of the input arguments to the pattern.
1854
1855 * TYPE_OUT: The type of the output of this pattern.
1856
1857 * Return value: A new stmt that will be used to replace the rotate
1858 S0 stmt. */
1859
1860 static gimple *
1861 vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
1862 {
1863 gimple *last_stmt = stmts->pop ();
1864 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1865 gimple *pattern_stmt, *def_stmt;
1866 enum tree_code rhs_code;
1867 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1868 vec_info *vinfo = stmt_vinfo->vinfo;
1869 enum vect_def_type dt;
1870 optab optab1, optab2;
1871 edge ext_def = NULL;
1872
1873 if (!is_gimple_assign (last_stmt))
1874 return NULL;
1875
1876 rhs_code = gimple_assign_rhs_code (last_stmt);
1877 switch (rhs_code)
1878 {
1879 case LROTATE_EXPR:
1880 case RROTATE_EXPR:
1881 break;
1882 default:
1883 return NULL;
1884 }
1885
1886 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1887 return NULL;
1888
1889 lhs = gimple_assign_lhs (last_stmt);
1890 oprnd0 = gimple_assign_rhs1 (last_stmt);
1891 type = TREE_TYPE (oprnd0);
1892 oprnd1 = gimple_assign_rhs2 (last_stmt);
1893 if (TREE_CODE (oprnd0) != SSA_NAME
1894 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1895 || !INTEGRAL_TYPE_P (type)
1896 || !TYPE_UNSIGNED (type))
1897 return NULL;
1898
1899 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
1900 return NULL;
1901
1902 if (dt != vect_internal_def
1903 && dt != vect_constant_def
1904 && dt != vect_external_def)
1905 return NULL;
1906
1907 vectype = get_vectype_for_scalar_type (type);
1908 if (vectype == NULL_TREE)
1909 return NULL;
1910
1911 /* If vector/vector or vector/scalar rotate is supported by the target,
1912 don't do anything here. */
1913 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1914 if (optab1
1915 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1916 return NULL;
1917
1918 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
1919 {
1920 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1921 if (optab2
1922 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1923 return NULL;
1924 }
1925
1926 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1927 don't do anything here either. */
1928 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1929 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1930 if (!optab1
1931 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1932 || !optab2
1933 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1934 {
1935 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
1936 return NULL;
1937 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1938 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1939 if (!optab1
1940 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1941 || !optab2
1942 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1943 return NULL;
1944 }
1945
1946 *type_in = vectype;
1947 *type_out = vectype;
1948 if (*type_in == NULL_TREE)
1949 return NULL;
1950
1951 if (dt == vect_external_def
1952 && TREE_CODE (oprnd1) == SSA_NAME
1953 && is_a <loop_vec_info> (vinfo))
1954 {
1955 struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
1956 ext_def = loop_preheader_edge (loop);
1957 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1958 {
1959 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1960 if (bb == NULL
1961 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1962 ext_def = NULL;
1963 }
1964 }
1965
1966 def = NULL_TREE;
1967 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
1968 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
1969 def = oprnd1;
1970 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1971 {
1972 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1973 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
1974 && TYPE_PRECISION (TREE_TYPE (rhs1))
1975 == TYPE_PRECISION (type))
1976 def = rhs1;
1977 }
1978
1979 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1980 if (def == NULL_TREE)
1981 {
1982 def = vect_recog_temp_ssa_var (type, NULL);
1983 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1984 append_pattern_def_seq (stmt_vinfo, def_stmt);
1985 }
1986 stype = TREE_TYPE (def);
1987 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
1988
1989 if (TREE_CODE (def) == INTEGER_CST)
1990 {
1991 if (!tree_fits_uhwi_p (def)
1992 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
1993 || integer_zerop (def))
1994 return NULL;
1995 def2 = build_int_cst (stype,
1996 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
1997 }
1998 else
1999 {
2000 tree vecstype = get_vectype_for_scalar_type (stype);
2001 stmt_vec_info def_stmt_vinfo;
2002
2003 if (vecstype == NULL_TREE)
2004 return NULL;
2005 def2 = vect_recog_temp_ssa_var (stype, NULL);
2006 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2007 if (ext_def)
2008 {
2009 basic_block new_bb
2010 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2011 gcc_assert (!new_bb);
2012 }
2013 else
2014 {
2015 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2016 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2017 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2018 append_pattern_def_seq (stmt_vinfo, def_stmt);
2019 }
2020
2021 def2 = vect_recog_temp_ssa_var (stype, NULL);
2022 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
2023 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2024 gimple_assign_lhs (def_stmt), mask);
2025 if (ext_def)
2026 {
2027 basic_block new_bb
2028 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2029 gcc_assert (!new_bb);
2030 }
2031 else
2032 {
2033 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2034 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2035 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2036 append_pattern_def_seq (stmt_vinfo, def_stmt);
2037 }
2038 }
2039
2040 var1 = vect_recog_temp_ssa_var (type, NULL);
2041 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2042 ? LSHIFT_EXPR : RSHIFT_EXPR,
2043 oprnd0, def);
2044 append_pattern_def_seq (stmt_vinfo, def_stmt);
2045
2046 var2 = vect_recog_temp_ssa_var (type, NULL);
2047 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2048 ? RSHIFT_EXPR : LSHIFT_EXPR,
2049 oprnd0, def2);
2050 append_pattern_def_seq (stmt_vinfo, def_stmt);
2051
2052 /* Pattern detected. */
2053 if (dump_enabled_p ())
2054 dump_printf_loc (MSG_NOTE, vect_location,
2055 "vect_recog_rotate_pattern: detected:\n");
2056
2057 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2058 var = vect_recog_temp_ssa_var (type, NULL);
2059 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2060
2061 if (dump_enabled_p ())
2062 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2063
2064 stmts->safe_push (last_stmt);
2065 return pattern_stmt;
2066 }
2067
2068 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2069 vectorized:
2070
2071 type a_t;
2072 TYPE b_T, res_T;
2073
2074 S1 a_t = ;
2075 S2 b_T = ;
2076 S3 res_T = b_T op a_t;
2077
2078 where type 'TYPE' is a type with different size than 'type',
2079 and op is <<, >> or rotate.
2080
2081 Also detect cases:
2082
2083 type a_t;
2084 TYPE b_T, c_T, res_T;
2085
2086 S0 c_T = ;
2087 S1 a_t = (type) c_T;
2088 S2 b_T = ;
2089 S3 res_T = b_T op a_t;
2090
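   For example (illustrative): when b_T is int and a_t is char, the
   shift amount is converted (or, in the second case, c_T is reused and
   possibly masked) so that both shift operands get the same vector type.
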
2091 Input/Output:
2092
2093 * STMTS: Contains a stmt from which the pattern search begins,
2094 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2095 with a shift/rotate which has same type on both operands, in the
2096 second case just b_T op c_T, in the first case with added cast
2097 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2098
2099 Output:
2100
2101 * TYPE_IN: The type of the input arguments to the pattern.
2102
2103 * TYPE_OUT: The type of the output of this pattern.
2104
2105 * Return value: A new stmt that will be used to replace the shift/rotate
2106 S3 stmt. */
2107
2108 static gimple *
2109 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2110 tree *type_in, tree *type_out)
2111 {
2112 gimple *last_stmt = stmts->pop ();
2113 tree oprnd0, oprnd1, lhs, var;
2114 gimple *pattern_stmt, *def_stmt;
2115 enum tree_code rhs_code;
2116 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2117 vec_info *vinfo = stmt_vinfo->vinfo;
2118 enum vect_def_type dt;
2119
2120 if (!is_gimple_assign (last_stmt))
2121 return NULL;
2122
2123 rhs_code = gimple_assign_rhs_code (last_stmt);
2124 switch (rhs_code)
2125 {
2126 case LSHIFT_EXPR:
2127 case RSHIFT_EXPR:
2128 case LROTATE_EXPR:
2129 case RROTATE_EXPR:
2130 break;
2131 default:
2132 return NULL;
2133 }
2134
2135 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2136 return NULL;
2137
2138 lhs = gimple_assign_lhs (last_stmt);
2139 oprnd0 = gimple_assign_rhs1 (last_stmt);
2140 oprnd1 = gimple_assign_rhs2 (last_stmt);
2141 if (TREE_CODE (oprnd0) != SSA_NAME
2142 || TREE_CODE (oprnd1) != SSA_NAME
2143 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2144 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2145 || TYPE_PRECISION (TREE_TYPE (lhs))
2146 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2147 return NULL;
2148
2149 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
2150 return NULL;
2151
2152 if (dt != vect_internal_def)
2153 return NULL;
2154
2155 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2156 *type_out = *type_in;
2157 if (*type_in == NULL_TREE)
2158 return NULL;
2159
2160 tree def = NULL_TREE;
2161 stmt_vec_info def_vinfo = vinfo_for_stmt (def_stmt);
2162 if (!STMT_VINFO_IN_PATTERN_P (def_vinfo) && gimple_assign_cast_p (def_stmt))
2163 {
2164 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2165 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2166 && TYPE_PRECISION (TREE_TYPE (rhs1))
2167 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2168 {
2169 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2170 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2171 def = rhs1;
2172 else
2173 {
2174 tree mask
2175 = build_low_bits_mask (TREE_TYPE (rhs1),
2176 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2177 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2178 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2179 new_pattern_def_seq (stmt_vinfo, def_stmt);
2180 }
2181 }
2182 }
2183
2184 if (def == NULL_TREE)
2185 {
2186 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2187 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2188 new_pattern_def_seq (stmt_vinfo, def_stmt);
2189 }
2190
2191 /* Pattern detected. */
2192 if (dump_enabled_p ())
2193 dump_printf_loc (MSG_NOTE, vect_location,
2194 "vect_recog_vector_vector_shift_pattern: detected:\n");
2195
2196 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2197 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2198 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2199
2200 if (dump_enabled_p ())
2201 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2202
2203 stmts->safe_push (last_stmt);
2204 return pattern_stmt;
2205 }
2206
2207 /* Return true iff the target has a vector optab implementing the operation
2208 CODE on type VECTYPE. */
2209
2210 static bool
2211 target_has_vecop_for_code (tree_code code, tree vectype)
2212 {
2213 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2214 return voptab
2215 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2216 }
2217
2218 /* Verify that the target has optabs of VECTYPE to perform all the steps
2219 needed by the multiplication-by-immediate synthesis algorithm described by
2220 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2221 present. Return true iff the target supports all the steps. */
2222
2223 static bool
2224 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2225 tree vectype, bool synth_shift_p)
2226 {
2227 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2228 return false;
2229
2230 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2231 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2232
2233 if (var == negate_variant
2234 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2235 return false;
2236
2237 /* If we must synthesize shifts with additions make sure that vector
2238 addition is available. */
2239 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2240 return false;
2241
2242 for (int i = 1; i < alg->ops; i++)
2243 {
2244 switch (alg->op[i])
2245 {
2246 case alg_shift:
2247 break;
2248 case alg_add_t_m2:
2249 case alg_add_t2_m:
2250 case alg_add_factor:
2251 if (!supports_vplus)
2252 return false;
2253 break;
2254 case alg_sub_t_m2:
2255 case alg_sub_t2_m:
2256 case alg_sub_factor:
2257 if (!supports_vminus)
2258 return false;
2259 break;
2260 case alg_unknown:
2261 case alg_m:
2262 case alg_zero:
2263 case alg_impossible:
2264 return false;
2265 default:
2266 gcc_unreachable ();
2267 }
2268 }
2269
2270 return true;
2271 }
2272
2273 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2274 putting the final result in DEST. Append all statements but the last into
2275 VINFO. Return the last statement. */
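/* For illustration: a left shift by 3 is synthesized as three
   doublings, i.e. t1 = op + op; t2 = t1 + t1; dest = t2 + t2.  */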
2276
2277 static gimple *
2278 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2279 stmt_vec_info vinfo)
2280 {
2281 HOST_WIDE_INT i;
2282 tree itype = TREE_TYPE (op);
2283 tree prev_res = op;
2284 gcc_assert (amnt >= 0);
2285 for (i = 0; i < amnt; i++)
2286 {
2287 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2288 : dest;
2289 gimple *stmt
2290 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2291 prev_res = tmp_var;
2292 if (i < amnt - 1)
2293 append_pattern_def_seq (vinfo, stmt);
2294 else
2295 return stmt;
2296 }
2297 gcc_unreachable ();
2298 return NULL;
2299 }
2300
2301 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2302 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2303 the process if necessary. Append the resulting assignment statements
2304 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2305 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2306 left shifts using additions. */
2307
2308 static tree
2309 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2310 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2311 {
2312 if (integer_zerop (op2)
2313 && (code == LSHIFT_EXPR
2314 || code == PLUS_EXPR))
2315 {
2316 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2317 return op1;
2318 }
2319
2320 gimple *stmt;
2321 tree itype = TREE_TYPE (op1);
2322 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2323
2324 if (code == LSHIFT_EXPR
2325 && synth_shift_p)
2326 {
2327 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2328 stmt_vinfo);
2329 append_pattern_def_seq (stmt_vinfo, stmt);
2330 return tmp_var;
2331 }
2332
2333 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2334 append_pattern_def_seq (stmt_vinfo, stmt);
2335 return tmp_var;
2336 }
2337
2338 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2339 and simple arithmetic operations to be vectorized. Record the statements
2340 produced in STMT_VINFO and return the last statement in the sequence or
2341 NULL if it's not possible to synthesize such a multiplication.
2342 This function mirrors the behavior of expand_mult_const in expmed.c but
2343 works on tree-ssa form. */
2344
2345 static gimple *
2346 vect_synth_mult_by_constant (tree op, tree val,
2347 stmt_vec_info stmt_vinfo)
2348 {
2349 tree itype = TREE_TYPE (op);
2350 machine_mode mode = TYPE_MODE (itype);
2351 struct algorithm alg;
2352 mult_variant variant;
2353 if (!tree_fits_shwi_p (val))
2354 return NULL;
2355
2356 /* Multiplication synthesis by shifts, adds and subs can introduce
2357 signed overflow where the original operation didn't. Perform the
2358 operations on an unsigned type and cast back to avoid this.
2359 In the future we may want to relax this for synthesis algorithms
2360 that we can prove do not cause unexpected overflow. */
2361 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2362
2363 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2364
2365 /* Targets that don't support vector shifts but support vector additions
2366 can synthesize shifts that way. */
2367 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2368
2369 HOST_WIDE_INT hwval = tree_to_shwi (val);
2370 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2371 The vectorizer's benefit analysis will decide whether it's beneficial
2372 to do this. */
2373 bool possible = choose_mult_variant (mode, hwval, &alg,
2374 &variant, MAX_COST);
2375 if (!possible)
2376 return NULL;
2377
2378 tree vectype = get_vectype_for_scalar_type (multtype);
2379
2380 if (!vectype
2381 || !target_supports_mult_synth_alg (&alg, variant,
2382 vectype, synth_shift_p))
2383 return NULL;
2384
2385 tree accumulator;
2386
2387 /* Clear out the sequence of statements so we can populate it below. */
2388 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2389 gimple *stmt = NULL;
2390
2391 if (cast_to_unsigned_p)
2392 {
2393 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2394 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2395 append_pattern_def_seq (stmt_vinfo, stmt);
2396 op = tmp_op;
2397 }
2398
2399 if (alg.op[0] == alg_zero)
2400 accumulator = build_int_cst (multtype, 0);
2401 else
2402 accumulator = op;
2403
2404 bool needs_fixup = (variant == negate_variant)
2405 || (variant == add_variant);
2406
2407 for (int i = 1; i < alg.ops; i++)
2408 {
2409 tree shft_log = build_int_cst (multtype, alg.log[i]);
2410 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2411 tree tmp_var = NULL_TREE;
2412
2413 switch (alg.op[i])
2414 {
2415 case alg_shift:
2416 if (synth_shift_p)
2417 stmt
2418 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2419 stmt_vinfo);
2420 else
2421 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2422 shft_log);
2423 break;
2424 case alg_add_t_m2:
2425 tmp_var
2426 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2427 stmt_vinfo, synth_shift_p);
2428 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2429 tmp_var);
2430 break;
2431 case alg_sub_t_m2:
2432 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2433 shft_log, stmt_vinfo,
2434 synth_shift_p);
2435 /* In some algorithms the first step involves zeroing the
2436 accumulator. If subtracting from such an accumulator
2437 just emit the negation directly. */
2438 if (integer_zerop (accumulator))
2439 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2440 else
2441 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2442 tmp_var);
2443 break;
2444 case alg_add_t2_m:
2445 tmp_var
2446 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2447 stmt_vinfo, synth_shift_p);
2448 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2449 break;
2450 case alg_sub_t2_m:
2451 tmp_var
2452 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2453 stmt_vinfo, synth_shift_p);
2454 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2455 break;
2456 case alg_add_factor:
2457 tmp_var
2458 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2459 stmt_vinfo, synth_shift_p);
2460 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2461 tmp_var);
2462 break;
2463 case alg_sub_factor:
2464 tmp_var
2465 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2466 stmt_vinfo, synth_shift_p);
2467 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2468 accumulator);
2469 break;
2470 default:
2471 gcc_unreachable ();
2472 }
2473 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2474 but rather return it directly. */
2475
2476 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2477 append_pattern_def_seq (stmt_vinfo, stmt);
2478 accumulator = accum_tmp;
2479 }
2480 if (variant == negate_variant)
2481 {
2482 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2483 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2484 accumulator = accum_tmp;
2485 if (cast_to_unsigned_p)
2486 append_pattern_def_seq (stmt_vinfo, stmt);
2487 }
2488 else if (variant == add_variant)
2489 {
2490 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2491 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2492 accumulator = accum_tmp;
2493 if (cast_to_unsigned_p)
2494 append_pattern_def_seq (stmt_vinfo, stmt);
2495 }
2496   /* Move back to a signed type if needed.  */
2497 if (cast_to_unsigned_p)
2498 {
2499 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2500 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2501 }
2502
2503 return stmt;
2504 }
2505
2506 /* Detect multiplication by constant and convert it into a sequence of
2507 shifts and additions, subtractions, negations. We reuse the
2508    choose_mult_variant algorithms from expmed.c.
2509
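   For example (illustrative): a multiplication by 9 may be synthesized
   as t1 = x << 3; res = t1 + x, using one shift and one addition.
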
2510 Input/Output:
2511
2512 STMTS: Contains a stmt from which the pattern search begins,
2513 i.e. the mult stmt.
2514
2515 Output:
2516
2517 * TYPE_IN: The type of the input arguments to the pattern.
2518
2519 * TYPE_OUT: The type of the output of this pattern.
2520
2521 * Return value: A new stmt that will be used to replace
2522 the multiplication. */
2523
2524 static gimple *
2525 vect_recog_mult_pattern (vec<gimple *> *stmts,
2526 tree *type_in, tree *type_out)
2527 {
2528 gimple *last_stmt = stmts->pop ();
2529 tree oprnd0, oprnd1, vectype, itype;
2530 gimple *pattern_stmt;
2531 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2532
2533 if (!is_gimple_assign (last_stmt))
2534 return NULL;
2535
2536 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2537 return NULL;
2538
2539 oprnd0 = gimple_assign_rhs1 (last_stmt);
2540 oprnd1 = gimple_assign_rhs2 (last_stmt);
2541 itype = TREE_TYPE (oprnd0);
2542
2543 if (TREE_CODE (oprnd0) != SSA_NAME
2544 || TREE_CODE (oprnd1) != INTEGER_CST
2545 || !INTEGRAL_TYPE_P (itype)
2546 || !type_has_mode_precision_p (itype))
2547 return NULL;
2548
2549 vectype = get_vectype_for_scalar_type (itype);
2550 if (vectype == NULL_TREE)
2551 return NULL;
2552
2553 /* If the target can handle vectorized multiplication natively,
2554 don't attempt to optimize this. */
2555 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2556 if (mul_optab != unknown_optab)
2557 {
2558 machine_mode vec_mode = TYPE_MODE (vectype);
2559 int icode = (int) optab_handler (mul_optab, vec_mode);
2560 if (icode != CODE_FOR_nothing)
2561 return NULL;
2562 }
2563
2564 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2565 if (!pattern_stmt)
2566 return NULL;
2567
2568 /* Pattern detected. */
2569 if (dump_enabled_p ())
2570 dump_printf_loc (MSG_NOTE, vect_location,
2571 "vect_recog_mult_pattern: detected:\n");
2572
2573 if (dump_enabled_p ())
2574 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2575 pattern_stmt,0);
2576
2577 stmts->safe_push (last_stmt);
2578 *type_in = vectype;
2579 *type_out = vectype;
2580
2581 return pattern_stmt;
2582 }
2583
2584 /* Detect a signed division by a constant that wouldn't be
2585 otherwise vectorized:
2586
2587 type a_t, b_t;
2588
2589 S1 a_t = b_t / N;
2590
2591 where type 'type' is an integral type and N is a constant.
2592
2593 Similarly handle modulo by a constant:
2594
2595 S4 a_t = b_t % N;
2596
2597 Input/Output:
2598
2599 * STMTS: Contains a stmt from which the pattern search begins,
2600    i.e. the division stmt.  If N is a power of two constant and the
2601    type is signed, S1 is replaced by:
2602 S3 y_t = b_t < 0 ? N - 1 : 0;
2603 S2 x_t = b_t + y_t;
2604 S1' a_t = x_t >> log2 (N);
2605
2606    If N is a power of two constant and the type is signed, S4 is
2607    replaced by (where *_T temporaries have unsigned type):
2608 S9 y_T = b_t < 0 ? -1U : 0U;
2609 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2610 S7 z_t = (type) z_T;
2611 S6 w_t = b_t + z_t;
2612 S5 x_t = w_t & (N - 1);
2613 S4' a_t = x_t - z_t;
2614
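   For example (illustrative): with a 32-bit signed b_t and N = 4 the
   division becomes y_t = b_t < 0 ? 3 : 0; x_t = b_t + y_t;
   a_t = x_t >> 2.
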
2615 Output:
2616
2617 * TYPE_IN: The type of the input arguments to the pattern.
2618
2619 * TYPE_OUT: The type of the output of this pattern.
2620
2621 * Return value: A new stmt that will be used to replace the division
2622 S1 or modulo S4 stmt. */
2623
2624 static gimple *
2625 vect_recog_divmod_pattern (vec<gimple *> *stmts,
2626 tree *type_in, tree *type_out)
2627 {
2628 gimple *last_stmt = stmts->pop ();
2629 tree oprnd0, oprnd1, vectype, itype, cond;
2630 gimple *pattern_stmt, *def_stmt;
2631 enum tree_code rhs_code;
2632 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2633 vec_info *vinfo = stmt_vinfo->vinfo;
2634 optab optab;
2635 tree q;
2636 int dummy_int, prec;
2637 stmt_vec_info def_stmt_vinfo;
2638
2639 if (!is_gimple_assign (last_stmt))
2640 return NULL;
2641
2642 rhs_code = gimple_assign_rhs_code (last_stmt);
2643 switch (rhs_code)
2644 {
2645 case TRUNC_DIV_EXPR:
2646 case TRUNC_MOD_EXPR:
2647 break;
2648 default:
2649 return NULL;
2650 }
2651
2652 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2653 return NULL;
2654
2655 oprnd0 = gimple_assign_rhs1 (last_stmt);
2656 oprnd1 = gimple_assign_rhs2 (last_stmt);
2657 itype = TREE_TYPE (oprnd0);
2658 if (TREE_CODE (oprnd0) != SSA_NAME
2659 || TREE_CODE (oprnd1) != INTEGER_CST
2660 || TREE_CODE (itype) != INTEGER_TYPE
2661 || !type_has_mode_precision_p (itype))
2662 return NULL;
2663
2664 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2665 vectype = get_vectype_for_scalar_type (itype);
2666 if (vectype == NULL_TREE)
2667 return NULL;
2668
2669 /* If the target can handle vectorized division or modulo natively,
2670 don't attempt to optimize this. */
2671 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2672 if (optab != unknown_optab)
2673 {
2674 machine_mode vec_mode = TYPE_MODE (vectype);
2675 int icode = (int) optab_handler (optab, vec_mode);
2676 if (icode != CODE_FOR_nothing)
2677 return NULL;
2678 }
2679
2680 prec = TYPE_PRECISION (itype);
2681 if (integer_pow2p (oprnd1))
2682 {
2683 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2684 return NULL;
2685
2686 /* Pattern detected. */
2687 if (dump_enabled_p ())
2688 dump_printf_loc (MSG_NOTE, vect_location,
2689 "vect_recog_divmod_pattern: detected:\n");
2690
2691 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2692 build_int_cst (itype, 0));
2693 if (rhs_code == TRUNC_DIV_EXPR)
2694 {
2695 tree var = vect_recog_temp_ssa_var (itype, NULL);
2696 tree shift;
2697 def_stmt
2698 = gimple_build_assign (var, COND_EXPR, cond,
2699 fold_build2 (MINUS_EXPR, itype, oprnd1,
2700 build_int_cst (itype, 1)),
2701 build_int_cst (itype, 0));
2702 new_pattern_def_seq (stmt_vinfo, def_stmt);
2703 var = vect_recog_temp_ssa_var (itype, NULL);
2704 def_stmt
2705 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2706 gimple_assign_lhs (def_stmt));
2707 append_pattern_def_seq (stmt_vinfo, def_stmt);
2708
2709 shift = build_int_cst (itype, tree_log2 (oprnd1));
2710 pattern_stmt
2711 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2712 RSHIFT_EXPR, var, shift);
2713 }
2714 else
2715 {
2716 tree signmask;
2717 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2718 if (compare_tree_int (oprnd1, 2) == 0)
2719 {
2720 signmask = vect_recog_temp_ssa_var (itype, NULL);
2721 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2722 build_int_cst (itype, 1),
2723 build_int_cst (itype, 0));
2724 append_pattern_def_seq (stmt_vinfo, def_stmt);
2725 }
2726 else
2727 {
2728 tree utype
2729 = build_nonstandard_integer_type (prec, 1);
2730 tree vecutype = get_vectype_for_scalar_type (utype);
2731 tree shift
2732 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2733 - tree_log2 (oprnd1));
2734 tree var = vect_recog_temp_ssa_var (utype, NULL);
2735
2736 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2737 build_int_cst (utype, -1),
2738 build_int_cst (utype, 0));
2739 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2740 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2741 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2742 append_pattern_def_seq (stmt_vinfo, def_stmt);
2743 var = vect_recog_temp_ssa_var (utype, NULL);
2744 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2745 gimple_assign_lhs (def_stmt),
2746 shift);
2747 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2748 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2749 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2750 append_pattern_def_seq (stmt_vinfo, def_stmt);
2751 signmask = vect_recog_temp_ssa_var (itype, NULL);
2752 def_stmt
2753 = gimple_build_assign (signmask, NOP_EXPR, var);
2754 append_pattern_def_seq (stmt_vinfo, def_stmt);
2755 }
2756 def_stmt
2757 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2758 PLUS_EXPR, oprnd0, signmask);
2759 append_pattern_def_seq (stmt_vinfo, def_stmt);
2760 def_stmt
2761 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2762 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2763 fold_build2 (MINUS_EXPR, itype, oprnd1,
2764 build_int_cst (itype, 1)));
2765 append_pattern_def_seq (stmt_vinfo, def_stmt);
2766
2767 pattern_stmt
2768 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2769 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2770 signmask);
2771 }
2772
2773 if (dump_enabled_p ())
2774 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2775 0);
2776
2777 stmts->safe_push (last_stmt);
2778
2779 *type_in = vectype;
2780 *type_out = vectype;
2781 return pattern_stmt;
2782 }
2783
2784 if (prec > HOST_BITS_PER_WIDE_INT
2785 || integer_zerop (oprnd1))
2786 return NULL;
2787
2788 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2789 return NULL;
2790
2791 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2792
2793 if (TYPE_UNSIGNED (itype))
2794 {
2795 unsigned HOST_WIDE_INT mh, ml;
2796 int pre_shift, post_shift;
2797 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2798 & GET_MODE_MASK (itype_mode));
2799 tree t1, t2, t3, t4;
2800
2801 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2802 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2803 return NULL;
2804
2805 /* Find a suitable multiplier and right shift count
2806 instead of multiplying with D. */
2807 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
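      /* For example (illustrative): for a 32-bit unsigned division by 3
	 this typically yields ml = 0xAAAAAAAB and post_shift = 1, so the
	 quotient becomes q = (oprnd0 h* 0xAAAAAAAB) >> 1.  */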
2808
2809 /* If the suggested multiplier is more than SIZE bits, we can do better
2810 for even divisors, using an initial right shift. */
2811 if (mh != 0 && (d & 1) == 0)
2812 {
2813 pre_shift = ctz_or_zero (d);
2814 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2815 &ml, &post_shift, &dummy_int);
2816 gcc_assert (!mh);
2817 }
2818 else
2819 pre_shift = 0;
2820
2821 if (mh != 0)
2822 {
2823 if (post_shift - 1 >= prec)
2824 return NULL;
2825
2826 /* t1 = oprnd0 h* ml;
2827 t2 = oprnd0 - t1;
2828 t3 = t2 >> 1;
2829 t4 = t1 + t3;
2830 q = t4 >> (post_shift - 1); */
2831 t1 = vect_recog_temp_ssa_var (itype, NULL);
2832 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2833 build_int_cst (itype, ml));
2834 append_pattern_def_seq (stmt_vinfo, def_stmt);
2835
2836 t2 = vect_recog_temp_ssa_var (itype, NULL);
2837 def_stmt
2838 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2839 append_pattern_def_seq (stmt_vinfo, def_stmt);
2840
2841 t3 = vect_recog_temp_ssa_var (itype, NULL);
2842 def_stmt
2843 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2844 append_pattern_def_seq (stmt_vinfo, def_stmt);
2845
2846 t4 = vect_recog_temp_ssa_var (itype, NULL);
2847 def_stmt
2848 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2849
2850 if (post_shift != 1)
2851 {
2852 append_pattern_def_seq (stmt_vinfo, def_stmt);
2853
2854 q = vect_recog_temp_ssa_var (itype, NULL);
2855 pattern_stmt
2856 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2857 build_int_cst (itype, post_shift - 1));
2858 }
2859 else
2860 {
2861 q = t4;
2862 pattern_stmt = def_stmt;
2863 }
2864 }
2865 else
2866 {
2867 if (pre_shift >= prec || post_shift >= prec)
2868 return NULL;
2869
2870 /* t1 = oprnd0 >> pre_shift;
2871 t2 = t1 h* ml;
2872 q = t2 >> post_shift; */
2873 if (pre_shift)
2874 {
2875 t1 = vect_recog_temp_ssa_var (itype, NULL);
2876 def_stmt
2877 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2878 build_int_cst (NULL, pre_shift));
2879 append_pattern_def_seq (stmt_vinfo, def_stmt);
2880 }
2881 else
2882 t1 = oprnd0;
2883
2884 t2 = vect_recog_temp_ssa_var (itype, NULL);
2885 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2886 build_int_cst (itype, ml));
2887
2888 if (post_shift)
2889 {
2890 append_pattern_def_seq (stmt_vinfo, def_stmt);
2891
2892 q = vect_recog_temp_ssa_var (itype, NULL);
2893 def_stmt
2894 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2895 build_int_cst (itype, post_shift));
2896 }
2897 else
2898 q = t2;
2899
2900 pattern_stmt = def_stmt;
2901 }
2902 }
2903 else
2904 {
2905 unsigned HOST_WIDE_INT ml;
2906 int post_shift;
2907 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2908 unsigned HOST_WIDE_INT abs_d;
2909 bool add = false;
2910 tree t1, t2, t3, t4;
2911
2912 /* Give up for -1. */
2913 if (d == -1)
2914 return NULL;
2915
2916 /* Since d might be INT_MIN, we have to cast to
2917 unsigned HOST_WIDE_INT before negating to avoid
2918 undefined signed overflow. */
2919 abs_d = (d >= 0
2920 ? (unsigned HOST_WIDE_INT) d
2921 : - (unsigned HOST_WIDE_INT) d);
2922
2923 /* n rem d = n rem -d */
2924 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2925 {
2926 d = abs_d;
2927 oprnd1 = build_int_cst (itype, abs_d);
2928 }
2929 else if (HOST_BITS_PER_WIDE_INT >= prec
2930 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2931 /* This case is not handled correctly below. */
2932 return NULL;
2933
2934 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2935 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2936 {
2937 add = true;
2938 ml |= HOST_WIDE_INT_M1U << (prec - 1);
2939 }
2940 if (post_shift >= prec)
2941 return NULL;
2942
2943 /* t1 = oprnd0 h* ml; */
2944 t1 = vect_recog_temp_ssa_var (itype, NULL);
2945 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2946 build_int_cst (itype, ml));
2947
2948 if (add)
2949 {
2950 /* t2 = t1 + oprnd0; */
2951 append_pattern_def_seq (stmt_vinfo, def_stmt);
2952 t2 = vect_recog_temp_ssa_var (itype, NULL);
2953 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2954 }
2955 else
2956 t2 = t1;
2957
2958 if (post_shift)
2959 {
2960 /* t3 = t2 >> post_shift; */
2961 append_pattern_def_seq (stmt_vinfo, def_stmt);
2962 t3 = vect_recog_temp_ssa_var (itype, NULL);
2963 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2964 build_int_cst (itype, post_shift));
2965 }
2966 else
2967 t3 = t2;
2968
2969 wide_int oprnd0_min, oprnd0_max;
2970 int msb = 1;
2971 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2972 {
2973 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2974 msb = 0;
2975 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2976 msb = -1;
2977 }
2978
2979 if (msb == 0 && d >= 0)
2980 {
2981 /* q = t3; */
2982 q = t3;
2983 pattern_stmt = def_stmt;
2984 }
2985 else
2986 {
2987 /* t4 = oprnd0 >> (prec - 1);
2988 or if we know from VRP that oprnd0 >= 0
2989 t4 = 0;
2990 or if we know from VRP that oprnd0 < 0
2991 t4 = -1; */
2992 append_pattern_def_seq (stmt_vinfo, def_stmt);
2993 t4 = vect_recog_temp_ssa_var (itype, NULL);
2994 if (msb != 1)
2995 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2996 build_int_cst (itype, msb));
2997 else
2998 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2999 build_int_cst (itype, prec - 1));
3000 append_pattern_def_seq (stmt_vinfo, def_stmt);
3001
3002 /* q = t3 - t4; or q = t4 - t3; */
3003 q = vect_recog_temp_ssa_var (itype, NULL);
3004 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3005 d < 0 ? t3 : t4);
3006 }
3007 }
3008
3009 if (rhs_code == TRUNC_MOD_EXPR)
3010 {
3011 tree r, t1;
3012
3013 /* We divided. Now finish by:
3014 t1 = q * oprnd1;
3015 r = oprnd0 - t1; */
3016 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3017
3018 t1 = vect_recog_temp_ssa_var (itype, NULL);
3019 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3020 append_pattern_def_seq (stmt_vinfo, def_stmt);
3021
3022 r = vect_recog_temp_ssa_var (itype, NULL);
3023 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3024 }
3025
3026 /* Pattern detected. */
3027 if (dump_enabled_p ())
3028 {
3029 dump_printf_loc (MSG_NOTE, vect_location,
3030 "vect_recog_divmod_pattern: detected: ");
3031 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3032 }
3033
3034 stmts->safe_push (last_stmt);
3035
3036 *type_in = vectype;
3037 *type_out = vectype;
3038 return pattern_stmt;
3039 }
3040
3041 /* Function vect_recog_mixed_size_cond_pattern
3042
3043 Try to find the following pattern:
3044
3045 type x_t, y_t;
3046 TYPE a_T, b_T, c_T;
3047 loop:
3048 S1 a_T = x_t CMP y_t ? b_T : c_T;
3049
3050 where type 'TYPE' is an integral type which has different size
3051 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3052 than 'type', the constants need to fit into an integer type
3053 with the same width as 'type') or results of conversion from 'type'.
3054
3055 Input:
3056
3057 * LAST_STMT: A stmt from which the pattern search begins.
3058
3059 Output:
3060
3061 * TYPE_IN: The type of the input arguments to the pattern.
3062
3063 * TYPE_OUT: The type of the output of this pattern.
3064
3065 * Return value: A new stmt that will be used to replace the pattern.
3066 Additionally a def_stmt is added.
3067
3068 a_it = x_t CMP y_t ? b_it : c_it;
3069 a_T = (TYPE) a_it; */
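/* Illustrative example: with 'type' = char and 'TYPE' = int, the
   COND_EXPR is first evaluated in a char-wide integer type and its
   result is then converted to int by the added conversion stmt.  */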
3070
3071 static gimple *
3072 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
3073 tree *type_out)
3074 {
3075 gimple *last_stmt = (*stmts)[0];
3076 tree cond_expr, then_clause, else_clause;
3077 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
3078 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3079 gimple *pattern_stmt, *def_stmt;
3080 vec_info *vinfo = stmt_vinfo->vinfo;
3081 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3082 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3083 bool promotion;
3084 tree comp_scalar_type;
3085
3086 if (!is_gimple_assign (last_stmt)
3087 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3088 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3089 return NULL;
3090
3091 cond_expr = gimple_assign_rhs1 (last_stmt);
3092 then_clause = gimple_assign_rhs2 (last_stmt);
3093 else_clause = gimple_assign_rhs3 (last_stmt);
3094
3095 if (!COMPARISON_CLASS_P (cond_expr))
3096 return NULL;
3097
3098 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3099 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3100 if (comp_vectype == NULL_TREE)
3101 return NULL;
3102
3103 type = gimple_expr_type (last_stmt);
3104 if (types_compatible_p (type, comp_scalar_type)
3105 || ((TREE_CODE (then_clause) != INTEGER_CST
3106 || TREE_CODE (else_clause) != INTEGER_CST)
3107 && !INTEGRAL_TYPE_P (comp_scalar_type))
3108 || !INTEGRAL_TYPE_P (type))
3109 return NULL;
3110
3111 if ((TREE_CODE (then_clause) != INTEGER_CST
3112 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
3113 &def_stmt0, &promotion))
3114 || (TREE_CODE (else_clause) != INTEGER_CST
3115 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
3116 &def_stmt1, &promotion)))
3117 return NULL;
3118
3119 if (orig_type0 && orig_type1
3120 && !types_compatible_p (orig_type0, orig_type1))
3121 return NULL;
3122
3123 if (orig_type0)
3124 {
3125 if (!types_compatible_p (orig_type0, comp_scalar_type))
3126 return NULL;
3127 then_clause = gimple_assign_rhs1 (def_stmt0);
3128 itype = orig_type0;
3129 }
3130
3131 if (orig_type1)
3132 {
3133 if (!types_compatible_p (orig_type1, comp_scalar_type))
3134 return NULL;
3135 else_clause = gimple_assign_rhs1 (def_stmt1);
3136 itype = orig_type1;
3137 }
3138
3139
3140 HOST_WIDE_INT cmp_mode_size
3141 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3142
3143 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3144 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3145 return NULL;
3146
3147 vectype = get_vectype_for_scalar_type (type);
3148 if (vectype == NULL_TREE)
3149 return NULL;
3150
3151 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3152 return NULL;
3153
3154 if (itype == NULL_TREE)
3155 itype = build_nonstandard_integer_type (cmp_mode_size,
3156 TYPE_UNSIGNED (type));
3157
3158 if (itype == NULL_TREE
3159 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3160 return NULL;
3161
3162 vecitype = get_vectype_for_scalar_type (itype);
3163 if (vecitype == NULL_TREE)
3164 return NULL;
3165
3166 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3167 return NULL;
3168
3169 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3170 {
3171 if ((TREE_CODE (then_clause) == INTEGER_CST
3172 && !int_fits_type_p (then_clause, itype))
3173 || (TREE_CODE (else_clause) == INTEGER_CST
3174 && !int_fits_type_p (else_clause, itype)))
3175 return NULL;
3176 }
3177
3178 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3179 COND_EXPR, unshare_expr (cond_expr),
3180 fold_convert (itype, then_clause),
3181 fold_convert (itype, else_clause));
3182 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3183 NOP_EXPR, gimple_assign_lhs (def_stmt));
3184
3185 new_pattern_def_seq (stmt_vinfo, def_stmt);
3186 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3187 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3188 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
3189 *type_in = vecitype;
3190 *type_out = vectype;
3191
3192 if (dump_enabled_p ())
3193 dump_printf_loc (MSG_NOTE, vect_location,
3194 "vect_recog_mixed_size_cond_pattern: detected:\n");
3195
3196 return pattern_stmt;
3197 }
3198
3199
3200 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3201 true if bool VAR can and should be optimized that way. Assume it shouldn't
3202 in case it's a result of a comparison which can be directly vectorized into
3203 a vector comparison. Fills in STMTS with all stmts visited during the
3204 walk. */
3205
3206 static bool
3207 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3208 {
3209 gimple *def_stmt;
3210 enum vect_def_type dt;
3211 tree rhs1;
3212 enum tree_code rhs_code;
3213
3214 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3215 return false;
3216
3217 if (dt != vect_internal_def)
3218 return false;
3219
3220 if (!is_gimple_assign (def_stmt))
3221 return false;
3222
3223 if (stmts.contains (def_stmt))
3224 return true;
3225
3226 rhs1 = gimple_assign_rhs1 (def_stmt);
3227 rhs_code = gimple_assign_rhs_code (def_stmt);
3228 switch (rhs_code)
3229 {
3230 case SSA_NAME:
3231 if (! check_bool_pattern (rhs1, vinfo, stmts))
3232 return false;
3233 break;
3234
3235 CASE_CONVERT:
3236 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3237 return false;
3238 if (! check_bool_pattern (rhs1, vinfo, stmts))
3239 return false;
3240 break;
3241
3242 case BIT_NOT_EXPR:
3243 if (! check_bool_pattern (rhs1, vinfo, stmts))
3244 return false;
3245 break;
3246
3247 case BIT_AND_EXPR:
3248 case BIT_IOR_EXPR:
3249 case BIT_XOR_EXPR:
3250 if (! check_bool_pattern (rhs1, vinfo, stmts)
3251 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3252 return false;
3253 break;
3254
3255 default:
3256 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3257 {
3258 tree vecitype, comp_vectype;
3259
3260 /* If the comparison can throw, then is_gimple_condexpr will be
3261 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3262 if (stmt_could_throw_p (def_stmt))
3263 return false;
3264
3265 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3266 if (comp_vectype == NULL_TREE)
3267 return false;
3268
3269 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3270 if (mask_type
3271 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3272 return false;
3273
3274 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3275 {
3276 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3277 tree itype
3278 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3279 vecitype = get_vectype_for_scalar_type (itype);
3280 if (vecitype == NULL_TREE)
3281 return false;
3282 }
3283 else
3284 vecitype = comp_vectype;
3285 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3286 return false;
3287 }
3288 else
3289 return false;
3290 break;
3291 }
3292
3293 bool res = stmts.add (def_stmt);
3294 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3295 gcc_assert (!res);
3296
3297 return true;
3298 }
3299
3300
3301 /* Helper function of adjust_bool_pattern.  Add a cast to TYPE of VAR
3302    (the result of a previous pattern stmt, SSA_NAME_DEF_STMT of VAR),
3303    appending the cast stmt to STMT_INFO's pattern def sequence.  */
3304
3305 static tree
3306 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3307 {
3308 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3309 NOP_EXPR, var);
3310 stmt_vec_info patt_vinfo = new_stmt_vec_info (cast_stmt, stmt_info->vinfo);
3311 set_vinfo_for_stmt (cast_stmt, patt_vinfo);
3312 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (type);
3313 append_pattern_def_seq (stmt_info, cast_stmt);
3314 return gimple_assign_lhs (cast_stmt);
3315 }
3316
3317 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3318 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3319 type, OUT_TYPE is the desired final integer type of the whole pattern.
3320 STMT_INFO is the info of the pattern root and is where pattern stmts should
3321 be associated with. DEFS is a map of pattern defs. */
3322
3323 static void
3324 adjust_bool_pattern (tree var, tree out_type,
3325 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3326 {
3327 gimple *stmt = SSA_NAME_DEF_STMT (var);
3328 enum tree_code rhs_code, def_rhs_code;
3329 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3330 location_t loc;
3331 gimple *pattern_stmt, *def_stmt;
3332 tree trueval = NULL_TREE;
3333
3334 rhs1 = gimple_assign_rhs1 (stmt);
3335 rhs2 = gimple_assign_rhs2 (stmt);
3336 rhs_code = gimple_assign_rhs_code (stmt);
3337 loc = gimple_location (stmt);
3338 switch (rhs_code)
3339 {
3340 case SSA_NAME:
3341 CASE_CONVERT:
3342 irhs1 = *defs.get (rhs1);
3343 itype = TREE_TYPE (irhs1);
3344 pattern_stmt
3345 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3346 SSA_NAME, irhs1);
3347 break;
3348
3349 case BIT_NOT_EXPR:
3350 irhs1 = *defs.get (rhs1);
3351 itype = TREE_TYPE (irhs1);
3352 pattern_stmt
3353 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3354 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3355 break;
3356
3357 case BIT_AND_EXPR:
3358 /* Try to optimize x = y & (a < b ? 1 : 0); into
3359 x = (a < b ? y : 0);
3360
3361 E.g. for:
3362 bool a_b, b_b, c_b;
3363 TYPE d_T;
3364
3365 S1 a_b = x1 CMP1 y1;
3366 S2 b_b = x2 CMP2 y2;
3367 S3 c_b = a_b & b_b;
3368 S4 d_T = (TYPE) c_b;
3369
3370 we would normally emit:
3371
3372 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3373 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3374 S3' c_T = a_T & b_T;
3375 S4' d_T = c_T;
3376
3377 but we can save one stmt by using the
3378 result of one of the COND_EXPRs in the other COND_EXPR and leave
3379 BIT_AND_EXPR stmt out:
3380
3381 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3382 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3383 	 S4'  d_T = c_T;
3384
3385 	 At least when VEC_COND_EXPR is implemented using masks,
3386 	 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases the
3387 	 comparison mask is computed and ANDed, in one case with an
3388 	 all-ones vector, in the other case with a vector register.
3389 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3390 often more expensive. */
3391 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3392 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3393 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3394 {
3395 irhs1 = *defs.get (rhs1);
3396 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3397 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3398 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3399 {
3400 rhs_code = def_rhs_code;
3401 rhs1 = def_rhs1;
3402 rhs2 = gimple_assign_rhs2 (def_stmt);
3403 trueval = irhs1;
3404 goto do_compare;
3405 }
3406 else
3407 irhs2 = *defs.get (rhs2);
3408 goto and_ior_xor;
3409 }
3410 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3411 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3412 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3413 {
3414 irhs2 = *defs.get (rhs2);
3415 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3416 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3417 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3418 {
3419 rhs_code = def_rhs_code;
3420 rhs1 = def_rhs1;
3421 rhs2 = gimple_assign_rhs2 (def_stmt);
3422 trueval = irhs2;
3423 goto do_compare;
3424 }
3425 else
3426 irhs1 = *defs.get (rhs1);
3427 goto and_ior_xor;
3428 }
3429 /* FALLTHRU */
3430 case BIT_IOR_EXPR:
3431 case BIT_XOR_EXPR:
3432 irhs1 = *defs.get (rhs1);
3433 irhs2 = *defs.get (rhs2);
3434 and_ior_xor:
3435 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3436 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3437 {
3438 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3439 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3440 int out_prec = TYPE_PRECISION (out_type);
3441 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3442 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3443 stmt_info);
3444 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3445 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3446 stmt_info);
3447 else
3448 {
3449 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3450 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3451 }
3452 }
3453 itype = TREE_TYPE (irhs1);
3454 pattern_stmt
3455 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3456 rhs_code, irhs1, irhs2);
3457 break;
3458
3459 default:
3460 do_compare:
3461 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3462 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3463 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3464 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3465 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3466 {
3467 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3468 itype
3469 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3470 }
3471 else
3472 itype = TREE_TYPE (rhs1);
3473 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3474 if (trueval == NULL_TREE)
3475 trueval = build_int_cst (itype, 1);
3476 else
3477 gcc_checking_assert (useless_type_conversion_p (itype,
3478 TREE_TYPE (trueval)));
3479 pattern_stmt
3480 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3481 COND_EXPR, cond_expr, trueval,
3482 build_int_cst (itype, 0));
3483 break;
3484 }
3485
3486 gimple_set_location (pattern_stmt, loc);
3487 /* ??? Why does vect_mark_pattern_stmts set the vector type on all
3488 pattern def seq stmts instead of just letting auto-detection do
3489 its work? */
3490 stmt_vec_info patt_vinfo = new_stmt_vec_info (pattern_stmt, stmt_info->vinfo);
3491 set_vinfo_for_stmt (pattern_stmt, patt_vinfo);
3492 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (itype);
3493 append_pattern_def_seq (stmt_info, pattern_stmt);
3494 defs.put (var, gimple_assign_lhs (pattern_stmt));
3495 }
3496
3497 /* Comparison function to qsort a vector of gimple stmts by UID. */
3498
3499 static int
3500 sort_after_uid (const void *p1, const void *p2)
3501 {
3502 const gimple *stmt1 = *(const gimple * const *)p1;
3503 const gimple *stmt2 = *(const gimple * const *)p2;
3504 return gimple_uid (stmt1) - gimple_uid (stmt2);
3505 }
3506
3507 /* Create pattern stmts for all stmts participating in the bool pattern
3508 specified by BOOL_STMT_SET and its root STMT with the desired type
3509 OUT_TYPE. Return the def of the pattern root. */
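/* For example (illustrative; the names are made up): if BOOL_STMT_SET
   contains
     S1  a_b = x1 CMP1 y1;
     S3  c_b = a_b & b_b;
   the stmts are first sorted by UID so that S1 is processed before S3,
   each is rewritten by adjust_bool_pattern into an integer-typed
   pattern stmt, and the lhs of the last pattern stmt emitted is
   returned as the def to use for STMT.  */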
3510
3511 static tree
3512 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3513 tree out_type, gimple *stmt)
3514 {
3515 /* Gather original stmts in the bool pattern in their order of appearance
3516 in the IL. */
3517 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3518 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3519 i != bool_stmt_set.end (); ++i)
3520 bool_stmts.quick_push (*i);
3521 bool_stmts.qsort (sort_after_uid);
3522
3523 /* Now process them in that order, producing pattern stmts. */
3524 hash_map <tree, tree> defs;
3525 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3526 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3527 out_type, vinfo_for_stmt (stmt), defs);
3528
3529 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3530 gimple *pattern_stmt
3531 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
3532 return gimple_assign_lhs (pattern_stmt);
3533 }
3534
3535 /* Helper for search_type_for_mask. */
3536
3537 static tree
3538 search_type_for_mask_1 (tree var, vec_info *vinfo,
3539 hash_map<gimple *, tree> &cache)
3540 {
3541 gimple *def_stmt;
3542 enum vect_def_type dt;
3543 tree rhs1;
3544 enum tree_code rhs_code;
3545 tree res = NULL_TREE, res2;
3546
3547 if (TREE_CODE (var) != SSA_NAME)
3548 return NULL_TREE;
3549
3550 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3551 return NULL_TREE;
3552
3553 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3554 return NULL_TREE;
3555
3556 if (dt != vect_internal_def)
3557 return NULL_TREE;
3558
3559 if (!is_gimple_assign (def_stmt))
3560 return NULL_TREE;
3561
3562 tree *c = cache.get (def_stmt);
3563 if (c)
3564 return *c;
3565
3566 rhs_code = gimple_assign_rhs_code (def_stmt);
3567 rhs1 = gimple_assign_rhs1 (def_stmt);
3568
3569 switch (rhs_code)
3570 {
3571 case SSA_NAME:
3572 case BIT_NOT_EXPR:
3573 CASE_CONVERT:
3574 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3575 break;
3576
3577 case BIT_AND_EXPR:
3578 case BIT_IOR_EXPR:
3579 case BIT_XOR_EXPR:
3580 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3581 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3582 cache);
3583 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3584 res = res2;
3585 break;
3586
3587 default:
3588 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3589 {
3590 tree comp_vectype, mask_type;
3591
3592 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3593 {
3594 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3595 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3596 vinfo, cache);
3597 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3598 res = res2;
3599 break;
3600 }
3601
3602 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3603 if (comp_vectype == NULL_TREE)
3604 {
3605 res = NULL_TREE;
3606 break;
3607 }
3608
3609 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3610 if (!mask_type
3611 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3612 {
3613 res = NULL_TREE;
3614 break;
3615 }
3616
3617 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3618 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3619 {
3620 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3621 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3622 }
3623 else
3624 res = TREE_TYPE (rhs1);
3625 }
3626 }
3627
3628 cache.put (def_stmt, res);
3629 return res;
3630 }
3631
3632 /* Return the proper type for converting bool VAR into
3633 an integer value or NULL_TREE if no such type exists.
3634 The type is chosen so that the converted value has the
3635 same number of elements as VAR's vector type. */
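/* For instance (an illustrative sketch, assuming the target supports
   the vector comparison): for
     mask_5 = i_2 < i_3;      <- comparison of 32-bit ints
     mask_7 = mask_5 & b_6;
   searching from mask_7 yields a 32-bit unsigned integer type, so a
   mask with as many elements as a vector of the compared ints can be
   used.  */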
3636
3637 static tree
3638 search_type_for_mask (tree var, vec_info *vinfo)
3639 {
3640 hash_map<gimple *, tree> cache;
3641 return search_type_for_mask_1 (var, vinfo, cache);
3642 }
3643
3644 /* Function vect_recog_bool_pattern
3645
3646 Try to find pattern like following:
3647
3648 bool a_b, b_b, c_b, d_b, e_b;
3649 TYPE f_T;
3650 loop:
3651 S1 a_b = x1 CMP1 y1;
3652 S2 b_b = x2 CMP2 y2;
3653 S3 c_b = a_b & b_b;
3654 S4 d_b = x3 CMP3 y3;
3655 S5 e_b = c_b | d_b;
3656 S6 f_T = (TYPE) e_b;
3657
3658 where type 'TYPE' is an integral type. Or a similar pattern
3659 ending in
3660
3661 S6 f_Y = e_b ? r_Y : s_Y;
3662
3663 as results from if-conversion of a complex condition.
3664
3665 Input:
3666
3667 * LAST_STMT: A stmt at the end from which the pattern
3668 search begins, i.e. cast of a bool to
3669 an integer type.
3670
3671 Output:
3672
3673 * TYPE_IN: The type of the input arguments to the pattern.
3674
3675 * TYPE_OUT: The type of the output of this pattern.
3676
3677 * Return value: A new stmt that will be used to replace the pattern.
3678
3679 Assuming size of TYPE is the same as size of all comparisons
3680 (otherwise some casts would be added where needed), for the above
3681 sequence we create the following related pattern stmts:
3682 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3683 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3684 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3685 S5' e_T = c_T | d_T;
3686 S6' f_T = e_T;
3687
3688 Instead of the above S3' we could emit:
3689 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3690 S3' c_T = a_T | b_T;
3691 but the above is more efficient. */
3692
3693 static gimple *
3694 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
3695 tree *type_out)
3696 {
3697 gimple *last_stmt = stmts->pop ();
3698 enum tree_code rhs_code;
3699 tree var, lhs, rhs, vectype;
3700 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3701 stmt_vec_info new_stmt_info;
3702 vec_info *vinfo = stmt_vinfo->vinfo;
3703 gimple *pattern_stmt;
3704
3705 if (!is_gimple_assign (last_stmt))
3706 return NULL;
3707
3708 var = gimple_assign_rhs1 (last_stmt);
3709 lhs = gimple_assign_lhs (last_stmt);
3710
3711 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3712 return NULL;
3713
3714 hash_set<gimple *> bool_stmts;
3715
3716 rhs_code = gimple_assign_rhs_code (last_stmt);
3717 if (CONVERT_EXPR_CODE_P (rhs_code))
3718 {
3719 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3720 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3721 return NULL;
3722 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3723 if (vectype == NULL_TREE)
3724 return NULL;
3725
3726 if (check_bool_pattern (var, vinfo, bool_stmts))
3727 {
3728 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
3729 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3730 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3731 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3732 else
3733 pattern_stmt
3734 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3735 }
3736 else
3737 {
3738 tree type = search_type_for_mask (var, vinfo);
3739 tree cst0, cst1, tmp;
3740
3741 if (!type)
3742 return NULL;
3743
3744 /* We may directly use the cond with a narrowed type to avoid
3745 multiple cond exprs with subsequent result packing, and
3746 instead perform a single cond with a packed mask. In case
3747 of widening it is better to emit the cond first and then
3748 extract the results. */
3749 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3750 type = TREE_TYPE (lhs);
3751
3752 cst0 = build_int_cst (type, 0);
3753 cst1 = build_int_cst (type, 1);
3754 tmp = vect_recog_temp_ssa_var (type, NULL);
3755 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3756
3757 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3758 {
3759 tree new_vectype = get_vectype_for_scalar_type (type);
3760 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3761 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3762 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3763 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3764
3765 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3766 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3767 }
3768 }
3769
3770 *type_out = vectype;
3771 *type_in = vectype;
3772 stmts->safe_push (last_stmt);
3773 if (dump_enabled_p ())
3774 dump_printf_loc (MSG_NOTE, vect_location,
3775 "vect_recog_bool_pattern: detected:\n");
3776
3777 return pattern_stmt;
3778 }
3779 else if (rhs_code == COND_EXPR
3780 && TREE_CODE (var) == SSA_NAME)
3781 {
3782 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3783 if (vectype == NULL_TREE)
3784 return NULL;
3785
3786 /* Build a scalar type for the boolean result that when
3787 vectorized matches the vector type of the result in
3788 size and number of elements. */
3789 unsigned prec
3790 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3791 TYPE_VECTOR_SUBPARTS (vectype));
3792
3793 tree type
3794 = build_nonstandard_integer_type (prec,
3795 TYPE_UNSIGNED (TREE_TYPE (var)));
3796 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3797 return NULL;
3798
3799 if (!check_bool_pattern (var, vinfo, bool_stmts))
3800 return NULL;
3801
3802 rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);
3803
3804 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3805 pattern_stmt
3806 = gimple_build_assign (lhs, COND_EXPR,
3807 build2 (NE_EXPR, boolean_type_node,
3808 rhs, build_int_cst (type, 0)),
3809 gimple_assign_rhs2 (last_stmt),
3810 gimple_assign_rhs3 (last_stmt));
3811 *type_out = vectype;
3812 *type_in = vectype;
3813 stmts->safe_push (last_stmt);
3814 if (dump_enabled_p ())
3815 dump_printf_loc (MSG_NOTE, vect_location,
3816 "vect_recog_bool_pattern: detected:\n");
3817
3818 return pattern_stmt;
3819 }
3820 else if (rhs_code == SSA_NAME
3821 && STMT_VINFO_DATA_REF (stmt_vinfo))
3822 {
3823 stmt_vec_info pattern_stmt_info;
3824 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3825 gcc_assert (vectype != NULL_TREE);
3826 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3827 return NULL;
3828
3829 if (check_bool_pattern (var, vinfo, bool_stmts))
3830 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
3831 else
3832 {
3833 tree type = search_type_for_mask (var, vinfo);
3834 tree cst0, cst1, new_vectype;
3835
3836 if (!type)
3837 return NULL;
3838
3839 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3840 type = TREE_TYPE (vectype);
3841
3842 cst0 = build_int_cst (type, 0);
3843 cst1 = build_int_cst (type, 1);
3844 new_vectype = get_vectype_for_scalar_type (type);
3845
3846 rhs = vect_recog_temp_ssa_var (type, NULL);
3847 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3848
3849 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3850 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3851 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3852 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3853 }
3854
3855 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3856 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3857 {
3858 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3859 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3860 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3861 rhs = rhs2;
3862 }
3863 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3864 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3865 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3866 STMT_VINFO_DATA_REF (pattern_stmt_info)
3867 = STMT_VINFO_DATA_REF (stmt_vinfo);
3868 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3869 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3870 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3871 *type_out = vectype;
3872 *type_in = vectype;
3873 stmts->safe_push (last_stmt);
3874 if (dump_enabled_p ())
3875 dump_printf_loc (MSG_NOTE, vect_location,
3876 "vect_recog_bool_pattern: detected:\n");
3877 return pattern_stmt;
3878 }
3879 else
3880 return NULL;
3881 }
3882
3883
3884 /* A helper for vect_recog_mask_conversion_pattern. Build
3885 conversion of MASK to a type suitable for masking VECTYPE.
3886 Built statement gets required vectype and is appended to
3887 a pattern sequence of STMT_VINFO.
3888
3889 Return converted mask. */
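/* E.g. (illustrative, names invented): this emits a pattern def stmt
   along the lines of
     patt_4 = (<new mask element type>) mask_3;
   whose recorded vectype is the truth vector type matching VECTYPE,
   and returns patt_4.  */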
3890
3891 static tree
3892 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3893 vec_info *vinfo)
3894 {
3895 gimple *stmt;
3896 tree masktype, tmp;
3897 stmt_vec_info new_stmt_info;
3898
3899 masktype = build_same_sized_truth_vector_type (vectype);
3900 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3901 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3902 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3903 set_vinfo_for_stmt (stmt, new_stmt_info);
3904 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3905 append_pattern_def_seq (stmt_vinfo, stmt);
3906
3907 return tmp;
3908 }
3909
3910
3911 /* Function vect_recog_mask_conversion_pattern
3912
3913 Try to find statements which require boolean type
3914 conversion. Additional conversion statements are
3915 added to handle such cases. For example:
3916
3917 bool m_1, m_2, m_3;
3918 int i_4, i_5;
3919 double d_6, d_7;
3920 char c_1, c_2, c_3;
3921
3922 S1 m_1 = i_4 > i_5;
3923 S2 m_2 = d_6 < d_7;
3924 S3 m_3 = m_1 & m_2;
3925 S4 c_1 = m_3 ? c_2 : c_3;
3926
3927 Will be transformed into:
3928
3929 S1 m_1 = i_4 > i_5;
3930 S2 m_2 = d_6 < d_7;
3931 S3'' m_2' = (_Bool[bitsize=32])m_2
3932 S3' m_3' = m_1 & m_2';
3933 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3934 S4' c_1' = m_3'' ? c_2 : c_3; */
3935
3936 static gimple *
3937 vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
3938 tree *type_out)
3939 {
3940 gimple *last_stmt = stmts->pop ();
3941 enum tree_code rhs_code;
3942 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3943 tree vectype1, vectype2;
3944 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3945 stmt_vec_info pattern_stmt_info;
3946 vec_info *vinfo = stmt_vinfo->vinfo;
3947
3948 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3949 if (is_gimple_call (last_stmt)
3950 && gimple_call_internal_p (last_stmt)
3951 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3952 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3953 {
3954 gcall *pattern_stmt;
3955 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3956
3957 if (load)
3958 {
3959 lhs = gimple_call_lhs (last_stmt);
3960 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3961 }
3962 else
3963 {
3964 rhs2 = gimple_call_arg (last_stmt, 3);
3965 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3966 }
3967
3968 rhs1 = gimple_call_arg (last_stmt, 2);
3969 rhs1_type = search_type_for_mask (rhs1, vinfo);
3970 if (!rhs1_type)
3971 return NULL;
3972 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3973
3974 if (!vectype1 || !vectype2
3975 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3976 TYPE_VECTOR_SUBPARTS (vectype2)))
3977 return NULL;
3978
3979 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3980
3981 if (load)
3982 {
3983 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3984 pattern_stmt
3985 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3986 gimple_call_arg (last_stmt, 0),
3987 gimple_call_arg (last_stmt, 1),
3988 tmp);
3989 gimple_call_set_lhs (pattern_stmt, lhs);
3990 }
3991 else
3992 pattern_stmt
3993 = gimple_build_call_internal (IFN_MASK_STORE, 4,
3994 gimple_call_arg (last_stmt, 0),
3995 gimple_call_arg (last_stmt, 1),
3996 tmp,
3997 gimple_call_arg (last_stmt, 3));
3998
3999 gimple_call_set_nothrow (pattern_stmt, true);
4000
4001 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4002 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4003 STMT_VINFO_DATA_REF (pattern_stmt_info)
4004 = STMT_VINFO_DATA_REF (stmt_vinfo);
4005 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4006 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
4007 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
4008
4009 *type_out = vectype1;
4010 *type_in = vectype1;
4011 stmts->safe_push (last_stmt);
4012 if (dump_enabled_p ())
4013 dump_printf_loc (MSG_NOTE, vect_location,
4014 "vect_recog_mask_conversion_pattern: detected:\n");
4015
4016 return pattern_stmt;
4017 }
4018
4019 if (!is_gimple_assign (last_stmt))
4020 return NULL;
4021
4022 gimple *pattern_stmt;
4023 lhs = gimple_assign_lhs (last_stmt);
4024 rhs1 = gimple_assign_rhs1 (last_stmt);
4025 rhs_code = gimple_assign_rhs_code (last_stmt);
4026
4027 /* Check for cond expression requiring mask conversion. */
4028 if (rhs_code == COND_EXPR)
4029 {
4030 /* vect_recog_mixed_size_cond_pattern could apply.
4031 Do nothing then. */
4032 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4033 return NULL;
4034
4035 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
4036
4037 if (TREE_CODE (rhs1) == SSA_NAME)
4038 {
4039 rhs1_type = search_type_for_mask (rhs1, vinfo);
4040 if (!rhs1_type)
4041 return NULL;
4042 }
4043 else if (COMPARISON_CLASS_P (rhs1))
4044 {
4045 /* Check whether we're comparing scalar booleans and (if so)
4046 whether a better mask type exists than the mask associated
4047 with boolean-sized elements. This avoids unnecessary packs
4048 and unpacks if the booleans are set from comparisons of
4049 wider types. E.g. in:
4050
4051 int x1, x2, x3, x4, y1, y2;
4052 ...
4053 bool b1 = (x1 == x2);
4054 bool b2 = (x3 == x4);
4055 ... = b1 == b2 ? y1 : y2;
4056
4057 it is better for b1 and b2 to use the mask type associated
4058 with int elements rather than bool (byte) elements. */
4059 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4060 if (!rhs1_type)
4061 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4062 }
4063 else
4064 return NULL;
4065
4066 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4067
4068 if (!vectype1 || !vectype2)
4069 return NULL;
4070
4071 /* Continue if a conversion is needed. Also continue if we have
4072 a comparison whose vector type would normally be different from
4073 VECTYPE2 when considered in isolation. In that case we'll
4074 replace the comparison with an SSA name (so that we can record
4075 its vector type) and behave as though the comparison was an SSA
4076 name from the outset. */
4077 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4078 TYPE_VECTOR_SUBPARTS (vectype2))
4079 && (TREE_CODE (rhs1) == SSA_NAME
4080 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4081 return NULL;
4082
4083 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4084 in place; we can handle it in vectorizable_condition. This avoids
4085 unnecessary promotion stmts and an increased vectorization factor. */
4086 if (COMPARISON_CLASS_P (rhs1)
4087 && INTEGRAL_TYPE_P (rhs1_type)
4088 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4089 TYPE_VECTOR_SUBPARTS (vectype2)))
4090 {
4091 gimple *dummy;
4092 enum vect_def_type dt;
4093 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), stmt_vinfo->vinfo,
4094 &dummy, &dt)
4095 && dt == vect_external_def
4096 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), stmt_vinfo->vinfo,
4097 &dummy, &dt)
4098 && (dt == vect_external_def
4099 || dt == vect_constant_def))
4100 {
4101 tree wide_scalar_type = build_nonstandard_integer_type
4102 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4103 TYPE_UNSIGNED (rhs1_type));
4104 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4105 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4106 return NULL;
4107 }
4108 }
4109
4110 /* If rhs1 is a comparison we need to move it into a
4111 separate statement. */
4112 if (TREE_CODE (rhs1) != SSA_NAME)
4113 {
4114 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4115 pattern_stmt = gimple_build_assign (tmp, rhs1);
4116 rhs1 = tmp;
4117
4118 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4119 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4120 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
4121 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
4122 }
4123
4124 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4125 TYPE_VECTOR_SUBPARTS (vectype2)))
4126 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4127 else
4128 tmp = rhs1;
4129
4130 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4131 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4132 gimple_assign_rhs2 (last_stmt),
4133 gimple_assign_rhs3 (last_stmt));
4134
4135 *type_out = vectype1;
4136 *type_in = vectype1;
4137 stmts->safe_push (last_stmt);
4138 if (dump_enabled_p ())
4139 dump_printf_loc (MSG_NOTE, vect_location,
4140 "vect_recog_mask_conversion_pattern: detected:\n");
4141
4142 return pattern_stmt;
4143 }
4144
4145 /* Now check for binary boolean operations requiring conversion of
4146 one of the operands. */
4147 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4148 return NULL;
4149
4150 if (rhs_code != BIT_IOR_EXPR
4151 && rhs_code != BIT_XOR_EXPR
4152 && rhs_code != BIT_AND_EXPR
4153 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4154 return NULL;
4155
4156 rhs2 = gimple_assign_rhs2 (last_stmt);
4157
4158 rhs1_type = search_type_for_mask (rhs1, vinfo);
4159 rhs2_type = search_type_for_mask (rhs2, vinfo);
4160
4161 if (!rhs1_type || !rhs2_type
4162 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4163 return NULL;
4164
4165 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4166 {
4167 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4168 if (!vectype1)
4169 return NULL;
4170 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
4171 }
4172 else
4173 {
4174 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4175 if (!vectype1)
4176 return NULL;
4177 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4178 }
4179
4180 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4181 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4182
4183 *type_out = vectype1;
4184 *type_in = vectype1;
4185 stmts->safe_push (last_stmt);
4186 if (dump_enabled_p ())
4187 dump_printf_loc (MSG_NOTE, vect_location,
4188 "vect_recog_mask_conversion_pattern: detected:\n");
4189
4190 return pattern_stmt;
4191 }
4192
4193 /* STMT is a load or store. If the load or store is conditional, return
4194 the boolean condition under which it occurs, otherwise return null. */
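/* For example (illustrative): a plain store "*p_7 = x_8;" is
   unconditional, so NULL_TREE is returned, whereas for an internal call
     MASK_STORE (p_7, align, mask_5, x_8);
   the mask operand mask_5 is returned.  */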
4195
4196 static tree
4197 vect_get_load_store_mask (gimple *stmt)
4198 {
4199 if (gassign *def_assign = dyn_cast <gassign *> (stmt))
4200 {
4201 gcc_assert (gimple_assign_single_p (def_assign));
4202 return NULL_TREE;
4203 }
4204
4205 if (gcall *def_call = dyn_cast <gcall *> (stmt))
4206 {
4207 internal_fn ifn = gimple_call_internal_fn (def_call);
4208 int mask_index = internal_fn_mask_index (ifn);
4209 return gimple_call_arg (def_call, mask_index);
4210 }
4211
4212 gcc_unreachable ();
4213 }
4214
4215 /* Return the scalar offset type that an internal gather/scatter function
4216 should use. GS_INFO describes the gather/scatter operation. */
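/* For instance (illustrative): gathering 64-bit doubles with a 32-bit
   signed int offset yields a 64-bit signed integer type, so the offset
   can be sign-extended to the element width; an offset that already
   has the element width is returned unchanged.  */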
4217
4218 static tree
4219 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4220 {
4221 tree offset_type = TREE_TYPE (gs_info->offset);
4222 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4223
4224 /* Enforced by vect_check_gather_scatter. */
4225 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4226 gcc_assert (element_bits >= offset_bits);
4227
4228 /* If the offset is narrower than the elements, extend it according
4229 to its sign. */
4230 if (element_bits > offset_bits)
4231 return build_nonstandard_integer_type (element_bits,
4232 TYPE_UNSIGNED (offset_type));
4233
4234 return offset_type;
4235 }
4236
4237 /* Return MASK if MASK is suitable for masking an operation on vectors
4238 of type VECTYPE, otherwise convert it into such a form and return
4239 the result. Associate any conversion statements with STMT_INFO's
4240 pattern. */
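/* E.g. (sketch): if MASK was derived from comparisons of chars but
   VECTYPE is a vector of doubles, the corresponding mask vector types
   have different numbers of elements, so MASK is rewritten via
   build_mask_conversion; otherwise MASK is returned unchanged.  */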
4241
4242 static tree
4243 vect_convert_mask_for_vectype (tree mask, tree vectype,
4244 stmt_vec_info stmt_info, vec_info *vinfo)
4245 {
4246 tree mask_type = search_type_for_mask (mask, vinfo);
4247 if (mask_type)
4248 {
4249 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4250 if (mask_vectype
4251 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4252 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4253 mask = build_mask_conversion (mask, vectype, stmt_info, vinfo);
4254 }
4255 return mask;
4256 }
4257
4258 /* Return the equivalent of:
4259
4260 fold_convert (TYPE, VALUE)
4261
4262 with the expectation that the operation will be vectorized.
4263 If new statements are needed, add them as pattern statements
4264 to STMT_INFO. */
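/* For example (illustrative, names made up): converting a 32-bit
   offset off_5 to a 64-bit TYPE adds a pattern statement along the
   lines of
     patt_6 = (<64-bit type>) off_5;
   and returns patt_6; if VALUE already has a type compatible with
   TYPE, VALUE itself is returned and nothing is emitted.  */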
4265
4266 static tree
4267 vect_add_conversion_to_patterm (tree type, tree value,
4268 stmt_vec_info stmt_info,
4269 vec_info *vinfo)
4270 {
4271 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4272 return value;
4273
4274 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4275 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4276 stmt_vec_info new_stmt_info = new_stmt_vec_info (conversion, vinfo);
4277 set_vinfo_for_stmt (conversion, new_stmt_info);
4278 STMT_VINFO_VECTYPE (new_stmt_info) = get_vectype_for_scalar_type (type);
4279 append_pattern_def_seq (stmt_info, conversion);
4280 return new_value;
4281 }
4282
4283 /* Try to convert STMT into a call to a gather load or scatter store
4284 internal function. Return the final statement on success and set
4285 *TYPE_IN and *TYPE_OUT to the vector type being loaded or stored.
4286
4287 This function only handles gathers and scatters that were recognized
4288 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
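/* E.g. (illustrative): a masked gather load recognized here ends up as
   an internal call along the lines of
     lhs_6 = MASK_GATHER_LOAD (base_1, off_2, scale, mask_3);
   and a masked scatter store as
     MASK_SCATTER_STORE (base_1, off_2, scale, rhs_4, mask_3);
   with the data reference transferred to the new pattern statement.  */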
4289
4290 static gimple *
4291 vect_try_gather_scatter_pattern (gimple *stmt, stmt_vec_info last_stmt_info,
4292 tree *type_in, tree *type_out)
4293 {
4294 /* Currently we only support this for loop vectorization. */
4295 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4296 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4297 if (!loop_vinfo)
4298 return NULL;
4299
4300 /* Make sure that we're looking at a gather load or scatter store. */
4301 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4302 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4303 return NULL;
4304
4305 /* Get the boolean that controls whether the load or store happens.
4306 This is null if the operation is unconditional. */
4307 tree mask = vect_get_load_store_mask (stmt);
4308
4309 /* Make sure that the target supports an appropriate internal
4310 function for the gather/scatter operation. */
4311 gather_scatter_info gs_info;
4312 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info)
4313 || gs_info.decl)
4314 return NULL;
4315
4316 /* Convert the mask to the right form. */
4317 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4318 if (mask)
4319 mask = vect_convert_mask_for_vectype (mask, gs_vectype, last_stmt_info,
4320 loop_vinfo);
4321
4322 /* Get the invariant base and non-invariant offset, converting the
4323 latter to the same width as the vector elements. */
4324 tree base = gs_info.base;
4325 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4326 tree offset = vect_add_conversion_to_patterm (offset_type, gs_info.offset,
4327 last_stmt_info, loop_vinfo);
4328
4329 /* Build the new pattern statement. */
4330 tree scale = size_int (gs_info.scale);
4331 gcall *pattern_stmt;
4332 if (DR_IS_READ (dr))
4333 {
4334 if (mask != NULL)
4335 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4336 offset, scale, mask);
4337 else
4338 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4339 offset, scale);
4340 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4341 gimple_call_set_lhs (pattern_stmt, load_lhs);
4342 }
4343 else
4344 {
4345 tree rhs = vect_get_store_rhs (stmt);
4346 if (mask != NULL)
4347 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4348 base, offset, scale, rhs,
4349 mask);
4350 else
4351 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4352 base, offset, scale, rhs);
4353 }
4354 gimple_call_set_nothrow (pattern_stmt, true);
4355
4356 /* Copy across relevant vectorization info and associate DR with the
4357 new pattern statement instead of the original statement. */
4358 stmt_vec_info pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
4359 loop_vinfo);
4360 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4361 STMT_VINFO_DATA_REF (pattern_stmt_info) = dr;
4362 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4363 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
4364 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4365 = STMT_VINFO_GATHER_SCATTER_P (stmt_info);
4366 DR_STMT (dr) = pattern_stmt;
4367
4368 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4369 *type_out = vectype;
4370 *type_in = vectype;
4371
4372 if (dump_enabled_p ())
4373 dump_printf_loc (MSG_NOTE, vect_location,
4374 "gather/scatter pattern detected:\n");
4375
4376 return pattern_stmt;
4377 }
4378
4379 /* Pattern wrapper around vect_try_gather_scatter_pattern. */
4380
4381 static gimple *
4382 vect_recog_gather_scatter_pattern (vec<gimple *> *stmts, tree *type_in,
4383 tree *type_out)
4384 {
4385 gimple *last_stmt = stmts->pop ();
4386 stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
4387 gimple *pattern_stmt = vect_try_gather_scatter_pattern (last_stmt,
4388 last_stmt_info,
4389 type_in, type_out);
4390 if (pattern_stmt)
4391 stmts->safe_push (last_stmt);
4392 return pattern_stmt;
4393 }
4394
4395 /* Mark statements that are involved in a pattern. */
4396
4397 static inline void
4398 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
4399 tree pattern_vectype)
4400 {
4401 stmt_vec_info pattern_stmt_info, def_stmt_info;
4402 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
4403 vec_info *vinfo = orig_stmt_info->vinfo;
4404 gimple *def_stmt;
4405
4406 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
4407 if (pattern_stmt_info == NULL)
4408 {
4409 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4410 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4411 }
4412 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
4413
4414 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
4415 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
4416 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
4417 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
4418 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
4419 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
4420 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
4421 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4422 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
4423 {
4424 gimple_stmt_iterator si;
4425 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
4426 !gsi_end_p (si); gsi_next (&si))
4427 {
4428 def_stmt = gsi_stmt (si);
4429 def_stmt_info = vinfo_for_stmt (def_stmt);
4430 if (def_stmt_info == NULL)
4431 {
4432 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
4433 set_vinfo_for_stmt (def_stmt, def_stmt_info);
4434 }
4435 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
4436 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
4437 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
4438 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
4439 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
4440 }
4441 }
4442 }
4443
4444 /* Function vect_pattern_recog_1
4445
4446 Input:
4447 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4448 computation pattern.
4449 SI: An iterator pointing to the stmt from which the pattern search should start.
4450
4451 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
4452 expression that computes the same functionality and can be used to
4453 replace the sequence of stmts that are involved in the pattern.
4454
4455 Output:
4456 This function checks if the expression returned by PATTERN_RECOG_FUNC is
4457 supported in vector form by the target. We use 'TYPE_IN' to obtain the
4458 relevant vector type. If 'TYPE_IN' is already a vector type, then this
4459 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
4460 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
4461 to the available target pattern.
4462
4463 This function also does some bookkeeping, as explained in the documentation
4464 for vect_pattern_recog. */
4465
4466 static bool
4467 vect_pattern_recog_1 (vect_recog_func *recog_func,
4468 gimple_stmt_iterator si,
4469 vec<gimple *> *stmts_to_replace)
4470 {
4471 gimple *stmt = gsi_stmt (si), *pattern_stmt;
4472 stmt_vec_info stmt_info;
4473 loop_vec_info loop_vinfo;
4474 tree pattern_vectype;
4475 tree type_in, type_out;
4476 enum tree_code code;
4477 int i;
4478 gimple *next;
4479
4480 stmts_to_replace->truncate (0);
4481 stmts_to_replace->quick_push (stmt);
4482 pattern_stmt = recog_func->fn (stmts_to_replace, &type_in, &type_out);
4483 if (!pattern_stmt)
4484 return false;
4485
4486 stmt = stmts_to_replace->last ();
4487 stmt_info = vinfo_for_stmt (stmt);
4488 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4489
4490 if (VECTOR_BOOLEAN_TYPE_P (type_in)
4491 || VECTOR_TYPE_P (type_in))
4492 {
4493 /* No need to check target support (already checked by the pattern
4494 recognition function). */
4495 pattern_vectype = type_out ? type_out : type_in;
4496 }
4497 else
4498 {
4499 /* Check target support */
4500 type_in = get_vectype_for_scalar_type (type_in);
4501 if (!type_in)
4502 return false;
4503 if (type_out)
4504 type_out = get_vectype_for_scalar_type (type_out);
4505 else
4506 type_out = type_in;
4507 if (!type_out)
4508 return false;
4509 pattern_vectype = type_out;
4510
4511 if (is_gimple_assign (pattern_stmt))
4512 {
4513 enum insn_code icode;
4514 code = gimple_assign_rhs_code (pattern_stmt);
4515 optab optab = optab_for_tree_code (code, type_in, optab_default);
4516 machine_mode vec_mode = TYPE_MODE (type_in);
4517 if (!optab
4518 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
4519 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
4520 return false;
4521 }
4522 else
4523 gcc_assert (is_gimple_call (pattern_stmt));
4524 }
4525
4526 /* Found a vectorizable pattern. */
4527 if (dump_enabled_p ())
4528 {
4529 dump_printf_loc (MSG_NOTE, vect_location,
4530 "%s pattern recognized: ", recog_func->name);
4531 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4532 }
4533
4534 /* Mark the stmts that are involved in the pattern. */
4535 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
4536
4537 /* Patterns cannot be vectorized using SLP, because they change the order of
4538 computation. */
4539 if (loop_vinfo)
4540 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
4541 if (next == stmt)
4542 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
4543
4544 /* It is possible that additional pattern stmts are created and inserted in
4545 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
4546 relevant statements. */
4547 for (i = 0; stmts_to_replace->iterate (i, &stmt)
4548 && (unsigned) i < (stmts_to_replace->length () - 1);
4549 i++)
4550 {
4551 stmt_info = vinfo_for_stmt (stmt);
4552 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4553 if (dump_enabled_p ())
4554 {
4555 dump_printf_loc (MSG_NOTE, vect_location,
4556 "additional pattern stmt: ");
4557 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4558 }
4559
4560 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
4561 }
4562
4563 return true;
4564 }
4565
4566
4567 /* Function vect_pattern_recog
4568
4569 Input:
4570 VINFO - the vec_info of the loop or basic block in which we want to
4571 look for computation idioms.
4572
4573 Output - for each computation idiom that is detected we create a new stmt
4574 that provides the same functionality and that can be vectorized. We
4575 also record some information in the struct_stmt_info of the relevant
4576 stmts, as explained below:
4577
4578 At the entry to this function we have the following stmts, with the
4579 following initial value in the STMT_VINFO fields:
4580
4581 stmt in_pattern_p related_stmt vec_stmt
4582 S1: a_i = .... - - -
4583 S2: a_2 = ..use(a_i).. - - -
4584 S3: a_1 = ..use(a_2).. - - -
4585 S4: a_0 = ..use(a_1).. - - -
4586 S5: ... = ..use(a_0).. - - -
4587
4588 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4589 represented by a single stmt. We then:
4590 - create a new stmt S6 equivalent to the pattern (the stmt is not
4591 inserted into the code)
4592 - fill in the STMT_VINFO fields as follows:
4593
4594 in_pattern_p related_stmt vec_stmt
4595 S1: a_i = .... - - -
4596 S2: a_2 = ..use(a_i).. - - -
4597 S3: a_1 = ..use(a_2).. - - -
4598 S4: a_0 = ..use(a_1).. true S6 -
4599 '---> S6: a_new = .... - S4 -
4600 S5: ... = ..use(a_0).. - - -
4601
4602 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4603 to each other through the RELATED_STMT field).
4604
4605 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4606 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4607 remain irrelevant unless used by stmts other than S4.
4608
4609 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4610 (because they are marked as irrelevant). It will vectorize S6, and record
4611 a pointer to the new vector stmt VS6 from S6 (as usual).
4612 S4 will be skipped, and S5 will be vectorized as usual:
4613
4614 in_pattern_p related_stmt vec_stmt
4615 S1: a_i = .... - - -
4616 S2: a_2 = ..use(a_i).. - - -
4617 S3: a_1 = ..use(a_2).. - - -
4618 > VS6: va_new = .... - - -
4619 S4: a_0 = ..use(a_1).. true S6 VS6
4620 '---> S6: a_new = .... - S4 VS6
4621 > VS5: ... = ..vuse(va_new).. - - -
4622 S5: ... = ..use(a_0).. - - -
4623
4624 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4625 elsewhere), and we'll end up with:
4626
4627 VS6: va_new = ....
4628 VS5: ... = ..vuse(va_new)..
4629
4630 In case of more than one pattern statements, e.g., widen-mult with
4631 intermediate type:
4632
4633 S1 a_t = ;
4634 S2 a_T = (TYPE) a_t;
4635 '--> S3: a_it = (interm_type) a_t;
4636 S4 prod_T = a_T * CONST;
4637 '--> S5: prod_T' = a_it w* CONST;
4638
4639 there may be other users of a_T outside the pattern. In that case S2 will
4640 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4641 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4642 be recorded in S3. */
4643
4644 void
4645 vect_pattern_recog (vec_info *vinfo)
4646 {
4647 struct loop *loop;
4648 basic_block *bbs;
4649 unsigned int nbbs;
4650 gimple_stmt_iterator si;
4651 unsigned int i, j;
4652 auto_vec<gimple *, 1> stmts_to_replace;
4653 gimple *stmt;
4654
4655 if (dump_enabled_p ())
4656 dump_printf_loc (MSG_NOTE, vect_location,
4657 "=== vect_pattern_recog ===\n");
4658
4659 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4660 {
4661 loop = LOOP_VINFO_LOOP (loop_vinfo);
4662 bbs = LOOP_VINFO_BBS (loop_vinfo);
4663 nbbs = loop->num_nodes;
4664
4665 /* Scan through the loop stmts, applying the pattern recognition
4666 functions starting at each stmt visited: */
4667 for (i = 0; i < nbbs; i++)
4668 {
4669 basic_block bb = bbs[i];
4670 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4671 {
4672 /* Scan over all generic vect_recog_xxx_pattern functions. */
4673 for (j = 0; j < NUM_PATTERNS; j++)
4674 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4675 &stmts_to_replace))
4676 break;
4677 }
4678 }
4679 }
4680 else
4681 {
4682 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4683 for (si = bb_vinfo->region_begin;
4684 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4685 {
4686 if ((stmt = gsi_stmt (si))
4687 && vinfo_for_stmt (stmt)
4688 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
4689 continue;
4690
4691 /* Scan over all generic vect_recog_xxx_pattern functions. */
4692 for (j = 0; j < NUM_PATTERNS; j++)
4693 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4694 &stmts_to_replace))
4695 break;
4696 }
4697 }
4698 }
4699