1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "ggc.h"
26 #include "tree.h"
27 #include "target.h"
28 #include "basic-block.h"
29 #include "gimple-pretty-print.h"
30 #include "tree-flow.h"
31 #include "cfgloop.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "params.h"
35 #include "tree-data-ref.h"
36 #include "tree-vectorizer.h"
37 #include "recog.h" /* FIXME: for insn_data */
38 #include "diagnostic-core.h"
39 #include "dumpfile.h"
40
41 /* Pattern recognition functions */
42 static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
43 tree *);
44 static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
45 tree *);
46 static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
47 tree *);
48 static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
49 static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
50 tree *);
51 static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
52 tree *, tree *);
53 static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
54 tree *, tree *);
55 static gimple vect_recog_divmod_pattern (vec<gimple> *,
56 tree *, tree *);
57 static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
58 tree *, tree *);
59 static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
60 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
61 vect_recog_widen_mult_pattern,
62 vect_recog_widen_sum_pattern,
63 vect_recog_dot_prod_pattern,
64 vect_recog_pow_pattern,
65 vect_recog_widen_shift_pattern,
66 vect_recog_over_widening_pattern,
67 vect_recog_vector_vector_shift_pattern,
68 vect_recog_divmod_pattern,
69 vect_recog_mixed_size_cond_pattern,
70 vect_recog_bool_pattern};
71
72 static inline void
append_pattern_def_seq(stmt_vec_info stmt_info,gimple stmt)73 append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
74 {
75 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
76 stmt);
77 }
78
79 static inline void
new_pattern_def_seq(stmt_vec_info stmt_info,gimple stmt)80 new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
81 {
82 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
83 append_pattern_def_seq (stmt_info, stmt);
84 }
85
86 /* Check whether STMT2 is in the same loop or basic block as STMT1.
87 Which of the two applies depends on whether we're currently doing
88 loop-based or basic-block-based vectorization, as determined by
89 the vinfo_for_stmt for STMT1 (which must be defined).
90
91 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
92 to be defined as well. */
93
94 static bool
vect_same_loop_or_bb_p(gimple stmt1,gimple stmt2)95 vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
96 {
97 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
98 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
99 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
100
101 if (!gimple_bb (stmt2))
102 return false;
103
104 if (loop_vinfo)
105 {
106 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
107 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
108 return false;
109 }
110 else
111 {
112 if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
113 || gimple_code (stmt2) == GIMPLE_PHI)
114 return false;
115 }
116
117 gcc_assert (vinfo_for_stmt (stmt2));
118 return true;
119 }
120
121 /* If the LHS of DEF_STMT has a single use, and that statement is
122 in the same loop or basic block, return it. */
123
124 static gimple
vect_single_imm_use(gimple def_stmt)125 vect_single_imm_use (gimple def_stmt)
126 {
127 tree lhs = gimple_assign_lhs (def_stmt);
128 use_operand_p use_p;
129 gimple use_stmt;
130
131 if (!single_imm_use (lhs, &use_p, &use_stmt))
132 return NULL;
133
134 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
135 return NULL;
136
137 return use_stmt;
138 }
139
140 /* Check whether NAME, an ssa-name used in USE_STMT,
141 is a result of a type promotion or demotion, such that:
142 DEF_STMT: NAME = NOP (name0)
143 where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
144 If CHECK_SIGN is TRUE, check that either both types are signed or both are
145 unsigned. */
146
147 static bool
type_conversion_p(tree name,gimple use_stmt,bool check_sign,tree * orig_type,gimple * def_stmt,bool * promotion)148 type_conversion_p (tree name, gimple use_stmt, bool check_sign,
149 tree *orig_type, gimple *def_stmt, bool *promotion)
150 {
151 tree dummy;
152 gimple dummy_gimple;
153 loop_vec_info loop_vinfo;
154 stmt_vec_info stmt_vinfo;
155 tree type = TREE_TYPE (name);
156 tree oprnd0;
157 enum vect_def_type dt;
158 tree def;
159 bb_vec_info bb_vinfo;
160
161 stmt_vinfo = vinfo_for_stmt (use_stmt);
162 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
163 bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
164 if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
165 &def, &dt))
166 return false;
167
168 if (dt != vect_internal_def
169 && dt != vect_external_def && dt != vect_constant_def)
170 return false;
171
172 if (!*def_stmt)
173 return false;
174
175 if (!is_gimple_assign (*def_stmt))
176 return false;
177
178 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
179 return false;
180
181 oprnd0 = gimple_assign_rhs1 (*def_stmt);
182
183 *orig_type = TREE_TYPE (oprnd0);
184 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
185 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
186 return false;
187
188 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
189 *promotion = true;
190 else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
191 *promotion = false;
192 else
193 return false;
194
195 if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
196 bb_vinfo, &dummy_gimple, &dummy, &dt))
197 return false;
198
199 return true;
200 }
201
202 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
203 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
204
205 static tree
vect_recog_temp_ssa_var(tree type,gimple stmt)206 vect_recog_temp_ssa_var (tree type, gimple stmt)
207 {
208 return make_temp_ssa_name (type, stmt, "patt");
209 }
210
211 /* Function vect_recog_dot_prod_pattern
212
213 Try to find the following pattern:
214
215 type x_t, y_t;
216 TYPE1 prod;
217 TYPE2 sum = init;
218 loop:
219 sum_0 = phi <init, sum_1>
220 S1 x_t = ...
221 S2 y_t = ...
222 S3 x_T = (TYPE1) x_t;
223 S4 y_T = (TYPE1) y_t;
224 S5 prod = x_T * y_T;
225 [S6 prod = (TYPE2) prod; #optional]
226 S7 sum_1 = prod + sum_0;
227
228 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
229 same size of 'TYPE1' or bigger. This is a special case of a reduction
230 computation.
231
232 Input:
233
234 * STMTS: Contains a stmt from which the pattern search begins. In the
235 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
236 will be detected.
237
238 Output:
239
240 * TYPE_IN: The type of the input arguments to the pattern.
241
242 * TYPE_OUT: The type of the output of this pattern.
243
244 * Return value: A new stmt that will be used to replace the sequence of
245 stmts that constitute the pattern. In this case it will be:
246 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
247
248 Note: The dot-prod idiom is a widening reduction pattern that is
249 vectorized without preserving all the intermediate results. It
250 produces only N/2 (widened) results (by summing up pairs of
251 intermediate results) rather than all N results. Therefore, we
252 cannot allow this pattern when we want to get all the results and in
253 the correct order (as is the case when this computation is in an
254 inner-loop nested in an outer-loop that us being vectorized). */
255
256 static gimple
vect_recog_dot_prod_pattern(vec<gimple> * stmts,tree * type_in,tree * type_out)257 vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
258 tree *type_out)
259 {
260 gimple stmt, last_stmt = (*stmts)[0];
261 tree oprnd0, oprnd1;
262 tree oprnd00, oprnd01;
263 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
264 tree type, half_type;
265 gimple pattern_stmt;
266 tree prod_type;
267 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
268 struct loop *loop;
269 tree var;
270 bool promotion;
271
272 if (!loop_info)
273 return NULL;
274
275 loop = LOOP_VINFO_LOOP (loop_info);
276
277 if (!is_gimple_assign (last_stmt))
278 return NULL;
279
280 type = gimple_expr_type (last_stmt);
281
282 /* Look for the following pattern
283 DX = (TYPE1) X;
284 DY = (TYPE1) Y;
285 DPROD = DX * DY;
286 DDPROD = (TYPE2) DPROD;
287 sum_1 = DDPROD + sum_0;
288 In which
289 - DX is double the size of X
290 - DY is double the size of Y
291 - DX, DY, DPROD all have the same type
292 - sum is the same size of DPROD or bigger
293 - sum has been recognized as a reduction variable.
294
295 This is equivalent to:
296 DPROD = X w* Y; #widen mult
297 sum_1 = DPROD w+ sum_0; #widen summation
298 or
299 DPROD = X w* Y; #widen mult
300 sum_1 = DPROD + sum_0; #summation
301 */
302
303 /* Starting from LAST_STMT, follow the defs of its uses in search
304 of the above pattern. */
305
306 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
307 return NULL;
308
309 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
310 {
311 /* Has been detected as widening-summation? */
312
313 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
314 type = gimple_expr_type (stmt);
315 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
316 return NULL;
317 oprnd0 = gimple_assign_rhs1 (stmt);
318 oprnd1 = gimple_assign_rhs2 (stmt);
319 half_type = TREE_TYPE (oprnd0);
320 }
321 else
322 {
323 gimple def_stmt;
324
325 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
326 return NULL;
327 oprnd0 = gimple_assign_rhs1 (last_stmt);
328 oprnd1 = gimple_assign_rhs2 (last_stmt);
329 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
330 || !types_compatible_p (TREE_TYPE (oprnd1), type))
331 return NULL;
332 stmt = last_stmt;
333
334 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
335 &promotion)
336 && promotion)
337 {
338 stmt = def_stmt;
339 oprnd0 = gimple_assign_rhs1 (stmt);
340 }
341 else
342 half_type = type;
343 }
344
345 /* So far so good. Since last_stmt was detected as a (summation) reduction,
346 we know that oprnd1 is the reduction variable (defined by a loop-header
347 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
348 Left to check that oprnd0 is defined by a (widen_)mult_expr */
349 if (TREE_CODE (oprnd0) != SSA_NAME)
350 return NULL;
351
352 prod_type = half_type;
353 stmt = SSA_NAME_DEF_STMT (oprnd0);
354
355 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
356 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
357 return NULL;
358
359 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
360 inside the loop (in case we are analyzing an outer-loop). */
361 if (!is_gimple_assign (stmt))
362 return NULL;
363 stmt_vinfo = vinfo_for_stmt (stmt);
364 gcc_assert (stmt_vinfo);
365 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
366 return NULL;
367 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
368 return NULL;
369 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
370 {
371 /* Has been detected as a widening multiplication? */
372
373 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
374 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
375 return NULL;
376 stmt_vinfo = vinfo_for_stmt (stmt);
377 gcc_assert (stmt_vinfo);
378 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
379 oprnd00 = gimple_assign_rhs1 (stmt);
380 oprnd01 = gimple_assign_rhs2 (stmt);
381 }
382 else
383 {
384 tree half_type0, half_type1;
385 gimple def_stmt;
386 tree oprnd0, oprnd1;
387
388 oprnd0 = gimple_assign_rhs1 (stmt);
389 oprnd1 = gimple_assign_rhs2 (stmt);
390 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
391 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
392 return NULL;
393 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
394 &promotion)
395 || !promotion)
396 return NULL;
397 oprnd00 = gimple_assign_rhs1 (def_stmt);
398 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
399 &promotion)
400 || !promotion)
401 return NULL;
402 oprnd01 = gimple_assign_rhs1 (def_stmt);
403 if (!types_compatible_p (half_type0, half_type1))
404 return NULL;
405 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
406 return NULL;
407 }
408
409 half_type = TREE_TYPE (oprnd00);
410 *type_in = half_type;
411 *type_out = type;
412
413 /* Pattern detected. Create a stmt to be used to replace the pattern: */
414 var = vect_recog_temp_ssa_var (type, NULL);
415 pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
416 oprnd00, oprnd01, oprnd1);
417
418 if (dump_enabled_p ())
419 {
420 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
421 "vect_recog_dot_prod_pattern: detected: ");
422 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
423 }
424
425 /* We don't allow changing the order of the computation in the inner-loop
426 when doing outer-loop vectorization. */
427 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
428
429 return pattern_stmt;
430 }
431
432
433 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
434 and LSHIFT_EXPR.
435
436 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
437 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
438
439 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
440 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
441 that satisfies the above restrictions, we can perform a widening opeartion
442 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
443 with a_it = (interm_type) a_t; */
444
445 static bool
vect_handle_widen_op_by_const(gimple stmt,enum tree_code code,tree const_oprnd,tree * oprnd,vec<gimple> * stmts,tree type,tree * half_type,gimple def_stmt)446 vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
447 tree const_oprnd, tree *oprnd,
448 vec<gimple> *stmts, tree type,
449 tree *half_type, gimple def_stmt)
450 {
451 tree new_type, new_oprnd;
452 gimple new_stmt;
453
454 if (code != MULT_EXPR && code != LSHIFT_EXPR)
455 return false;
456
457 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
458 || (code == LSHIFT_EXPR
459 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
460 != 1))
461 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
462 {
463 /* CONST_OPRND is a constant of HALF_TYPE. */
464 *oprnd = gimple_assign_rhs1 (def_stmt);
465 return true;
466 }
467
468 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
469 return false;
470
471 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
472 return false;
473
474 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
475 a type 2 times bigger than HALF_TYPE. */
476 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
477 TYPE_UNSIGNED (type));
478 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
479 || (code == LSHIFT_EXPR
480 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
481 return false;
482
483 /* Use NEW_TYPE for widening operation. */
484 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
485 {
486 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
487 /* Check if the already created pattern stmt is what we need. */
488 if (!is_gimple_assign (new_stmt)
489 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
490 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
491 return false;
492
493 stmts->safe_push (def_stmt);
494 *oprnd = gimple_assign_lhs (new_stmt);
495 }
496 else
497 {
498 /* Create a_T = (NEW_TYPE) a_t; */
499 *oprnd = gimple_assign_rhs1 (def_stmt);
500 new_oprnd = make_ssa_name (new_type, NULL);
501 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
502 NULL_TREE);
503 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
504 stmts->safe_push (def_stmt);
505 *oprnd = new_oprnd;
506 }
507
508 *half_type = new_type;
509 return true;
510 }
511
512
513 /* Function vect_recog_widen_mult_pattern
514
515 Try to find the following pattern:
516
517 type a_t, b_t;
518 TYPE a_T, b_T, prod_T;
519
520 S1 a_t = ;
521 S2 b_t = ;
522 S3 a_T = (TYPE) a_t;
523 S4 b_T = (TYPE) b_t;
524 S5 prod_T = a_T * b_T;
525
526 where type 'TYPE' is at least double the size of type 'type'.
527
528 Also detect unsigned cases:
529
530 unsigned type a_t, b_t;
531 unsigned TYPE u_prod_T;
532 TYPE a_T, b_T, prod_T;
533
534 S1 a_t = ;
535 S2 b_t = ;
536 S3 a_T = (TYPE) a_t;
537 S4 b_T = (TYPE) b_t;
538 S5 prod_T = a_T * b_T;
539 S6 u_prod_T = (unsigned TYPE) prod_T;
540
541 and multiplication by constants:
542
543 type a_t;
544 TYPE a_T, prod_T;
545
546 S1 a_t = ;
547 S3 a_T = (TYPE) a_t;
548 S5 prod_T = a_T * CONST;
549
550 A special case of multiplication by constants is when 'TYPE' is 4 times
551 bigger than 'type', but CONST fits an intermediate type 2 times smaller
552 than 'TYPE'. In that case we create an additional pattern stmt for S3
553 to create a variable of the intermediate type, and perform widen-mult
554 on the intermediate type as well:
555
556 type a_t;
557 interm_type a_it;
558 TYPE a_T, prod_T, prod_T';
559
560 S1 a_t = ;
561 S3 a_T = (TYPE) a_t;
562 '--> a_it = (interm_type) a_t;
563 S5 prod_T = a_T * CONST;
564 '--> prod_T' = a_it w* CONST;
565
566 Input/Output:
567
568 * STMTS: Contains a stmt from which the pattern search begins. In the
569 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
570 is detected. In case of unsigned widen-mult, the original stmt (S5) is
571 replaced with S6 in STMTS. In case of multiplication by a constant
572 of an intermediate type (the last case above), STMTS also contains S3
573 (inserted before S5).
574
575 Output:
576
577 * TYPE_IN: The type of the input arguments to the pattern.
578
579 * TYPE_OUT: The type of the output of this pattern.
580
581 * Return value: A new stmt that will be used to replace the sequence of
582 stmts that constitute the pattern. In this case it will be:
583 WIDEN_MULT <a_t, b_t>
584 */
585
586 static gimple
vect_recog_widen_mult_pattern(vec<gimple> * stmts,tree * type_in,tree * type_out)587 vect_recog_widen_mult_pattern (vec<gimple> *stmts,
588 tree *type_in, tree *type_out)
589 {
590 gimple last_stmt = stmts->pop ();
591 gimple def_stmt0, def_stmt1;
592 tree oprnd0, oprnd1;
593 tree type, half_type0, half_type1;
594 gimple pattern_stmt;
595 tree vectype, vectype_out = NULL_TREE;
596 tree var;
597 enum tree_code dummy_code;
598 int dummy_int;
599 vec<tree> dummy_vec;
600 bool op1_ok;
601 bool promotion;
602
603 if (!is_gimple_assign (last_stmt))
604 return NULL;
605
606 type = gimple_expr_type (last_stmt);
607
608 /* Starting from LAST_STMT, follow the defs of its uses in search
609 of the above pattern. */
610
611 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
612 return NULL;
613
614 oprnd0 = gimple_assign_rhs1 (last_stmt);
615 oprnd1 = gimple_assign_rhs2 (last_stmt);
616 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
617 || !types_compatible_p (TREE_TYPE (oprnd1), type))
618 return NULL;
619
620 /* Check argument 0. */
621 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
622 &promotion)
623 || !promotion)
624 return NULL;
625 /* Check argument 1. */
626 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
627 &def_stmt1, &promotion);
628
629 if (op1_ok && promotion)
630 {
631 oprnd0 = gimple_assign_rhs1 (def_stmt0);
632 oprnd1 = gimple_assign_rhs1 (def_stmt1);
633 }
634 else
635 {
636 if (TREE_CODE (oprnd1) == INTEGER_CST
637 && TREE_CODE (half_type0) == INTEGER_TYPE
638 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
639 &oprnd0, stmts, type,
640 &half_type0, def_stmt0))
641 {
642 half_type1 = half_type0;
643 oprnd1 = fold_convert (half_type1, oprnd1);
644 }
645 else
646 return NULL;
647 }
648
649 /* Handle unsigned case. Look for
650 S6 u_prod_T = (unsigned TYPE) prod_T;
651 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
652 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
653 {
654 gimple use_stmt;
655 tree use_lhs;
656 tree use_type;
657
658 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
659 return NULL;
660
661 use_stmt = vect_single_imm_use (last_stmt);
662 if (!use_stmt || !is_gimple_assign (use_stmt)
663 || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
664 return NULL;
665
666 use_lhs = gimple_assign_lhs (use_stmt);
667 use_type = TREE_TYPE (use_lhs);
668 if (!INTEGRAL_TYPE_P (use_type)
669 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
670 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
671 return NULL;
672
673 type = use_type;
674 last_stmt = use_stmt;
675 }
676
677 if (!types_compatible_p (half_type0, half_type1))
678 return NULL;
679
680 /* Pattern detected. */
681 if (dump_enabled_p ())
682 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
683 "vect_recog_widen_mult_pattern: detected: ");
684
685 /* Check target support */
686 vectype = get_vectype_for_scalar_type (half_type0);
687 vectype_out = get_vectype_for_scalar_type (type);
688 if (!vectype
689 || !vectype_out
690 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
691 vectype_out, vectype,
692 &dummy_code, &dummy_code,
693 &dummy_int, &dummy_vec))
694 return NULL;
695
696 *type_in = vectype;
697 *type_out = vectype_out;
698
699 /* Pattern supported. Create a stmt to be used to replace the pattern: */
700 var = vect_recog_temp_ssa_var (type, NULL);
701 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
702 oprnd1);
703
704 if (dump_enabled_p ())
705 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
706
707 stmts->safe_push (last_stmt);
708 return pattern_stmt;
709 }
710
711
712 /* Function vect_recog_pow_pattern
713
714 Try to find the following pattern:
715
716 x = POW (y, N);
717
718 with POW being one of pow, powf, powi, powif and N being
719 either 2 or 0.5.
720
721 Input:
722
723 * LAST_STMT: A stmt from which the pattern search begins.
724
725 Output:
726
727 * TYPE_IN: The type of the input arguments to the pattern.
728
729 * TYPE_OUT: The type of the output of this pattern.
730
731 * Return value: A new stmt that will be used to replace the sequence of
732 stmts that constitute the pattern. In this case it will be:
733 x = x * x
734 or
735 x = sqrt (x)
736 */
737
738 static gimple
vect_recog_pow_pattern(vec<gimple> * stmts,tree * type_in,tree * type_out)739 vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
740 tree *type_out)
741 {
742 gimple last_stmt = (*stmts)[0];
743 tree fn, base, exp = NULL;
744 gimple stmt;
745 tree var;
746
747 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
748 return NULL;
749
750 fn = gimple_call_fndecl (last_stmt);
751 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
752 return NULL;
753
754 switch (DECL_FUNCTION_CODE (fn))
755 {
756 case BUILT_IN_POWIF:
757 case BUILT_IN_POWI:
758 case BUILT_IN_POWF:
759 case BUILT_IN_POW:
760 base = gimple_call_arg (last_stmt, 0);
761 exp = gimple_call_arg (last_stmt, 1);
762 if (TREE_CODE (exp) != REAL_CST
763 && TREE_CODE (exp) != INTEGER_CST)
764 return NULL;
765 break;
766
767 default:
768 return NULL;
769 }
770
771 /* We now have a pow or powi builtin function call with a constant
772 exponent. */
773
774 *type_out = NULL_TREE;
775
776 /* Catch squaring. */
777 if ((host_integerp (exp, 0)
778 && tree_low_cst (exp, 0) == 2)
779 || (TREE_CODE (exp) == REAL_CST
780 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
781 {
782 *type_in = TREE_TYPE (base);
783
784 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
785 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
786 return stmt;
787 }
788
789 /* Catch square root. */
790 if (TREE_CODE (exp) == REAL_CST
791 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
792 {
793 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
794 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
795 if (*type_in)
796 {
797 gimple stmt = gimple_build_call (newfn, 1, base);
798 if (vectorizable_function (stmt, *type_in, *type_in)
799 != NULL_TREE)
800 {
801 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
802 gimple_call_set_lhs (stmt, var);
803 return stmt;
804 }
805 }
806 }
807
808 return NULL;
809 }
810
811
812 /* Function vect_recog_widen_sum_pattern
813
814 Try to find the following pattern:
815
816 type x_t;
817 TYPE x_T, sum = init;
818 loop:
819 sum_0 = phi <init, sum_1>
820 S1 x_t = *p;
821 S2 x_T = (TYPE) x_t;
822 S3 sum_1 = x_T + sum_0;
823
824 where type 'TYPE' is at least double the size of type 'type', i.e - we're
825 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
826 a special case of a reduction computation.
827
828 Input:
829
830 * LAST_STMT: A stmt from which the pattern search begins. In the example,
831 when this function is called with S3, the pattern {S2,S3} will be detected.
832
833 Output:
834
835 * TYPE_IN: The type of the input arguments to the pattern.
836
837 * TYPE_OUT: The type of the output of this pattern.
838
839 * Return value: A new stmt that will be used to replace the sequence of
840 stmts that constitute the pattern. In this case it will be:
841 WIDEN_SUM <x_t, sum_0>
842
843 Note: The widening-sum idiom is a widening reduction pattern that is
844 vectorized without preserving all the intermediate results. It
845 produces only N/2 (widened) results (by summing up pairs of
846 intermediate results) rather than all N results. Therefore, we
847 cannot allow this pattern when we want to get all the results and in
848 the correct order (as is the case when this computation is in an
849 inner-loop nested in an outer-loop that us being vectorized). */
850
851 static gimple
vect_recog_widen_sum_pattern(vec<gimple> * stmts,tree * type_in,tree * type_out)852 vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
853 tree *type_out)
854 {
855 gimple stmt, last_stmt = (*stmts)[0];
856 tree oprnd0, oprnd1;
857 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
858 tree type, half_type;
859 gimple pattern_stmt;
860 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
861 struct loop *loop;
862 tree var;
863 bool promotion;
864
865 if (!loop_info)
866 return NULL;
867
868 loop = LOOP_VINFO_LOOP (loop_info);
869
870 if (!is_gimple_assign (last_stmt))
871 return NULL;
872
873 type = gimple_expr_type (last_stmt);
874
875 /* Look for the following pattern
876 DX = (TYPE) X;
877 sum_1 = DX + sum_0;
878 In which DX is at least double the size of X, and sum_1 has been
879 recognized as a reduction variable.
880 */
881
882 /* Starting from LAST_STMT, follow the defs of its uses in search
883 of the above pattern. */
884
885 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
886 return NULL;
887
888 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
889 return NULL;
890
891 oprnd0 = gimple_assign_rhs1 (last_stmt);
892 oprnd1 = gimple_assign_rhs2 (last_stmt);
893 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
894 || !types_compatible_p (TREE_TYPE (oprnd1), type))
895 return NULL;
896
897 /* So far so good. Since last_stmt was detected as a (summation) reduction,
898 we know that oprnd1 is the reduction variable (defined by a loop-header
899 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
900 Left to check that oprnd0 is defined by a cast from type 'type' to type
901 'TYPE'. */
902
903 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
904 &promotion)
905 || !promotion)
906 return NULL;
907
908 oprnd0 = gimple_assign_rhs1 (stmt);
909 *type_in = half_type;
910 *type_out = type;
911
912 /* Pattern detected. Create a stmt to be used to replace the pattern: */
913 var = vect_recog_temp_ssa_var (type, NULL);
914 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
915 oprnd0, oprnd1);
916
917 if (dump_enabled_p ())
918 {
919 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
920 "vect_recog_widen_sum_pattern: detected: ");
921 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
922 }
923
924 /* We don't allow changing the order of the computation in the inner-loop
925 when doing outer-loop vectorization. */
926 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
927
928 return pattern_stmt;
929 }
930
931
932 /* Return TRUE if the operation in STMT can be performed on a smaller type.
933
934 Input:
935 STMT - a statement to check.
936 DEF - we support operations with two operands, one of which is constant.
937 The other operand can be defined by a demotion operation, or by a
938 previous statement in a sequence of over-promoted operations. In the
939 later case DEF is used to replace that operand. (It is defined by a
940 pattern statement we created for the previous statement in the
941 sequence).
942
943 Input/output:
944 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
945 NULL, it's the type of DEF.
946 STMTS - additional pattern statements. If a pattern statement (type
947 conversion) is created in this function, its original statement is
948 added to STMTS.
949
950 Output:
951 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
952 operands to use in the new pattern statement for STMT (will be created
953 in vect_recog_over_widening_pattern ()).
954 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
955 statements for STMT: the first one is a type promotion and the second
956 one is the operation itself. We return the type promotion statement
957 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
958 the second pattern statement. */
959
960 static bool
vect_operation_fits_smaller_type(gimple stmt,tree def,tree * new_type,tree * op0,tree * op1,gimple * new_def_stmt,vec<gimple> * stmts)961 vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
962 tree *op0, tree *op1, gimple *new_def_stmt,
963 vec<gimple> *stmts)
964 {
965 enum tree_code code;
966 tree const_oprnd, oprnd;
967 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
968 gimple def_stmt, new_stmt;
969 bool first = false;
970 bool promotion;
971
972 *op0 = NULL_TREE;
973 *op1 = NULL_TREE;
974 *new_def_stmt = NULL;
975
976 if (!is_gimple_assign (stmt))
977 return false;
978
979 code = gimple_assign_rhs_code (stmt);
980 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
981 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
982 return false;
983
984 oprnd = gimple_assign_rhs1 (stmt);
985 const_oprnd = gimple_assign_rhs2 (stmt);
986 type = gimple_expr_type (stmt);
987
988 if (TREE_CODE (oprnd) != SSA_NAME
989 || TREE_CODE (const_oprnd) != INTEGER_CST)
990 return false;
991
992 /* If oprnd has other uses besides that in stmt we cannot mark it
993 as being part of a pattern only. */
994 if (!has_single_use (oprnd))
995 return false;
996
997 /* If we are in the middle of a sequence, we use DEF from a previous
998 statement. Otherwise, OPRND has to be a result of type promotion. */
999 if (*new_type)
1000 {
1001 half_type = *new_type;
1002 oprnd = def;
1003 }
1004 else
1005 {
1006 first = true;
1007 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1008 &promotion)
1009 || !promotion
1010 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1011 return false;
1012 }
1013
1014 /* Can we perform the operation on a smaller type? */
1015 switch (code)
1016 {
1017 case BIT_IOR_EXPR:
1018 case BIT_XOR_EXPR:
1019 case BIT_AND_EXPR:
1020 if (!int_fits_type_p (const_oprnd, half_type))
1021 {
1022 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1023 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1024 return false;
1025
1026 interm_type = build_nonstandard_integer_type (
1027 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1028 if (!int_fits_type_p (const_oprnd, interm_type))
1029 return false;
1030 }
1031
1032 break;
1033
1034 case LSHIFT_EXPR:
1035 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1036 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1037 return false;
1038
1039 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1040 (e.g., if the original value was char, the shift amount is at most 8
1041 if we want to use short). */
1042 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1043 return false;
1044
1045 interm_type = build_nonstandard_integer_type (
1046 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1047
1048 if (!vect_supportable_shift (code, interm_type))
1049 return false;
1050
1051 break;
1052
1053 case RSHIFT_EXPR:
1054 if (vect_supportable_shift (code, half_type))
1055 break;
1056
1057 /* Try intermediate type - HALF_TYPE is not supported. */
1058 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1059 return false;
1060
1061 interm_type = build_nonstandard_integer_type (
1062 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1063
1064 if (!vect_supportable_shift (code, interm_type))
1065 return false;
1066
1067 break;
1068
1069 default:
1070 gcc_unreachable ();
1071 }
1072
1073 /* There are four possible cases:
1074 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1075 the first statement in the sequence)
1076 a. The original, HALF_TYPE, is not enough - we replace the promotion
1077 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1078 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1079 promotion.
1080 2. OPRND is defined by a pattern statement we created.
1081 a. Its type is not sufficient for the operation, we create a new stmt:
1082 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1083 this statement in NEW_DEF_STMT, and it is later put in
1084 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1085 b. OPRND is good to use in the new statement. */
1086 if (first)
1087 {
1088 if (interm_type)
1089 {
1090 /* Replace the original type conversion HALF_TYPE->TYPE with
1091 HALF_TYPE->INTERM_TYPE. */
1092 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1093 {
1094 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1095 /* Check if the already created pattern stmt is what we need. */
1096 if (!is_gimple_assign (new_stmt)
1097 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
1098 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1099 return false;
1100
1101 stmts->safe_push (def_stmt);
1102 oprnd = gimple_assign_lhs (new_stmt);
1103 }
1104 else
1105 {
1106 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1107 oprnd = gimple_assign_rhs1 (def_stmt);
1108 new_oprnd = make_ssa_name (interm_type, NULL);
1109 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1110 oprnd, NULL_TREE);
1111 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1112 stmts->safe_push (def_stmt);
1113 oprnd = new_oprnd;
1114 }
1115 }
1116 else
1117 {
1118 /* Retrieve the operand before the type promotion. */
1119 oprnd = gimple_assign_rhs1 (def_stmt);
1120 }
1121 }
1122 else
1123 {
1124 if (interm_type)
1125 {
1126 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1127 new_oprnd = make_ssa_name (interm_type, NULL);
1128 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1129 oprnd, NULL_TREE);
1130 oprnd = new_oprnd;
1131 *new_def_stmt = new_stmt;
1132 }
1133
1134 /* Otherwise, OPRND is already set. */
1135 }
1136
1137 if (interm_type)
1138 *new_type = interm_type;
1139 else
1140 *new_type = half_type;
1141
1142 *op0 = oprnd;
1143 *op1 = fold_convert (*new_type, const_oprnd);
1144
1145 return true;
1146 }
1147
1148
1149 /* Try to find a statement or a sequence of statements that can be performed
1150 on a smaller type:
1151
1152 type x_t;
1153 TYPE x_T, res0_T, res1_T;
1154 loop:
1155 S1 x_t = *p;
1156 S2 x_T = (TYPE) x_t;
1157 S3 res0_T = op (x_T, C0);
1158 S4 res1_T = op (res0_T, C1);
1159 S5 ... = () res1_T; - type demotion
1160
1161 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1162 constants.
1163 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1164 be 'type' or some intermediate type. For now, we expect S5 to be a type
1165 demotion operation. We also check that S3 and S4 have only one use. */
1166
1167 static gimple
vect_recog_over_widening_pattern(vec<gimple> * stmts,tree * type_in,tree * type_out)1168 vect_recog_over_widening_pattern (vec<gimple> *stmts,
1169 tree *type_in, tree *type_out)
1170 {
1171 gimple stmt = stmts->pop ();
1172 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1173 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1174 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1175 bool first;
1176 tree type = NULL;
1177
1178 first = true;
1179 while (1)
1180 {
1181 if (!vinfo_for_stmt (stmt)
1182 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1183 return NULL;
1184
1185 new_def_stmt = NULL;
1186 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1187 &op0, &op1, &new_def_stmt,
1188 stmts))
1189 {
1190 if (first)
1191 return NULL;
1192 else
1193 break;
1194 }
1195
1196 /* STMT can be performed on a smaller type. Check its uses. */
1197 use_stmt = vect_single_imm_use (stmt);
1198 if (!use_stmt || !is_gimple_assign (use_stmt))
1199 return NULL;
1200
1201 /* Create pattern statement for STMT. */
1202 vectype = get_vectype_for_scalar_type (new_type);
1203 if (!vectype)
1204 return NULL;
1205
1206 /* We want to collect all the statements for which we create pattern
1207 statetments, except for the case when the last statement in the
1208 sequence doesn't have a corresponding pattern statement. In such
1209 case we associate the last pattern statement with the last statement
1210 in the sequence. Therefore, we only add the original statement to
1211 the list if we know that it is not the last. */
1212 if (prev_stmt)
1213 stmts->safe_push (prev_stmt);
1214
1215 var = vect_recog_temp_ssa_var (new_type, NULL);
1216 pattern_stmt
1217 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
1218 op0, op1);
1219 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1220 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1221
1222 if (dump_enabled_p ())
1223 {
1224 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1225 "created pattern stmt: ");
1226 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
1227 }
1228
1229 type = gimple_expr_type (stmt);
1230 prev_stmt = stmt;
1231 stmt = use_stmt;
1232
1233 first = false;
1234 }
1235
1236 /* We got a sequence. We expect it to end with a type demotion operation.
1237 Otherwise, we quit (for now). There are three possible cases: the
1238 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1239 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1240 NEW_TYPE differs (we create a new conversion statement). */
1241 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1242 {
1243 use_lhs = gimple_assign_lhs (use_stmt);
1244 use_type = TREE_TYPE (use_lhs);
1245 /* Support only type demotion or signedess change. */
1246 if (!INTEGRAL_TYPE_P (use_type)
1247 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1248 return NULL;
1249
1250 /* Check that NEW_TYPE is not bigger than the conversion result. */
1251 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1252 return NULL;
1253
1254 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1255 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1256 {
1257 /* Create NEW_TYPE->USE_TYPE conversion. */
1258 new_oprnd = make_ssa_name (use_type, NULL);
1259 pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1260 var, NULL_TREE);
1261 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1262
1263 *type_in = get_vectype_for_scalar_type (new_type);
1264 *type_out = get_vectype_for_scalar_type (use_type);
1265
1266 /* We created a pattern statement for the last statement in the
1267 sequence, so we don't need to associate it with the pattern
1268 statement created for PREV_STMT. Therefore, we add PREV_STMT
1269 to the list in order to mark it later in vect_pattern_recog_1. */
1270 if (prev_stmt)
1271 stmts->safe_push (prev_stmt);
1272 }
1273 else
1274 {
1275 if (prev_stmt)
1276 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1277 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1278
1279 *type_in = vectype;
1280 *type_out = NULL_TREE;
1281 }
1282
1283 stmts->safe_push (use_stmt);
1284 }
1285 else
1286 /* TODO: support general case, create a conversion to the correct type. */
1287 return NULL;
1288
1289 /* Pattern detected. */
1290 if (dump_enabled_p ())
1291 {
1292 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1293 "vect_recog_over_widening_pattern: detected: ");
1294 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
1295 }
1296
1297 return pattern_stmt;
1298 }
1299
1300 /* Detect widening shift pattern:
1301
1302 type a_t;
1303 TYPE a_T, res_T;
1304
1305 S1 a_t = ;
1306 S2 a_T = (TYPE) a_t;
1307 S3 res_T = a_T << CONST;
1308
1309 where type 'TYPE' is at least double the size of type 'type'.
1310
1311 Also detect cases where the shift result is immediately converted
1312 to another type 'result_type' that is no larger in size than 'TYPE'.
1313 In those cases we perform a widen-shift that directly results in
1314 'result_type', to avoid a possible over-widening situation:
1315
1316 type a_t;
1317 TYPE a_T, res_T;
1318 result_type res_result;
1319
1320 S1 a_t = ;
1321 S2 a_T = (TYPE) a_t;
1322 S3 res_T = a_T << CONST;
1323 S4 res_result = (result_type) res_T;
1324 '--> res_result' = a_t w<< CONST;
1325
1326 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1327 create an additional pattern stmt for S2 to create a variable of an
1328 intermediate type, and perform widen-shift on the intermediate type:
1329
1330 type a_t;
1331 interm_type a_it;
1332 TYPE a_T, res_T, res_T';
1333
1334 S1 a_t = ;
1335 S2 a_T = (TYPE) a_t;
1336 '--> a_it = (interm_type) a_t;
1337 S3 res_T = a_T << CONST;
1338 '--> res_T' = a_it <<* CONST;
1339
1340 Input/Output:
1341
1342 * STMTS: Contains a stmt from which the pattern search begins.
1343 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1344 in STMTS. When an intermediate type is used and a pattern statement is
1345 created for S2, we also put S2 here (before S3).
1346
1347 Output:
1348
1349 * TYPE_IN: The type of the input arguments to the pattern.
1350
1351 * TYPE_OUT: The type of the output of this pattern.
1352
1353 * Return value: A new stmt that will be used to replace the sequence of
1354 stmts that constitute the pattern. In this case it will be:
1355 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1356
1357 static gimple
vect_recog_widen_shift_pattern(vec<gimple> * stmts,tree * type_in,tree * type_out)1358 vect_recog_widen_shift_pattern (vec<gimple> *stmts,
1359 tree *type_in, tree *type_out)
1360 {
1361 gimple last_stmt = stmts->pop ();
1362 gimple def_stmt0;
1363 tree oprnd0, oprnd1;
1364 tree type, half_type0;
1365 gimple pattern_stmt;
1366 tree vectype, vectype_out = NULL_TREE;
1367 tree var;
1368 enum tree_code dummy_code;
1369 int dummy_int;
1370 vec<tree> dummy_vec;
1371 gimple use_stmt;
1372 bool promotion;
1373
1374 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1375 return NULL;
1376
1377 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1378 return NULL;
1379
1380 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1381 return NULL;
1382
1383 oprnd0 = gimple_assign_rhs1 (last_stmt);
1384 oprnd1 = gimple_assign_rhs2 (last_stmt);
1385 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1386 return NULL;
1387
1388 /* Check operand 0: it has to be defined by a type promotion. */
1389 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1390 &promotion)
1391 || !promotion)
1392 return NULL;
1393
1394 /* Check operand 1: has to be positive. We check that it fits the type
1395 in vect_handle_widen_op_by_const (). */
1396 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1397 return NULL;
1398
1399 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1400 type = gimple_expr_type (last_stmt);
1401
1402 /* Check for subsequent conversion to another type. */
1403 use_stmt = vect_single_imm_use (last_stmt);
1404 if (use_stmt && is_gimple_assign (use_stmt)
1405 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1406 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1407 {
1408 tree use_lhs = gimple_assign_lhs (use_stmt);
1409 tree use_type = TREE_TYPE (use_lhs);
1410
1411 if (INTEGRAL_TYPE_P (use_type)
1412 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1413 {
1414 last_stmt = use_stmt;
1415 type = use_type;
1416 }
1417 }
1418
1419 /* Check if this a widening operation. */
1420 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1421 &oprnd0, stmts,
1422 type, &half_type0, def_stmt0))
1423 return NULL;
1424
1425 /* Pattern detected. */
1426 if (dump_enabled_p ())
1427 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1428 "vect_recog_widen_shift_pattern: detected: ");
1429
1430 /* Check target support. */
1431 vectype = get_vectype_for_scalar_type (half_type0);
1432 vectype_out = get_vectype_for_scalar_type (type);
1433
1434 if (!vectype
1435 || !vectype_out
1436 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1437 vectype_out, vectype,
1438 &dummy_code, &dummy_code,
1439 &dummy_int, &dummy_vec))
1440 return NULL;
1441
1442 *type_in = vectype;
1443 *type_out = vectype_out;
1444
1445 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1446 var = vect_recog_temp_ssa_var (type, NULL);
1447 pattern_stmt =
1448 gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);
1449
1450 if (dump_enabled_p ())
1451 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1452
1453 stmts->safe_push (last_stmt);
1454 return pattern_stmt;
1455 }
1456
1457 /* Detect a vector by vector shift pattern that wouldn't be otherwise
1458 vectorized:
1459
1460 type a_t;
1461 TYPE b_T, res_T;
1462
1463 S1 a_t = ;
1464 S2 b_T = ;
1465 S3 res_T = b_T op a_t;
1466
1467 where type 'TYPE' is a type with different size than 'type',
1468 and op is <<, >> or rotate.
1469
1470 Also detect cases:
1471
1472 type a_t;
1473 TYPE b_T, c_T, res_T;
1474
1475 S0 c_T = ;
1476 S1 a_t = (type) c_T;
1477 S2 b_T = ;
1478 S3 res_T = b_T op a_t;
1479
1480 Input/Output:
1481
1482 * STMTS: Contains a stmt from which the pattern search begins,
1483 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
1484 with a shift/rotate which has same type on both operands, in the
1485 second case just b_T op c_T, in the first case with added cast
1486 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
1487
1488 Output:
1489
1490 * TYPE_IN: The type of the input arguments to the pattern.
1491
1492 * TYPE_OUT: The type of the output of this pattern.
1493
1494 * Return value: A new stmt that will be used to replace the shift/rotate
1495 S3 stmt. */
1496
1497 static gimple
vect_recog_vector_vector_shift_pattern(vec<gimple> * stmts,tree * type_in,tree * type_out)1498 vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
1499 tree *type_in, tree *type_out)
1500 {
1501 gimple last_stmt = stmts->pop ();
1502 tree oprnd0, oprnd1, lhs, var;
1503 gimple pattern_stmt, def_stmt;
1504 enum tree_code rhs_code;
1505 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1506 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1507 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1508 enum vect_def_type dt;
1509 tree def;
1510
1511 if (!is_gimple_assign (last_stmt))
1512 return NULL;
1513
1514 rhs_code = gimple_assign_rhs_code (last_stmt);
1515 switch (rhs_code)
1516 {
1517 case LSHIFT_EXPR:
1518 case RSHIFT_EXPR:
1519 case LROTATE_EXPR:
1520 case RROTATE_EXPR:
1521 break;
1522 default:
1523 return NULL;
1524 }
1525
1526 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1527 return NULL;
1528
1529 lhs = gimple_assign_lhs (last_stmt);
1530 oprnd0 = gimple_assign_rhs1 (last_stmt);
1531 oprnd1 = gimple_assign_rhs2 (last_stmt);
1532 if (TREE_CODE (oprnd0) != SSA_NAME
1533 || TREE_CODE (oprnd1) != SSA_NAME
1534 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
1535 || TYPE_PRECISION (TREE_TYPE (oprnd1))
1536 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
1537 || TYPE_PRECISION (TREE_TYPE (lhs))
1538 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
1539 return NULL;
1540
1541 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1542 &def, &dt))
1543 return NULL;
1544
1545 if (dt != vect_internal_def)
1546 return NULL;
1547
1548 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
1549 *type_out = *type_in;
1550 if (*type_in == NULL_TREE)
1551 return NULL;
1552
1553 def = NULL_TREE;
1554 if (gimple_assign_cast_p (def_stmt))
1555 {
1556 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1557 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
1558 && TYPE_PRECISION (TREE_TYPE (rhs1))
1559 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
1560 def = rhs1;
1561 }
1562
1563 if (def == NULL_TREE)
1564 {
1565 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1566 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1567 NULL_TREE);
1568 new_pattern_def_seq (stmt_vinfo, def_stmt);
1569 }
1570
1571 /* Pattern detected. */
1572 if (dump_enabled_p ())
1573 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1574 "vect_recog_vector_vector_shift_pattern: detected: ");
1575
1576 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1577 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1578 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
1579
1580 if (dump_enabled_p ())
1581 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1582
1583 stmts->safe_push (last_stmt);
1584 return pattern_stmt;
1585 }
1586
1587 /* Detect a signed division by a constant that wouldn't be
1588 otherwise vectorized:
1589
1590 type a_t, b_t;
1591
1592 S1 a_t = b_t / N;
1593
1594 where type 'type' is an integral type and N is a constant.
1595
1596 Similarly handle modulo by a constant:
1597
1598 S4 a_t = b_t % N;
1599
1600 Input/Output:
1601
1602 * STMTS: Contains a stmt from which the pattern search begins,
1603 i.e. the division stmt. S1 is replaced by if N is a power
1604 of two constant and type is signed:
1605 S3 y_t = b_t < 0 ? N - 1 : 0;
1606 S2 x_t = b_t + y_t;
1607 S1' a_t = x_t >> log2 (N);
1608
1609 S4 is replaced if N is a power of two constant and
1610 type is signed by (where *_T temporaries have unsigned type):
1611 S9 y_T = b_t < 0 ? -1U : 0U;
1612 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1613 S7 z_t = (type) z_T;
1614 S6 w_t = b_t + z_t;
1615 S5 x_t = w_t & (N - 1);
1616 S4' a_t = x_t - z_t;
1617
1618 Output:
1619
1620 * TYPE_IN: The type of the input arguments to the pattern.
1621
1622 * TYPE_OUT: The type of the output of this pattern.
1623
1624 * Return value: A new stmt that will be used to replace the division
1625 S1 or modulo S4 stmt. */
1626
1627 static gimple
vect_recog_divmod_pattern(vec<gimple> * stmts,tree * type_in,tree * type_out)1628 vect_recog_divmod_pattern (vec<gimple> *stmts,
1629 tree *type_in, tree *type_out)
1630 {
1631 gimple last_stmt = stmts->pop ();
1632 tree oprnd0, oprnd1, vectype, itype, cond;
1633 gimple pattern_stmt, def_stmt;
1634 enum tree_code rhs_code;
1635 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1636 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1637 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1638 optab optab;
1639 tree q;
1640 int dummy_int, prec;
1641 stmt_vec_info def_stmt_vinfo;
1642
1643 if (!is_gimple_assign (last_stmt))
1644 return NULL;
1645
1646 rhs_code = gimple_assign_rhs_code (last_stmt);
1647 switch (rhs_code)
1648 {
1649 case TRUNC_DIV_EXPR:
1650 case TRUNC_MOD_EXPR:
1651 break;
1652 default:
1653 return NULL;
1654 }
1655
1656 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1657 return NULL;
1658
1659 oprnd0 = gimple_assign_rhs1 (last_stmt);
1660 oprnd1 = gimple_assign_rhs2 (last_stmt);
1661 itype = TREE_TYPE (oprnd0);
1662 if (TREE_CODE (oprnd0) != SSA_NAME
1663 || TREE_CODE (oprnd1) != INTEGER_CST
1664 || TREE_CODE (itype) != INTEGER_TYPE
1665 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
1666 return NULL;
1667
1668 vectype = get_vectype_for_scalar_type (itype);
1669 if (vectype == NULL_TREE)
1670 return NULL;
1671
1672 /* If the target can handle vectorized division or modulo natively,
1673 don't attempt to optimize this. */
1674 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
1675 if (optab != unknown_optab)
1676 {
1677 enum machine_mode vec_mode = TYPE_MODE (vectype);
1678 int icode = (int) optab_handler (optab, vec_mode);
1679 if (icode != CODE_FOR_nothing)
1680 return NULL;
1681 }
1682
1683 prec = TYPE_PRECISION (itype);
1684 if (integer_pow2p (oprnd1))
1685 {
1686 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
1687 return NULL;
1688
1689 /* Pattern detected. */
1690 if (dump_enabled_p ())
1691 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1692 "vect_recog_divmod_pattern: detected: ");
1693
1694 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
1695 build_int_cst (itype, 0));
1696 if (rhs_code == TRUNC_DIV_EXPR)
1697 {
1698 tree var = vect_recog_temp_ssa_var (itype, NULL);
1699 tree shift;
1700 def_stmt
1701 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
1702 fold_build2 (MINUS_EXPR, itype,
1703 oprnd1,
1704 build_int_cst (itype,
1705 1)),
1706 build_int_cst (itype, 0));
1707 new_pattern_def_seq (stmt_vinfo, def_stmt);
1708 var = vect_recog_temp_ssa_var (itype, NULL);
1709 def_stmt
1710 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
1711 gimple_assign_lhs (def_stmt));
1712 append_pattern_def_seq (stmt_vinfo, def_stmt);
1713
1714 shift = build_int_cst (itype, tree_log2 (oprnd1));
1715 pattern_stmt
1716 = gimple_build_assign_with_ops (RSHIFT_EXPR,
1717 vect_recog_temp_ssa_var (itype,
1718 NULL),
1719 var, shift);
1720 }
1721 else
1722 {
1723 tree signmask;
1724 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1725 if (compare_tree_int (oprnd1, 2) == 0)
1726 {
1727 signmask = vect_recog_temp_ssa_var (itype, NULL);
1728 def_stmt
1729 = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
1730 build_int_cst (itype, 1),
1731 build_int_cst (itype, 0));
1732 append_pattern_def_seq (stmt_vinfo, def_stmt);
1733 }
1734 else
1735 {
1736 tree utype
1737 = build_nonstandard_integer_type (prec, 1);
1738 tree vecutype = get_vectype_for_scalar_type (utype);
1739 tree shift
1740 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
1741 - tree_log2 (oprnd1));
1742 tree var = vect_recog_temp_ssa_var (utype, NULL);
1743
1744 def_stmt
1745 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
1746 build_int_cst (utype, -1),
1747 build_int_cst (utype, 0));
1748 def_stmt_vinfo
1749 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1750 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1751 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1752 append_pattern_def_seq (stmt_vinfo, def_stmt);
1753 var = vect_recog_temp_ssa_var (utype, NULL);
1754 def_stmt
1755 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
1756 gimple_assign_lhs (def_stmt),
1757 shift);
1758 def_stmt_vinfo
1759 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1760 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1761 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1762 append_pattern_def_seq (stmt_vinfo, def_stmt);
1763 signmask = vect_recog_temp_ssa_var (itype, NULL);
1764 def_stmt
1765 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
1766 NULL_TREE);
1767 append_pattern_def_seq (stmt_vinfo, def_stmt);
1768 }
1769 def_stmt
1770 = gimple_build_assign_with_ops (PLUS_EXPR,
1771 vect_recog_temp_ssa_var (itype,
1772 NULL),
1773 oprnd0, signmask);
1774 append_pattern_def_seq (stmt_vinfo, def_stmt);
1775 def_stmt
1776 = gimple_build_assign_with_ops (BIT_AND_EXPR,
1777 vect_recog_temp_ssa_var (itype,
1778 NULL),
1779 gimple_assign_lhs (def_stmt),
1780 fold_build2 (MINUS_EXPR, itype,
1781 oprnd1,
1782 build_int_cst (itype,
1783 1)));
1784 append_pattern_def_seq (stmt_vinfo, def_stmt);
1785
1786 pattern_stmt
1787 = gimple_build_assign_with_ops (MINUS_EXPR,
1788 vect_recog_temp_ssa_var (itype,
1789 NULL),
1790 gimple_assign_lhs (def_stmt),
1791 signmask);
1792 }
1793
1794 if (dump_enabled_p ())
1795 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
1796 0);
1797
1798 stmts->safe_push (last_stmt);
1799
1800 *type_in = vectype;
1801 *type_out = vectype;
1802 return pattern_stmt;
1803 }
1804
1805 if (!host_integerp (oprnd1, TYPE_UNSIGNED (itype))
1806 || integer_zerop (oprnd1)
1807 || prec > HOST_BITS_PER_WIDE_INT)
1808 return NULL;
1809
1810 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
1811 return NULL;
1812
1813 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1814
1815 if (TYPE_UNSIGNED (itype))
1816 {
1817 unsigned HOST_WIDE_INT mh, ml;
1818 int pre_shift, post_shift;
1819 unsigned HOST_WIDE_INT d = tree_low_cst (oprnd1, 1)
1820 & GET_MODE_MASK (TYPE_MODE (itype));
1821 tree t1, t2, t3, t4;
1822
1823 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
1824 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
1825 return NULL;
1826
1827 /* Find a suitable multiplier and right shift count
1828 instead of multiplying with D. */
1829 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
1830
1831 /* If the suggested multiplier is more than SIZE bits, we can do better
1832 for even divisors, using an initial right shift. */
1833 if (mh != 0 && (d & 1) == 0)
1834 {
1835 pre_shift = floor_log2 (d & -d);
1836 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
1837 &ml, &post_shift, &dummy_int);
1838 gcc_assert (!mh);
1839 }
1840 else
1841 pre_shift = 0;
1842
1843 if (mh != 0)
1844 {
1845 if (post_shift - 1 >= prec)
1846 return NULL;
1847
1848 /* t1 = oprnd0 h* ml;
1849 t2 = oprnd0 - t1;
1850 t3 = t2 >> 1;
1851 t4 = t1 + t3;
1852 q = t4 >> (post_shift - 1); */
1853 t1 = vect_recog_temp_ssa_var (itype, NULL);
1854 def_stmt
1855 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
1856 build_int_cst (itype, ml));
1857 append_pattern_def_seq (stmt_vinfo, def_stmt);
1858
1859 t2 = vect_recog_temp_ssa_var (itype, NULL);
1860 def_stmt
1861 = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
1862 append_pattern_def_seq (stmt_vinfo, def_stmt);
1863
1864 t3 = vect_recog_temp_ssa_var (itype, NULL);
1865 def_stmt
1866 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
1867 integer_one_node);
1868 append_pattern_def_seq (stmt_vinfo, def_stmt);
1869
1870 t4 = vect_recog_temp_ssa_var (itype, NULL);
1871 def_stmt
1872 = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
1873
1874 if (post_shift != 1)
1875 {
1876 append_pattern_def_seq (stmt_vinfo, def_stmt);
1877
1878 q = vect_recog_temp_ssa_var (itype, NULL);
1879 pattern_stmt
1880 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
1881 build_int_cst (itype,
1882 post_shift
1883 - 1));
1884 }
1885 else
1886 {
1887 q = t4;
1888 pattern_stmt = def_stmt;
1889 }
1890 }
1891 else
1892 {
1893 if (pre_shift >= prec || post_shift >= prec)
1894 return NULL;
1895
1896 /* t1 = oprnd0 >> pre_shift;
1897 t2 = t1 h* ml;
1898 q = t2 >> post_shift; */
1899 if (pre_shift)
1900 {
1901 t1 = vect_recog_temp_ssa_var (itype, NULL);
1902 def_stmt
1903 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
1904 build_int_cst (NULL,
1905 pre_shift));
1906 append_pattern_def_seq (stmt_vinfo, def_stmt);
1907 }
1908 else
1909 t1 = oprnd0;
1910
1911 t2 = vect_recog_temp_ssa_var (itype, NULL);
1912 def_stmt
1913 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
1914 build_int_cst (itype, ml));
1915
1916 if (post_shift)
1917 {
1918 append_pattern_def_seq (stmt_vinfo, def_stmt);
1919
1920 q = vect_recog_temp_ssa_var (itype, NULL);
1921 def_stmt
1922 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
1923 build_int_cst (itype,
1924 post_shift));
1925 }
1926 else
1927 q = t2;
1928
1929 pattern_stmt = def_stmt;
1930 }
1931 }
1932 else
1933 {
1934 unsigned HOST_WIDE_INT ml;
1935 int post_shift;
1936 HOST_WIDE_INT d = tree_low_cst (oprnd1, 0);
1937 unsigned HOST_WIDE_INT abs_d;
1938 bool add = false;
1939 tree t1, t2, t3, t4;
1940
1941 /* Give up for -1. */
1942 if (d == -1)
1943 return NULL;
1944
1945 /* Since d might be INT_MIN, we have to cast to
1946 unsigned HOST_WIDE_INT before negating to avoid
1947 undefined signed overflow. */
1948 abs_d = (d >= 0
1949 ? (unsigned HOST_WIDE_INT) d
1950 : - (unsigned HOST_WIDE_INT) d);
1951
1952 /* n rem d = n rem -d */
1953 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
1954 {
1955 d = abs_d;
1956 oprnd1 = build_int_cst (itype, abs_d);
1957 }
1958 else if (HOST_BITS_PER_WIDE_INT >= prec
1959 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
1960 /* This case is not handled correctly below. */
1961 return NULL;
1962
1963 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
1964 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
1965 {
1966 add = true;
1967 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
1968 }
1969 if (post_shift >= prec)
1970 return NULL;
1971
1972 /* t1 = oprnd1 h* ml; */
1973 t1 = vect_recog_temp_ssa_var (itype, NULL);
1974 def_stmt
1975 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
1976 build_int_cst (itype, ml));
1977 append_pattern_def_seq (stmt_vinfo, def_stmt);
1978
1979 if (add)
1980 {
1981 /* t2 = t1 + oprnd0; */
1982 t2 = vect_recog_temp_ssa_var (itype, NULL);
1983 def_stmt
1984 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
1985 append_pattern_def_seq (stmt_vinfo, def_stmt);
1986 }
1987 else
1988 t2 = t1;
1989
1990 if (post_shift)
1991 {
1992 /* t3 = t2 >> post_shift; */
1993 t3 = vect_recog_temp_ssa_var (itype, NULL);
1994 def_stmt
1995 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
1996 build_int_cst (itype, post_shift));
1997 append_pattern_def_seq (stmt_vinfo, def_stmt);
1998 }
1999 else
2000 t3 = t2;
2001
2002 /* t4 = oprnd0 >> (prec - 1); */
2003 t4 = vect_recog_temp_ssa_var (itype, NULL);
2004 def_stmt
2005 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
2006 build_int_cst (itype, prec - 1));
2007 append_pattern_def_seq (stmt_vinfo, def_stmt);
2008
2009 /* q = t3 - t4; or q = t4 - t3; */
2010 q = vect_recog_temp_ssa_var (itype, NULL);
2011 pattern_stmt
2012 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2013 d < 0 ? t3 : t4);
2014 }
2015
2016 if (rhs_code == TRUNC_MOD_EXPR)
2017 {
2018 tree r, t1;
2019
2020 /* We divided. Now finish by:
2021 t1 = q * oprnd1;
2022 r = oprnd0 - t1; */
2023 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2024
2025 t1 = vect_recog_temp_ssa_var (itype, NULL);
2026 def_stmt
2027 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
2028 append_pattern_def_seq (stmt_vinfo, def_stmt);
2029
2030 r = vect_recog_temp_ssa_var (itype, NULL);
2031 pattern_stmt
2032 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
2033 }
2034
2035 /* Pattern detected. */
2036 if (dump_enabled_p ())
2037 {
2038 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2039 "vect_recog_divmod_pattern: detected: ");
2040 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
2041 }
2042
2043 stmts->safe_push (last_stmt);
2044
2045 *type_in = vectype;
2046 *type_out = vectype;
2047 return pattern_stmt;
2048 }
2049
2050 /* Function vect_recog_mixed_size_cond_pattern
2051
2052 Try to find the following pattern:
2053
2054 type x_t, y_t;
2055 TYPE a_T, b_T, c_T;
2056 loop:
2057 S1 a_T = x_t CMP y_t ? b_T : c_T;
2058
2059 where type 'TYPE' is an integral type which has different size
2060 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2061 than 'type', the constants need to fit into an integer type
2062 with the same width as 'type') or results of conversion from 'type'.
2063
2064 Input:
2065
2066 * LAST_STMT: A stmt from which the pattern search begins.
2067
2068 Output:
2069
2070 * TYPE_IN: The type of the input arguments to the pattern.
2071
2072 * TYPE_OUT: The type of the output of this pattern.
2073
2074 * Return value: A new stmt that will be used to replace the pattern.
2075 Additionally a def_stmt is added.
2076
2077 a_it = x_t CMP y_t ? b_it : c_it;
2078 a_T = (TYPE) a_it; */
2079
2080 static gimple
vect_recog_mixed_size_cond_pattern(vec<gimple> * stmts,tree * type_in,tree * type_out)2081 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2082 tree *type_out)
2083 {
2084 gimple last_stmt = (*stmts)[0];
2085 tree cond_expr, then_clause, else_clause;
2086 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2087 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2088 enum machine_mode cmpmode;
2089 gimple pattern_stmt, def_stmt;
2090 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2091 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2092 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2093 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2094 bool promotion;
2095 tree comp_scalar_type;
2096
2097 if (!is_gimple_assign (last_stmt)
2098 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2099 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2100 return NULL;
2101
2102 cond_expr = gimple_assign_rhs1 (last_stmt);
2103 then_clause = gimple_assign_rhs2 (last_stmt);
2104 else_clause = gimple_assign_rhs3 (last_stmt);
2105
2106 if (!COMPARISON_CLASS_P (cond_expr))
2107 return NULL;
2108
2109 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2110 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2111 if (comp_vectype == NULL_TREE)
2112 return NULL;
2113
2114 type = gimple_expr_type (last_stmt);
2115 if (types_compatible_p (type, comp_scalar_type)
2116 || ((TREE_CODE (then_clause) != INTEGER_CST
2117 || TREE_CODE (else_clause) != INTEGER_CST)
2118 && !INTEGRAL_TYPE_P (comp_scalar_type))
2119 || !INTEGRAL_TYPE_P (type))
2120 return NULL;
2121
2122 if ((TREE_CODE (then_clause) != INTEGER_CST
2123 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2124 &def_stmt0, &promotion))
2125 || (TREE_CODE (else_clause) != INTEGER_CST
2126 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2127 &def_stmt1, &promotion)))
2128 return NULL;
2129
2130 if (orig_type0 && orig_type1
2131 && !types_compatible_p (orig_type0, orig_type1))
2132 return NULL;
2133
2134 if (orig_type0)
2135 {
2136 if (!types_compatible_p (orig_type0, comp_scalar_type))
2137 return NULL;
2138 then_clause = gimple_assign_rhs1 (def_stmt0);
2139 itype = orig_type0;
2140 }
2141
2142 if (orig_type1)
2143 {
2144 if (!types_compatible_p (orig_type1, comp_scalar_type))
2145 return NULL;
2146 else_clause = gimple_assign_rhs1 (def_stmt1);
2147 itype = orig_type1;
2148 }
2149
2150 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2151
2152 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2153 return NULL;
2154
2155 vectype = get_vectype_for_scalar_type (type);
2156 if (vectype == NULL_TREE)
2157 return NULL;
2158
2159 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2160 return NULL;
2161
2162 if (itype == NULL_TREE)
2163 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2164 TYPE_UNSIGNED (type));
2165
2166 if (itype == NULL_TREE
2167 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2168 return NULL;
2169
2170 vecitype = get_vectype_for_scalar_type (itype);
2171 if (vecitype == NULL_TREE)
2172 return NULL;
2173
2174 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2175 return NULL;
2176
2177 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2178 {
2179 if ((TREE_CODE (then_clause) == INTEGER_CST
2180 && !int_fits_type_p (then_clause, itype))
2181 || (TREE_CODE (else_clause) == INTEGER_CST
2182 && !int_fits_type_p (else_clause, itype)))
2183 return NULL;
2184 }
2185
2186 def_stmt
2187 = gimple_build_assign_with_ops (COND_EXPR,
2188 vect_recog_temp_ssa_var (itype, NULL),
2189 unshare_expr (cond_expr),
2190 fold_convert (itype, then_clause),
2191 fold_convert (itype, else_clause));
2192 pattern_stmt
2193 = gimple_build_assign_with_ops (NOP_EXPR,
2194 vect_recog_temp_ssa_var (type, NULL),
2195 gimple_assign_lhs (def_stmt), NULL_TREE);
2196
2197 new_pattern_def_seq (stmt_vinfo, def_stmt);
2198 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2199 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2200 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2201 *type_in = vecitype;
2202 *type_out = vectype;
2203
2204 if (dump_enabled_p ())
2205 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2206 "vect_recog_mixed_size_cond_pattern: detected: ");
2207
2208 return pattern_stmt;
2209 }
2210
2211
2212 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2213 true if bool VAR can be optimized that way. */
2214
2215 static bool
check_bool_pattern(tree var,loop_vec_info loop_vinfo,bb_vec_info bb_vinfo)2216 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2217 {
2218 gimple def_stmt;
2219 enum vect_def_type dt;
2220 tree def, rhs1;
2221 enum tree_code rhs_code;
2222
2223 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2224 &dt))
2225 return false;
2226
2227 if (dt != vect_internal_def)
2228 return false;
2229
2230 if (!is_gimple_assign (def_stmt))
2231 return false;
2232
2233 if (!has_single_use (def))
2234 return false;
2235
2236 rhs1 = gimple_assign_rhs1 (def_stmt);
2237 rhs_code = gimple_assign_rhs_code (def_stmt);
2238 switch (rhs_code)
2239 {
2240 case SSA_NAME:
2241 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2242
2243 CASE_CONVERT:
2244 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2245 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2246 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2247 return false;
2248 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2249
2250 case BIT_NOT_EXPR:
2251 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2252
2253 case BIT_AND_EXPR:
2254 case BIT_IOR_EXPR:
2255 case BIT_XOR_EXPR:
2256 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2257 return false;
2258 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2259 bb_vinfo);
2260
2261 default:
2262 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2263 {
2264 tree vecitype, comp_vectype;
2265
2266 /* If the comparison can throw, then is_gimple_condexpr will be
2267 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2268 if (stmt_could_throw_p (def_stmt))
2269 return false;
2270
2271 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2272 if (comp_vectype == NULL_TREE)
2273 return false;
2274
2275 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2276 {
2277 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2278 tree itype
2279 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2280 vecitype = get_vectype_for_scalar_type (itype);
2281 if (vecitype == NULL_TREE)
2282 return false;
2283 }
2284 else
2285 vecitype = comp_vectype;
2286 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2287 }
2288 return false;
2289 }
2290 }
2291
2292
2293 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2294 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2295 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2296
2297 static tree
adjust_bool_pattern_cast(tree type,tree var)2298 adjust_bool_pattern_cast (tree type, tree var)
2299 {
2300 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2301 gimple cast_stmt, pattern_stmt;
2302
2303 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2304 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2305 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2306 cast_stmt
2307 = gimple_build_assign_with_ops (NOP_EXPR,
2308 vect_recog_temp_ssa_var (type, NULL),
2309 gimple_assign_lhs (pattern_stmt),
2310 NULL_TREE);
2311 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2312 return gimple_assign_lhs (cast_stmt);
2313 }
2314
2315
2316 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2317 recursively. VAR is an SSA_NAME that should be transformed from bool
2318 to a wider integer type, OUT_TYPE is the desired final integer type of
2319 the whole pattern, TRUEVAL should be NULL unless optimizing
2320 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2321 in the then_clause, STMTS is where statements with added pattern stmts
2322 should be pushed to. */
2323
2324 static tree
adjust_bool_pattern(tree var,tree out_type,tree trueval,vec<gimple> * stmts)2325 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2326 vec<gimple> *stmts)
2327 {
2328 gimple stmt = SSA_NAME_DEF_STMT (var);
2329 enum tree_code rhs_code, def_rhs_code;
2330 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2331 location_t loc;
2332 gimple pattern_stmt, def_stmt;
2333
2334 rhs1 = gimple_assign_rhs1 (stmt);
2335 rhs2 = gimple_assign_rhs2 (stmt);
2336 rhs_code = gimple_assign_rhs_code (stmt);
2337 loc = gimple_location (stmt);
2338 switch (rhs_code)
2339 {
2340 case SSA_NAME:
2341 CASE_CONVERT:
2342 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2343 itype = TREE_TYPE (irhs1);
2344 pattern_stmt
2345 = gimple_build_assign_with_ops (SSA_NAME,
2346 vect_recog_temp_ssa_var (itype, NULL),
2347 irhs1, NULL_TREE);
2348 break;
2349
2350 case BIT_NOT_EXPR:
2351 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2352 itype = TREE_TYPE (irhs1);
2353 pattern_stmt
2354 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2355 vect_recog_temp_ssa_var (itype, NULL),
2356 irhs1, build_int_cst (itype, 1));
2357 break;
2358
2359 case BIT_AND_EXPR:
2360 /* Try to optimize x = y & (a < b ? 1 : 0); into
2361 x = (a < b ? y : 0);
2362
2363 E.g. for:
2364 bool a_b, b_b, c_b;
2365 TYPE d_T;
2366
2367 S1 a_b = x1 CMP1 y1;
2368 S2 b_b = x2 CMP2 y2;
2369 S3 c_b = a_b & b_b;
2370 S4 d_T = (TYPE) c_b;
2371
2372 we would normally emit:
2373
2374 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2375 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2376 S3' c_T = a_T & b_T;
2377 S4' d_T = c_T;
2378
2379 but we can save one stmt by using the
2380 result of one of the COND_EXPRs in the other COND_EXPR and leave
2381 BIT_AND_EXPR stmt out:
2382
2383 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2384 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2385 S4' f_T = c_T;
2386
2387 At least when VEC_COND_EXPR is implemented using masks
2388 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2389 computes the comparison masks and ands it, in one case with
2390 all ones vector, in the other case with a vector register.
2391 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2392 often more expensive. */
2393 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2394 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2395 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2396 {
2397 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2398 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2399 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2400 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2401 {
2402 gimple tstmt;
2403 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2404 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2405 tstmt = stmts->pop ();
2406 gcc_assert (tstmt == def_stmt);
2407 stmts->quick_push (stmt);
2408 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2409 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2410 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2411 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2412 return irhs2;
2413 }
2414 else
2415 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2416 goto and_ior_xor;
2417 }
2418 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2419 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2420 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2421 {
2422 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2423 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2424 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2425 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2426 {
2427 gimple tstmt;
2428 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2429 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2430 tstmt = stmts->pop ();
2431 gcc_assert (tstmt == def_stmt);
2432 stmts->quick_push (stmt);
2433 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2434 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2435 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2436 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2437 return irhs1;
2438 }
2439 else
2440 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2441 goto and_ior_xor;
2442 }
2443 /* FALLTHRU */
2444 case BIT_IOR_EXPR:
2445 case BIT_XOR_EXPR:
2446 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2447 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2448 and_ior_xor:
2449 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2450 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2451 {
2452 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2453 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2454 int out_prec = TYPE_PRECISION (out_type);
2455 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2456 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2457 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2458 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2459 else
2460 {
2461 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2462 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2463 }
2464 }
2465 itype = TREE_TYPE (irhs1);
2466 pattern_stmt
2467 = gimple_build_assign_with_ops (rhs_code,
2468 vect_recog_temp_ssa_var (itype, NULL),
2469 irhs1, irhs2);
2470 break;
2471
2472 default:
2473 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2474 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2475 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2476 || (TYPE_PRECISION (TREE_TYPE (rhs1))
2477 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
2478 {
2479 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2480 itype
2481 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2482 }
2483 else
2484 itype = TREE_TYPE (rhs1);
2485 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2486 if (trueval == NULL_TREE)
2487 trueval = build_int_cst (itype, 1);
2488 else
2489 gcc_checking_assert (useless_type_conversion_p (itype,
2490 TREE_TYPE (trueval)));
2491 pattern_stmt
2492 = gimple_build_assign_with_ops (COND_EXPR,
2493 vect_recog_temp_ssa_var (itype, NULL),
2494 cond_expr, trueval,
2495 build_int_cst (itype, 0));
2496 break;
2497 }
2498
2499 stmts->safe_push (stmt);
2500 gimple_set_location (pattern_stmt, loc);
2501 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2502 return gimple_assign_lhs (pattern_stmt);
2503 }
2504
2505
2506 /* Function vect_recog_bool_pattern
2507
2508 Try to find pattern like following:
2509
2510 bool a_b, b_b, c_b, d_b, e_b;
2511 TYPE f_T;
2512 loop:
2513 S1 a_b = x1 CMP1 y1;
2514 S2 b_b = x2 CMP2 y2;
2515 S3 c_b = a_b & b_b;
2516 S4 d_b = x3 CMP3 y3;
2517 S5 e_b = c_b | d_b;
2518 S6 f_T = (TYPE) e_b;
2519
2520 where type 'TYPE' is an integral type.
2521
2522 Input:
2523
2524 * LAST_STMT: A stmt at the end from which the pattern
2525 search begins, i.e. cast of a bool to
2526 an integer type.
2527
2528 Output:
2529
2530 * TYPE_IN: The type of the input arguments to the pattern.
2531
2532 * TYPE_OUT: The type of the output of this pattern.
2533
2534 * Return value: A new stmt that will be used to replace the pattern.
2535
2536 Assuming size of TYPE is the same as size of all comparisons
2537 (otherwise some casts would be added where needed), the above
2538 sequence we create related pattern stmts:
2539 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2540 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2541 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2542 S5' e_T = c_T | d_T;
2543 S6' f_T = e_T;
2544
2545 Instead of the above S3' we could emit:
2546 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2547 S3' c_T = a_T | b_T;
2548 but the above is more efficient. */
2549
2550 static gimple
vect_recog_bool_pattern(vec<gimple> * stmts,tree * type_in,tree * type_out)2551 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
2552 tree *type_out)
2553 {
2554 gimple last_stmt = stmts->pop ();
2555 enum tree_code rhs_code;
2556 tree var, lhs, rhs, vectype;
2557 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2558 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2559 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2560 gimple pattern_stmt;
2561
2562 if (!is_gimple_assign (last_stmt))
2563 return NULL;
2564
2565 var = gimple_assign_rhs1 (last_stmt);
2566 lhs = gimple_assign_lhs (last_stmt);
2567
2568 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2569 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2570 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2571 return NULL;
2572
2573 rhs_code = gimple_assign_rhs_code (last_stmt);
2574 if (CONVERT_EXPR_CODE_P (rhs_code))
2575 {
2576 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2577 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2578 return NULL;
2579 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2580 if (vectype == NULL_TREE)
2581 return NULL;
2582
2583 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2584 return NULL;
2585
2586 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2587 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2588 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2589 pattern_stmt
2590 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2591 else
2592 pattern_stmt
2593 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2594 *type_out = vectype;
2595 *type_in = vectype;
2596 stmts->safe_push (last_stmt);
2597 if (dump_enabled_p ())
2598 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2599 "vect_recog_bool_pattern: detected: ");
2600
2601 return pattern_stmt;
2602 }
2603 else if (rhs_code == SSA_NAME
2604 && STMT_VINFO_DATA_REF (stmt_vinfo))
2605 {
2606 stmt_vec_info pattern_stmt_info;
2607 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2608 gcc_assert (vectype != NULL_TREE);
2609 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2610 return NULL;
2611 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2612 return NULL;
2613
2614 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2615 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2616 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2617 {
2618 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2619 gimple cast_stmt
2620 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2621 new_pattern_def_seq (stmt_vinfo, cast_stmt);
2622 rhs = rhs2;
2623 }
2624 pattern_stmt
2625 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2626 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2627 bb_vinfo);
2628 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2629 STMT_VINFO_DATA_REF (pattern_stmt_info)
2630 = STMT_VINFO_DATA_REF (stmt_vinfo);
2631 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2632 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2633 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2634 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2635 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2636 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2637 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2638 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2639 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2640 *type_out = vectype;
2641 *type_in = vectype;
2642 stmts->safe_push (last_stmt);
2643 if (dump_enabled_p ())
2644 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2645 "vect_recog_bool_pattern: detected: ");
2646 return pattern_stmt;
2647 }
2648 else
2649 return NULL;
2650 }
2651
2652
2653 /* Mark statements that are involved in a pattern. */
2654
2655 static inline void
vect_mark_pattern_stmts(gimple orig_stmt,gimple pattern_stmt,tree pattern_vectype)2656 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2657 tree pattern_vectype)
2658 {
2659 stmt_vec_info pattern_stmt_info, def_stmt_info;
2660 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2661 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2662 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
2663 gimple def_stmt;
2664
2665 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2666 if (pattern_stmt_info == NULL)
2667 {
2668 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2669 bb_vinfo);
2670 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2671 }
2672 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2673
2674 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2675 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2676 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2677 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2678 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2679 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2680 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2681 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2682 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2683 {
2684 gimple_stmt_iterator si;
2685 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2686 !gsi_end_p (si); gsi_next (&si))
2687 {
2688 def_stmt = gsi_stmt (si);
2689 def_stmt_info = vinfo_for_stmt (def_stmt);
2690 if (def_stmt_info == NULL)
2691 {
2692 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
2693 bb_vinfo);
2694 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2695 }
2696 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2697 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2698 STMT_VINFO_DEF_TYPE (def_stmt_info)
2699 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2700 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2701 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2702 }
2703 }
2704 }
2705
2706 /* Function vect_pattern_recog_1
2707
2708 Input:
2709 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
2710 computation pattern.
2711 STMT: A stmt from which the pattern search should start.
2712
2713 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
2714 expression that computes the same functionality and can be used to
2715 replace the sequence of stmts that are involved in the pattern.
2716
2717 Output:
2718 This function checks if the expression returned by PATTERN_RECOG_FUNC is
2719 supported in vector form by the target. We use 'TYPE_IN' to obtain the
2720 relevant vector type. If 'TYPE_IN' is already a vector type, then this
2721 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
2722 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
2723 to the available target pattern.
2724
2725 This function also does some bookkeeping, as explained in the documentation
2726 for vect_recog_pattern. */
2727
2728 static void
vect_pattern_recog_1(vect_recog_func_ptr vect_recog_func,gimple_stmt_iterator si,vec<gimple> * stmts_to_replace)2729 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
2730 gimple_stmt_iterator si,
2731 vec<gimple> *stmts_to_replace)
2732 {
2733 gimple stmt = gsi_stmt (si), pattern_stmt;
2734 stmt_vec_info stmt_info;
2735 loop_vec_info loop_vinfo;
2736 tree pattern_vectype;
2737 tree type_in, type_out;
2738 enum tree_code code;
2739 int i;
2740 gimple next;
2741
2742 stmts_to_replace->truncate (0);
2743 stmts_to_replace->quick_push (stmt);
2744 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
2745 if (!pattern_stmt)
2746 return;
2747
2748 stmt = stmts_to_replace->last ();
2749 stmt_info = vinfo_for_stmt (stmt);
2750 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2751
2752 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
2753 {
2754 /* No need to check target support (already checked by the pattern
2755 recognition function). */
2756 pattern_vectype = type_out ? type_out : type_in;
2757 }
2758 else
2759 {
2760 enum machine_mode vec_mode;
2761 enum insn_code icode;
2762 optab optab;
2763
2764 /* Check target support */
2765 type_in = get_vectype_for_scalar_type (type_in);
2766 if (!type_in)
2767 return;
2768 if (type_out)
2769 type_out = get_vectype_for_scalar_type (type_out);
2770 else
2771 type_out = type_in;
2772 if (!type_out)
2773 return;
2774 pattern_vectype = type_out;
2775
2776 if (is_gimple_assign (pattern_stmt))
2777 code = gimple_assign_rhs_code (pattern_stmt);
2778 else
2779 {
2780 gcc_assert (is_gimple_call (pattern_stmt));
2781 code = CALL_EXPR;
2782 }
2783
2784 optab = optab_for_tree_code (code, type_in, optab_default);
2785 vec_mode = TYPE_MODE (type_in);
2786 if (!optab
2787 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
2788 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
2789 return;
2790 }
2791
2792 /* Found a vectorizable pattern. */
2793 if (dump_enabled_p ())
2794 {
2795 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2796 "pattern recognized: ");
2797 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
2798 }
2799
2800 /* Mark the stmts that are involved in the pattern. */
2801 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
2802
2803 /* Patterns cannot be vectorized using SLP, because they change the order of
2804 computation. */
2805 if (loop_vinfo)
2806 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
2807 if (next == stmt)
2808 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
2809
2810 /* It is possible that additional pattern stmts are created and inserted in
2811 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
2812 relevant statements. */
2813 for (i = 0; stmts_to_replace->iterate (i, &stmt)
2814 && (unsigned) i < (stmts_to_replace->length () - 1);
2815 i++)
2816 {
2817 stmt_info = vinfo_for_stmt (stmt);
2818 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2819 if (dump_enabled_p ())
2820 {
2821 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2822 "additional pattern stmt: ");
2823 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
2824 }
2825
2826 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
2827 }
2828 }
2829
2830
2831 /* Function vect_pattern_recog
2832
2833 Input:
2834 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
2835 computation idioms.
2836
2837 Output - for each computation idiom that is detected we create a new stmt
2838 that provides the same functionality and that can be vectorized. We
2839 also record some information in the struct_stmt_info of the relevant
2840 stmts, as explained below:
2841
2842 At the entry to this function we have the following stmts, with the
2843 following initial value in the STMT_VINFO fields:
2844
2845 stmt in_pattern_p related_stmt vec_stmt
2846 S1: a_i = .... - - -
2847 S2: a_2 = ..use(a_i).. - - -
2848 S3: a_1 = ..use(a_2).. - - -
2849 S4: a_0 = ..use(a_1).. - - -
2850 S5: ... = ..use(a_0).. - - -
2851
2852 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
2853 represented by a single stmt. We then:
2854 - create a new stmt S6 equivalent to the pattern (the stmt is not
2855 inserted into the code)
2856 - fill in the STMT_VINFO fields as follows:
2857
2858 in_pattern_p related_stmt vec_stmt
2859 S1: a_i = .... - - -
2860 S2: a_2 = ..use(a_i).. - - -
2861 S3: a_1 = ..use(a_2).. - - -
2862 S4: a_0 = ..use(a_1).. true S6 -
2863 '---> S6: a_new = .... - S4 -
2864 S5: ... = ..use(a_0).. - - -
2865
2866 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
2867 to each other through the RELATED_STMT field).
2868
2869 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
2870 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
2871 remain irrelevant unless used by stmts other than S4.
2872
2873 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
2874 (because they are marked as irrelevant). It will vectorize S6, and record
2875 a pointer to the new vector stmt VS6 from S6 (as usual).
2876 S4 will be skipped, and S5 will be vectorized as usual:
2877
2878 in_pattern_p related_stmt vec_stmt
2879 S1: a_i = .... - - -
2880 S2: a_2 = ..use(a_i).. - - -
2881 S3: a_1 = ..use(a_2).. - - -
2882 > VS6: va_new = .... - - -
2883 S4: a_0 = ..use(a_1).. true S6 VS6
2884 '---> S6: a_new = .... - S4 VS6
2885 > VS5: ... = ..vuse(va_new).. - - -
2886 S5: ... = ..use(a_0).. - - -
2887
2888 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
2889 elsewhere), and we'll end up with:
2890
2891 VS6: va_new = ....
2892 VS5: ... = ..vuse(va_new)..
2893
2894 In case of more than one pattern statements, e.g., widen-mult with
2895 intermediate type:
2896
2897 S1 a_t = ;
2898 S2 a_T = (TYPE) a_t;
2899 '--> S3: a_it = (interm_type) a_t;
2900 S4 prod_T = a_T * CONST;
2901 '--> S5: prod_T' = a_it w* CONST;
2902
2903 there may be other users of a_T outside the pattern. In that case S2 will
2904 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
2905 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
2906 be recorded in S3. */
2907
2908 void
vect_pattern_recog(loop_vec_info loop_vinfo,bb_vec_info bb_vinfo)2909 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2910 {
2911 struct loop *loop;
2912 basic_block *bbs;
2913 unsigned int nbbs;
2914 gimple_stmt_iterator si;
2915 unsigned int i, j;
2916 vect_recog_func_ptr vect_recog_func;
2917 vec<gimple> stmts_to_replace;
2918 stmts_to_replace.create (1);
2919 gimple stmt;
2920
2921 if (dump_enabled_p ())
2922 dump_printf_loc (MSG_NOTE, vect_location,
2923 "=== vect_pattern_recog ===");
2924
2925 if (loop_vinfo)
2926 {
2927 loop = LOOP_VINFO_LOOP (loop_vinfo);
2928 bbs = LOOP_VINFO_BBS (loop_vinfo);
2929 nbbs = loop->num_nodes;
2930 }
2931 else
2932 {
2933 bbs = &BB_VINFO_BB (bb_vinfo);
2934 nbbs = 1;
2935 }
2936
2937 /* Scan through the loop stmts, applying the pattern recognition
2938 functions starting at each stmt visited: */
2939 for (i = 0; i < nbbs; i++)
2940 {
2941 basic_block bb = bbs[i];
2942 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2943 {
2944 if (bb_vinfo && (stmt = gsi_stmt (si))
2945 && vinfo_for_stmt (stmt)
2946 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
2947 continue;
2948
2949 /* Scan over all generic vect_recog_xxx_pattern functions. */
2950 for (j = 0; j < NUM_PATTERNS; j++)
2951 {
2952 vect_recog_func = vect_vect_recog_func_ptrs[j];
2953 vect_pattern_recog_1 (vect_recog_func, si,
2954 &stmts_to_replace);
2955 }
2956 }
2957 }
2958
2959 stmts_to_replace.release ();
2960 }
2961