1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2018 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
24
25 A short description of if-conversion:
26
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
37
38 Sample transformation:
39
40 INPUT
41 -----
42
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
47
48 <L17>:;
49 goto <bb 3> (<L3>);
50
51 <L1>:;
52
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
58
59 <L19>:;
60 goto <bb 1> (<L0>);
61
62 <L18>:;
63
64 OUTPUT
65 ------
66
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
70
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
76
77 <L19>:;
78 goto <bb 1> (<L0>);
79
80 <L18>:;
81 */
82
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "backend.h"
87 #include "rtl.h"
88 #include "tree.h"
89 #include "gimple.h"
90 #include "cfghooks.h"
91 #include "tree-pass.h"
92 #include "ssa.h"
93 #include "expmed.h"
94 #include "optabs-query.h"
95 #include "gimple-pretty-print.h"
96 #include "alias.h"
97 #include "fold-const.h"
98 #include "stor-layout.h"
99 #include "gimple-fold.h"
100 #include "gimplify.h"
101 #include "gimple-iterator.h"
102 #include "gimplify-me.h"
103 #include "tree-cfg.h"
104 #include "tree-into-ssa.h"
105 #include "tree-ssa.h"
106 #include "cfgloop.h"
107 #include "tree-data-ref.h"
108 #include "tree-scalar-evolution.h"
109 #include "tree-ssa-loop.h"
110 #include "tree-ssa-loop-niter.h"
111 #include "tree-ssa-loop-ivopts.h"
112 #include "tree-ssa-address.h"
113 #include "dbgcnt.h"
114 #include "tree-hash-traits.h"
115 #include "varasm.h"
116 #include "builtins.h"
117 #include "params.h"
118 #include "cfganal.h"
119
120 /* Only handle PHIs with no more than MAX_PHI_ARG_NUM arguments
121 unless we are asked to by simd pragma. */
122 #define MAX_PHI_ARG_NUM \
123 ((unsigned) PARAM_VALUE (PARAM_MAX_TREE_IF_CONVERSION_PHI_ARGS))
124
125 /* Indicate if new load/store that needs to be predicated is introduced
126 during if conversion. */
127 static bool any_pred_load_store;
128
129 /* Indicate if there are any complicated PHIs that need to be handled in
130 if-conversion. Complicated PHI has more than two arguments and can't
131 be degenerated to two arguments PHI. See more information in comment
132 before phi_convertible_by_degenerating_args. */
133 static bool any_complicated_phi;
134
135 /* Hash for struct innermost_loop_behavior. It depends on the user to
136 free the memory. */
137
138 struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
139 {
  /* Combine the hashes of the base address, offset, init and step.  */
140 static inline hashval_t hash (const value_type &);
  /* True when two behaviors agree on all four components.  */
141 static inline bool equal (const value_type &,
142 const compare_type &);
143 };
144
145 inline hashval_t
hash(const value_type & e)146 innermost_loop_behavior_hash::hash (const value_type &e)
147 {
148 hashval_t hash;
149
150 hash = iterative_hash_expr (e->base_address, 0);
151 hash = iterative_hash_expr (e->offset, hash);
152 hash = iterative_hash_expr (e->init, hash);
153 return iterative_hash_expr (e->step, hash);
154 }
155
156 inline bool
equal(const value_type & e1,const compare_type & e2)157 innermost_loop_behavior_hash::equal (const value_type &e1,
158 const compare_type &e2)
159 {
160 if ((e1->base_address && !e2->base_address)
161 || (!e1->base_address && e2->base_address)
162 || (!e1->offset && e2->offset)
163 || (e1->offset && !e2->offset)
164 || (!e1->init && e2->init)
165 || (e1->init && !e2->init)
166 || (!e1->step && e2->step)
167 || (e1->step && !e2->step))
168 return false;
169
170 if (e1->base_address && e2->base_address
171 && !operand_equal_p (e1->base_address, e2->base_address, 0))
172 return false;
173 if (e1->offset && e2->offset
174 && !operand_equal_p (e1->offset, e2->offset, 0))
175 return false;
176 if (e1->init && e2->init
177 && !operand_equal_p (e1->init, e2->init, 0))
178 return false;
179 if (e1->step && e2->step
180 && !operand_equal_p (e1->step, e2->step, 0))
181 return false;
182
183 return true;
184 }
185
186 /* List of basic blocks in if-conversion-suitable order. */
187 static basic_block *ifc_bbs;
188
189 /* Hash table to store <DR's innermost loop behavior, DR> pairs. */
190 static hash_map<innermost_loop_behavior_hash,
191 data_reference_p> *innermost_DR_map;
192
193 /* Hash table to store <base reference, DR> pairs. */
194 static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;
195
196 /* Structure used to predicate basic blocks. This is attached to the
197 ->aux field of the BBs in the loop to be if-converted. */
198 struct bb_predicate {
199
200 /* The condition under which this basic block is executed. */
201 tree predicate;
202
203 /* PREDICATE is gimplified, and the sequence of statements is
204 recorded here, in order to avoid the duplication of computations
205 that occur in previous conditions. See PR44483. */
206 gimple_seq predicate_gimplified_stmts;
207 };
208
209 /* Returns true when the basic block BB has a predicate. */
210
211 static inline bool
bb_has_predicate(basic_block bb)212 bb_has_predicate (basic_block bb)
213 {
214 return bb->aux != NULL;
215 }
216
217 /* Returns the gimplified predicate for basic block BB. */
218
219 static inline tree
bb_predicate(basic_block bb)220 bb_predicate (basic_block bb)
221 {
222 return ((struct bb_predicate *) bb->aux)->predicate;
223 }
224
225 /* Sets the gimplified predicate COND for basic block BB. */
226
227 static inline void
set_bb_predicate(basic_block bb,tree cond)228 set_bb_predicate (basic_block bb, tree cond)
229 {
230 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
231 && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
232 || is_gimple_condexpr (cond));
233 ((struct bb_predicate *) bb->aux)->predicate = cond;
234 }
235
236 /* Returns the sequence of statements of the gimplification of the
237 predicate for basic block BB. */
238
239 static inline gimple_seq
bb_predicate_gimplified_stmts(basic_block bb)240 bb_predicate_gimplified_stmts (basic_block bb)
241 {
242 return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
243 }
244
245 /* Sets the sequence of statements STMTS of the gimplification of the
246 predicate for basic block BB. */
247
248 static inline void
set_bb_predicate_gimplified_stmts(basic_block bb,gimple_seq stmts)249 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
250 {
251 ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
252 }
253
254 /* Adds the sequence of statements STMTS to the sequence of statements
255 of the predicate for basic block BB. */
256
257 static inline void
add_bb_predicate_gimplified_stmts(basic_block bb,gimple_seq stmts)258 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
259 {
260 /* We might have updated some stmts in STMTS via force_gimple_operand
261 calling fold_stmt and that producing multiple stmts. Delink immediate
262 uses so update_ssa after loop versioning doesn't get confused for
263 the not yet inserted predicates.
264 ??? This should go away once we reliably avoid updating stmts
265 not in any BB. */
266 for (gimple_stmt_iterator gsi = gsi_start (stmts);
267 !gsi_end_p (gsi); gsi_next (&gsi))
268 {
269 gimple *stmt = gsi_stmt (gsi);
270 delink_stmt_imm_use (stmt);
271 gimple_set_modified (stmt, true);
272 }
273 gimple_seq_add_seq_without_update
274 (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
275 }
276
277 /* Initializes to TRUE the predicate of basic block BB. */
278
279 static inline void
init_bb_predicate(basic_block bb)280 init_bb_predicate (basic_block bb)
281 {
282 bb->aux = XNEW (struct bb_predicate);
283 set_bb_predicate_gimplified_stmts (bb, NULL);
284 set_bb_predicate (bb, boolean_true_node);
285 }
286
287 /* Release the SSA_NAMEs associated with the predicate of basic block BB. */
288
289 static inline void
release_bb_predicate(basic_block bb)290 release_bb_predicate (basic_block bb)
291 {
292 gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
293 if (stmts)
294 {
295 /* Ensure that these stmts haven't yet been added to a bb. */
296 if (flag_checking)
297 for (gimple_stmt_iterator i = gsi_start (stmts);
298 !gsi_end_p (i); gsi_next (&i))
299 gcc_assert (! gimple_bb (gsi_stmt (i)));
300
301 /* Discard them. */
302 gimple_seq_discard (stmts);
303 set_bb_predicate_gimplified_stmts (bb, NULL);
304 }
305 }
306
307 /* Free the predicate of basic block BB. */
308
309 static inline void
free_bb_predicate(basic_block bb)310 free_bb_predicate (basic_block bb)
311 {
312 if (!bb_has_predicate (bb))
313 return;
314
315 release_bb_predicate (bb);
316 free (bb->aux);
317 bb->aux = NULL;
318 }
319
320 /* Reinitialize predicate of BB with the true predicate. */
321
322 static inline void
reset_bb_predicate(basic_block bb)323 reset_bb_predicate (basic_block bb)
324 {
325 if (!bb_has_predicate (bb))
326 init_bb_predicate (bb);
327 else
328 {
329 release_bb_predicate (bb);
330 set_bb_predicate (bb, boolean_true_node);
331 }
332 }
333
334 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
335 the expression EXPR. Inserts the statement created for this
336 computation before GSI and leaves the iterator GSI at the same
337 statement. */
338
339 static tree
ifc_temp_var(tree type,tree expr,gimple_stmt_iterator * gsi)340 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
341 {
342 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
343 gimple *stmt = gimple_build_assign (new_name, expr);
344 gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
345 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
346 return new_name;
347 }
348
349 /* Return true when COND is a false predicate. */
350
351 static inline bool
is_false_predicate(tree cond)352 is_false_predicate (tree cond)
353 {
354 return (cond != NULL_TREE
355 && (cond == boolean_false_node
356 || integer_zerop (cond)));
357 }
358
359 /* Return true when COND is a true predicate. */
360
361 static inline bool
is_true_predicate(tree cond)362 is_true_predicate (tree cond)
363 {
364 return (cond == NULL_TREE
365 || cond == boolean_true_node
366 || integer_onep (cond));
367 }
368
369 /* Returns true when BB has a predicate that is not trivial: true or
370 NULL_TREE. */
371
372 static inline bool
is_predicated(basic_block bb)373 is_predicated (basic_block bb)
374 {
375 return !is_true_predicate (bb_predicate (bb));
376 }
377
378 /* Parses the predicate COND and returns its comparison code and
379 operands OP0 and OP1. */
380
381 static enum tree_code
parse_predicate(tree cond,tree * op0,tree * op1)382 parse_predicate (tree cond, tree *op0, tree *op1)
383 {
384 gimple *s;
385
386 if (TREE_CODE (cond) == SSA_NAME
387 && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
388 {
389 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
390 {
391 *op0 = gimple_assign_rhs1 (s);
392 *op1 = gimple_assign_rhs2 (s);
393 return gimple_assign_rhs_code (s);
394 }
395
396 else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
397 {
398 tree op = gimple_assign_rhs1 (s);
399 tree type = TREE_TYPE (op);
400 enum tree_code code = parse_predicate (op, op0, op1);
401
402 return code == ERROR_MARK ? ERROR_MARK
403 : invert_tree_comparison (code, HONOR_NANS (type));
404 }
405
406 return ERROR_MARK;
407 }
408
409 if (COMPARISON_CLASS_P (cond))
410 {
411 *op0 = TREE_OPERAND (cond, 0);
412 *op1 = TREE_OPERAND (cond, 1);
413 return TREE_CODE (cond);
414 }
415
416 return ERROR_MARK;
417 }
418
419 /* Returns the fold of predicate C1 OR C2 at location LOC. */
420
421 static tree
fold_or_predicates(location_t loc,tree c1,tree c2)422 fold_or_predicates (location_t loc, tree c1, tree c2)
423 {
424 tree op1a, op1b, op2a, op2b;
425 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
426 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
427
428 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
429 {
430 tree t = maybe_fold_or_comparisons (code1, op1a, op1b,
431 code2, op2a, op2b);
432 if (t)
433 return t;
434 }
435
436 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
437 }
438
439 /* Returns either a COND_EXPR or the folded expression if the folded
440 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
441 a constant or a SSA_NAME. */
442
443 static tree
fold_build_cond_expr(tree type,tree cond,tree rhs,tree lhs)444 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
445 {
446 tree rhs1, lhs1, cond_expr;
447
448 /* If COND is comparison r != 0 and r has boolean type, convert COND
449 to SSA_NAME to accept by vect bool pattern. */
450 if (TREE_CODE (cond) == NE_EXPR)
451 {
452 tree op0 = TREE_OPERAND (cond, 0);
453 tree op1 = TREE_OPERAND (cond, 1);
454 if (TREE_CODE (op0) == SSA_NAME
455 && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
456 && (integer_zerop (op1)))
457 cond = op0;
458 }
  /* Try to fold the conditional; if it does not fold, emit the plain
     COND_EXPR.  */
459 cond_expr = fold_ternary (COND_EXPR, type, cond, rhs, lhs);
460
461 if (cond_expr == NULL_TREE)
462 return build3 (COND_EXPR, type, cond, rhs, lhs);
463
464 STRIP_USELESS_TYPE_CONVERSION (cond_expr);
465
  /* A constant or SSA_NAME result can be used as-is.  */
466 if (is_gimple_val (cond_expr))
467 return cond_expr;
468
  /* The fold produced ABS/MIN/MAX: rebuild it with stripped operands,
     but only when the operands are gimple values.  */
469 if (TREE_CODE (cond_expr) == ABS_EXPR)
470 {
471 rhs1 = TREE_OPERAND (cond_expr, 1);
472 STRIP_USELESS_TYPE_CONVERSION (rhs1);
473 if (is_gimple_val (rhs1))
474 return build1 (ABS_EXPR, type, rhs1);
475 }
476
477 if (TREE_CODE (cond_expr) == MIN_EXPR
478 || TREE_CODE (cond_expr) == MAX_EXPR)
479 {
480 lhs1 = TREE_OPERAND (cond_expr, 0);
481 STRIP_USELESS_TYPE_CONVERSION (lhs1);
482 rhs1 = TREE_OPERAND (cond_expr, 1);
483 STRIP_USELESS_TYPE_CONVERSION (rhs1);
484 if (is_gimple_val (rhs1) && is_gimple_val (lhs1))
485 return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
486 }
  /* Any other folded form: fall back to the unfolded COND_EXPR.  */
487 return build3 (COND_EXPR, type, cond, rhs, lhs);
488 }
489
490 /* Add condition NC to the predicate list of basic block BB. LOOP is
491 the loop to be if-converted. Use predicate of cd-equivalent block
492 for join bb if it exists: we call basic blocks bb1 and bb2
493 cd-equivalent if they are executed under the same condition. */
494
495 static inline void
add_to_predicate_list(struct loop * loop,basic_block bb,tree nc)496 add_to_predicate_list (struct loop *loop, basic_block bb, tree nc)
497 {
498 tree bc, *tp;
499 basic_block dom_bb;
500
501 if (is_true_predicate (nc))
502 return;
503
504 /* If dominance tells us this basic block is always executed,
505 don't record any predicates for it. */
506 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
507 return;
508
509 dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
510 /* We use notion of cd equivalence to get simpler predicate for
511 join block, e.g. if join block has 2 predecessors with predicates
512 p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
513 p1 & p2 | p1 & !p2. */
514 if (dom_bb != loop->header
515 && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
516 {
517 gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
518 bc = bb_predicate (dom_bb);
519 if (!is_true_predicate (bc))
520 set_bb_predicate (bb, bc);
521 else
522 gcc_assert (is_true_predicate (bb_predicate (bb)));
523 if (dump_file && (dump_flags & TDF_DETAILS))
524 fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
525 dom_bb->index, bb->index);
526 return;
527 }
528
  /* No cd-equivalent block: OR the new condition into BB's current
     predicate.  If the OR folds to true, BB is unconditional.  */
529 if (!is_predicated (bb))
530 bc = nc;
531 else
532 {
533 bc = bb_predicate (bb);
534 bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
535 if (is_true_predicate (bc))
536 {
537 reset_bb_predicate (bb);
538 return;
539 }
540 }
541
542 /* Allow a TRUTH_NOT_EXPR around the main predicate. */
543 if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
544 tp = &TREE_OPERAND (bc, 0);
545 else
546 tp = &bc;
  /* Gimplify the predicate; the generated statements are kept aside
     until the predicate is actually inserted.  */
547 if (!is_gimple_condexpr (*tp))
548 {
549 gimple_seq stmts;
550 *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
551 add_bb_predicate_gimplified_stmts (bb, stmts);
552 }
553 set_bb_predicate (bb, bc);
554 }
555
556 /* Add the condition COND to the previous condition PREV_COND, and add
557 this to the predicate list of the destination of edge E. LOOP is
558 the loop to be if-converted. */
559
560 static void
add_to_dst_predicate_list(struct loop * loop,edge e,tree prev_cond,tree cond)561 add_to_dst_predicate_list (struct loop *loop, edge e,
562 tree prev_cond, tree cond)
563 {
564 if (!flow_bb_inside_loop_p (loop, e->dest))
565 return;
566
567 if (!is_true_predicate (prev_cond))
568 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
569 prev_cond, cond);
570
571 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
572 add_to_predicate_list (loop, e->dest, cond);
573 }
574
575 /* Return true if one of the successor edges of BB exits LOOP. */
576
577 static bool
bb_with_exit_edge_p(struct loop * loop,basic_block bb)578 bb_with_exit_edge_p (struct loop *loop, basic_block bb)
579 {
580 edge e;
581 edge_iterator ei;
582
583 FOR_EACH_EDGE (e, ei, bb->succs)
584 if (loop_exit_edge_p (loop, e))
585 return true;
586
587 return false;
588 }
589
590 /* Given PHI which has more than two arguments, this function checks if
591 it's if-convertible by degenerating its arguments. Specifically, if
592 below two conditions are satisfied:
593
594 1) Number of PHI arguments with different values equals to 2 and one
595 argument has the only occurrence.
596 2) The edge corresponding to the unique argument isn't critical edge.
597
598 Such PHI can be handled as PHIs have only two arguments. For example,
599 below PHI:
600
601 res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
602
603 can be transformed into:
604
605 res = (predicate of e3) ? A_2 : A_1;
606
607 Return TRUE if it is the case, FALSE otherwise. */
608
609 static bool
phi_convertible_by_degenerating_args(gphi * phi)610 phi_convertible_by_degenerating_args (gphi *phi)
611 {
612 edge e;
613 tree arg, t1 = NULL, t2 = NULL;
614 unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
615 unsigned int num_args = gimple_phi_num_args (phi);
616
617 gcc_assert (num_args > 2);
618
  /* Bucket the arguments into at most two distinct values T1 and T2,
     counting occurrences (N1/N2) and remembering the last argument
     index of each (I1/I2).  A third distinct value disqualifies.  */
619 for (i = 0; i < num_args; i++)
620 {
621 arg = gimple_phi_arg_def (phi, i);
622 if (t1 == NULL || operand_equal_p (t1, arg, 0))
623 {
624 n1++;
625 i1 = i;
626 t1 = arg;
627 }
628 else if (t2 == NULL || operand_equal_p (t2, arg, 0))
629 {
630 n2++;
631 i2 = i;
632 t2 = arg;
633 }
634 else
635 return false;
636 }
637
  /* One of the two values must occur exactly once.  */
638 if (n1 != 1 && n2 != 1)
639 return false;
640
641 /* Check if the edge corresponding to the unique arg is critical. */
642 e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
643 if (EDGE_COUNT (e->src->succs) > 1)
644 return false;
645
646 return true;
647 }
648
649 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
650 and it belongs to basic block BB. Note at this point, it is sure
651 that PHI is if-convertible. This function updates global variable
652 ANY_COMPLICATED_PHI if PHI is complicated. */
653
654 static bool
if_convertible_phi_p(struct loop * loop,basic_block bb,gphi * phi)655 if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi)
656 {
657 if (dump_file && (dump_flags & TDF_DETAILS))
658 {
659 fprintf (dump_file, "-------------------------\n");
660 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
661 }
662
663 if (bb != loop->header
664 && gimple_phi_num_args (phi) > 2
665 && !phi_convertible_by_degenerating_args (phi))
666 any_complicated_phi = true;
667
668 return true;
669 }
670
671 /* Records the status of a data reference. This struct is attached to
672 each DR->aux field. */
673
674 struct ifc_dr {
  /* True when the reference is read or written unconditionally.  */
675 bool rw_unconditionally;
  /* True when the reference is written unconditionally.  */
676 bool w_unconditionally;
  /* True when the base object is written unconditionally at least
     once.  */
677 bool written_at_least_once;
678
  /* OR of the predicates under which the reference is accessed,
     written, and under which its base is written, respectively.  */
679 tree rw_predicate;
680 tree w_predicate;
681 tree base_w_predicate;
682 };
683
684 #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
685 #define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
686 #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
687 #define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
688
689 /* Iterates over DR's and stores refs, DR and base refs, DR pairs in
690 HASH tables. While storing them in HASH table, it checks if the
691 reference is unconditionally read or written and stores that as a flag
692 information. For base reference it checks if it is written at least once
693 unconditionally and stores it as flag information along with DR.
694 In other words for every data reference A in STMT there exist other
695 accesses to a data reference with the same base with predicates that
696 add up (OR-up) to the true predicate: this ensures that the data
697 reference A is touched (read or written) on every iteration of the
698 if-converted loop. */
699 static void
hash_memrefs_baserefs_and_store_DRs_read_written_info(data_reference_p a)700 hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
701 {
702
703 data_reference_p *master_dr, *base_master_dr;
704 tree base_ref = DR_BASE_OBJECT (a);
705 innermost_loop_behavior *innermost = &DR_INNERMOST (a);
706 tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
707 bool exist1, exist2;
708
  /* The first DR with a given innermost behavior becomes the master
     DR on which the accumulated predicates are stored.  */
709 master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
710 if (!exist1)
711 *master_dr = a;
712
713 if (DR_IS_WRITE (a))
714 {
  /* OR this access's predicate into the master's write predicate.  */
715 IFC_DR (*master_dr)->w_predicate
716 = fold_or_predicates (UNKNOWN_LOCATION, ca,
717 IFC_DR (*master_dr)->w_predicate);
718 if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
719 DR_W_UNCONDITIONALLY (*master_dr) = true;
720 }
  /* Likewise for the read-or-write predicate.  */
721 IFC_DR (*master_dr)->rw_predicate
722 = fold_or_predicates (UNKNOWN_LOCATION, ca,
723 IFC_DR (*master_dr)->rw_predicate);
724 if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
725 DR_RW_UNCONDITIONALLY (*master_dr) = true;
726
727 if (DR_IS_WRITE (a))
728 {
  /* Track per-base-object write predicates as well, keyed by the
     base reference.  */
729 base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
730 if (!exist2)
731 *base_master_dr = a;
732 IFC_DR (*base_master_dr)->base_w_predicate
733 = fold_or_predicates (UNKNOWN_LOCATION, ca,
734 IFC_DR (*base_master_dr)->base_w_predicate);
735 if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
736 DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
737 }
738 }
739
740 /* Return TRUE if can prove the index IDX of an array reference REF is
741 within array bound. Return false otherwise. */
742
743 static bool
idx_within_array_bound(tree ref,tree * idx,void * dta)744 idx_within_array_bound (tree ref, tree *idx, void *dta)
745 {
746 bool overflow;
747 widest_int niter, valid_niter, delta, wi_step;
748 tree ev, init, step;
749 tree low, high;
750 struct loop *loop = (struct loop*) dta;
751
752 /* Only support within-bound access for array references. */
753 if (TREE_CODE (ref) != ARRAY_REF)
754 return false;
755
756 /* For arrays at the end of the structure, we are not guaranteed that they
757 do not really extend over their declared size. However, for arrays of
758 size greater than one, this is unlikely to be intended. */
759 if (array_at_struct_end_p (ref))
760 return false;
761
  /* Determine the index's initial value and per-iteration step in
     LOOP via scalar evolution.  */
762 ev = analyze_scalar_evolution (loop, *idx);
763 ev = instantiate_parameters (loop, ev);
764 init = initial_condition (ev);
765 step = evolution_part_in_loop_num (ev, loop->num);
766
767 if (!init || TREE_CODE (init) != INTEGER_CST
768 || (step && TREE_CODE (step) != INTEGER_CST))
769 return false;
770
771 low = array_ref_low_bound (ref);
772 high = array_ref_up_bound (ref);
773
774 /* The case of nonconstant bounds could be handled, but it would be
775 complicated. */
776 if (TREE_CODE (low) != INTEGER_CST
777 || !high || TREE_CODE (high) != INTEGER_CST)
778 return false;
779
780 /* Check if the initial idx is within bound. */
781 if (wi::to_widest (init) < wi::to_widest (low)
782 || wi::to_widest (init) > wi::to_widest (high))
783 return false;
784
785 /* The idx is always within bound. */
786 if (!step || integer_zerop (step))
787 return true;
788
789 if (!max_loop_iterations (loop, &niter))
790 return false;
791
  /* Compute the distance from the initial value to the bound that the
     index moves towards, as a positive delta and positive step.  */
792 if (wi::to_widest (step) < 0)
793 {
794 delta = wi::to_widest (init) - wi::to_widest (low);
795 wi_step = -wi::to_widest (step);
796 }
797 else
798 {
799 delta = wi::to_widest (high) - wi::to_widest (init);
800 wi_step = wi::to_widest (step);
801 }
802
  /* The index stays in bounds for DELTA / STEP iterations; compare
     against the loop's maximum iteration count.  */
803 valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
804 /* The iteration space of idx is within array bound. */
805 if (!overflow && niter <= valid_niter)
806 return true;
807
808 return false;
809 }
810
811 /* Return TRUE if ref is a within bound array reference. */
812
813 static bool
ref_within_array_bound(gimple * stmt,tree ref)814 ref_within_array_bound (gimple *stmt, tree ref)
815 {
816 struct loop *loop = loop_containing_stmt (stmt);
817
818 gcc_assert (loop != NULL);
819 return for_each_index (&ref, idx_within_array_bound, loop);
820 }
821
822
823 /* Given a memory reference expression T, return TRUE if base object
824 it refers to is writable. The base object of a memory reference
825 is the main object being referenced, which is returned by function
826 get_base_address. */
827
828 static bool
base_object_writable(tree ref)829 base_object_writable (tree ref)
830 {
831 tree base_tree = get_base_address (ref);
832
833 return (base_tree
834 && DECL_P (base_tree)
835 && decl_binds_to_current_def_p (base_tree)
836 && !TREE_READONLY (base_tree));
837 }
838
839 /* Return true when the memory references of STMT won't trap in the
840 if-converted code. There are two things that we have to check for:
841
842 - writes to memory occur to writable memory: if-conversion of
843 memory writes transforms the conditional memory writes into
844 unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
845 into "A[i] = cond ? foo : A[i]", and as the write to memory may not
846 be executed at all in the original code, it may be a readonly
847 memory. To check that A is not const-qualified, we check that
848 there exists at least an unconditional write to A in the current
849 function.
850
851 - reads or writes to memory are valid memory accesses for every
852 iteration. To check that the memory accesses are correctly formed
853 and that we are allowed to read and write in these locations, we
854 check that the memory accesses to be if-converted occur at every
855 iteration unconditionally.
856
857 Returns true for the memory reference in STMT, same memory reference
858 is read or written unconditionally at least once and the base memory
859 reference is written unconditionally once. This is to check reference
860 will not write fault. Also returns true if the memory reference is
861 unconditionally read once then we are conditionally writing to memory
862 which is defined as read and write and is bound to the definition
863 we are seeing. */
864 static bool
ifcvt_memrefs_wont_trap(gimple * stmt,vec<data_reference_p> drs)865 ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
866 {
867 /* If DR didn't see a reference here we can't use it to tell
868 whether the ref traps or not. */
869 if (gimple_uid (stmt) == 0)
870 return false;
871
  /* The statement's uid indexes (1-based) into the DR array.  */
872 data_reference_p *master_dr, *base_master_dr;
873 data_reference_p a = drs[gimple_uid (stmt) - 1];
874
875 tree base = DR_BASE_OBJECT (a);
876 innermost_loop_behavior *innermost = &DR_INNERMOST (a);
877
878 gcc_assert (DR_STMT (a) == stmt);
879 gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
880 || DR_INIT (a) || DR_STEP (a));
881
882 master_dr = innermost_DR_map->get (innermost);
883 gcc_assert (master_dr != NULL);
884
885 base_master_dr = baseref_DR_map->get (base);
886
887 /* If a is unconditionally written to it doesn't trap. */
888 if (DR_W_UNCONDITIONALLY (*master_dr))
889 return true;
890
891 /* If a is unconditionally accessed then ...
892
893 Even a is conditional access, we can treat it as an unconditional
894 one if it's an array reference and all its index are within array
895 bound. */
896 if (DR_RW_UNCONDITIONALLY (*master_dr)
897 || ref_within_array_bound (stmt, DR_REF (a)))
898 {
899 /* an unconditional read won't trap. */
900 if (DR_IS_READ (a))
901 return true;
902
903 /* an unconditional write won't trap if the base is written
904 to unconditionally. */
905 if (base_master_dr
906 && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
907 return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES)
908 /* or the base is known to be not readonly. */;
909 else if (base_object_writable (DR_REF (a)))
910 return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES);
911 }
912
913 return false;
914 }
915
916 /* Return true if STMT could be converted into a masked load or store
917 (conditional load or store based on a mask computed from bb predicate). */
918
919 static bool
ifcvt_can_use_mask_load_store(gimple * stmt)920 ifcvt_can_use_mask_load_store (gimple *stmt)
921 {
922 tree lhs, ref;
923 machine_mode mode;
924 basic_block bb = gimple_bb (stmt);
925 bool is_load;
926
  /* Masked loads/stores only make sense when vectorization is on for
     this loop and STMT is a simple, non-volatile assignment.  */
927 if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
928 || bb->loop_father->dont_vectorize
929 || !gimple_assign_single_p (stmt)
930 || gimple_has_volatile_ops (stmt))
931 return false;
932
933 /* Check whether this is a load or store. */
934 lhs = gimple_assign_lhs (stmt);
935 if (gimple_store_p (stmt))
936 {
937 if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
938 return false;
939 is_load = false;
940 ref = lhs;
941 }
942 else if (gimple_assign_load_p (stmt))
943 {
944 is_load = true;
945 ref = gimple_assign_rhs1 (stmt);
946 }
947 else
948 return false;
949
950 if (may_be_nonaddressable_p (ref))
951 return false;
952
953 /* Mask should be integer mode of the same size as the load/store
954 mode. */
955 mode = TYPE_MODE (TREE_TYPE (lhs));
956 if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
957 return false;
958
  /* Finally ask the target whether it supports a masked load/store in
     this mode.  */
959 if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
960 return true;
961
962 return false;
963 }
964
965 /* Return true when STMT is if-convertible.
966
967 GIMPLE_ASSIGN statement is not if-convertible if,
968 - it is not movable,
969 - it could trap,
970 - LHS is not var decl. */
971
972 static bool
if_convertible_gimple_assign_stmt_p(gimple * stmt,vec<data_reference_p> refs)973 if_convertible_gimple_assign_stmt_p (gimple *stmt,
974 vec<data_reference_p> refs)
975 {
976 tree lhs = gimple_assign_lhs (stmt);
977
978 if (dump_file && (dump_flags & TDF_DETAILS))
979 {
980 fprintf (dump_file, "-------------------------\n");
981 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
982 }
983
984 if (!is_gimple_reg_type (TREE_TYPE (lhs)))
985 return false;
986
987 /* Some of these constraints might be too conservative. */
988 if (stmt_ends_bb_p (stmt)
989 || gimple_has_volatile_ops (stmt)
990 || (TREE_CODE (lhs) == SSA_NAME
991 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
992 || gimple_has_side_effects (stmt))
993 {
994 if (dump_file && (dump_flags & TDF_DETAILS))
995 fprintf (dump_file, "stmt not suitable for ifcvt\n");
996 return false;
997 }
998
999 /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because
1000 in between if_convertible_loop_p and combine_blocks
1001 we can perform loop versioning. */
1002 gimple_set_plf (stmt, GF_PLF_2, false);
1003
  /* If the statement may trap and cannot be proven safe via the data
     references, it can still be if-converted as a masked load/store
     when the target supports that; flag it via GF_PLF_2.  */
1004 if ((! gimple_vuse (stmt)
1005 || gimple_could_trap_p_1 (stmt, false, false)
1006 || ! ifcvt_memrefs_wont_trap (stmt, refs))
1007 && gimple_could_trap_p (stmt))
1008 {
1009 if (ifcvt_can_use_mask_load_store (stmt))
1010 {
1011 gimple_set_plf (stmt, GF_PLF_2, true);
1012 any_pred_load_store = true;
1013 return true;
1014 }
1015 if (dump_file && (dump_flags & TDF_DETAILS))
1016 fprintf (dump_file, "tree could trap...\n");
1017 return false;
1018 }
1019
1020 /* When if-converting stores force versioning, likewise if we
1021 ended up generating store data races. */
1022 if (gimple_vdef (stmt))
1023 any_pred_load_store = true;
1024
1025 return true;
1026 }
1027
1028 /* Return true when STMT is if-convertible.
1029
1030 A statement is if-convertible if:
1031 - it is an if-convertible GIMPLE_ASSIGN,
1032 - it is a GIMPLE_LABEL or a GIMPLE_COND,
1033 - it is builtins call. */
1034
1035 static bool
if_convertible_stmt_p(gimple * stmt,vec<data_reference_p> refs)1036 if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
1037 {
1038 switch (gimple_code (stmt))
1039 {
1040 case GIMPLE_LABEL:
1041 case GIMPLE_DEBUG:
1042 case GIMPLE_COND:
1043 return true;
1044
1045 case GIMPLE_ASSIGN:
1046 return if_convertible_gimple_assign_stmt_p (stmt, refs);
1047
1048 case GIMPLE_CALL:
1049 {
1050 tree fndecl = gimple_call_fndecl (stmt);
1051 if (fndecl)
1052 {
1053 int flags = gimple_call_flags (stmt);
1054 if ((flags & ECF_CONST)
1055 && !(flags & ECF_LOOPING_CONST_OR_PURE)
1056 /* We can only vectorize some builtins at the moment,
1057 so restrict if-conversion to those. */
1058 && DECL_BUILT_IN (fndecl))
1059 return true;
1060 }
1061 return false;
1062 }
1063
1064 default:
1065 /* Don't know what to do with 'em so don't do anything. */
1066 if (dump_file && (dump_flags & TDF_DETAILS))
1067 {
1068 fprintf (dump_file, "don't know what to do\n");
1069 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1070 }
1071 return false;
1072 }
1073
1074 return true;
1075 }
1076
1077 /* Assumes that BB has more than 1 predecessors.
1078 Returns false if at least one successor is not on critical edge
1079 and true otherwise. */
1080
1081 static inline bool
all_preds_critical_p(basic_block bb)1082 all_preds_critical_p (basic_block bb)
1083 {
1084 edge e;
1085 edge_iterator ei;
1086
1087 FOR_EACH_EDGE (e, ei, bb->preds)
1088 if (EDGE_COUNT (e->src->succs) == 1)
1089 return false;
1090 return true;
1091 }
1092
1093 /* Returns true if at least one successor in on critical edge. */
1094 static inline bool
has_pred_critical_p(basic_block bb)1095 has_pred_critical_p (basic_block bb)
1096 {
1097 edge e;
1098 edge_iterator ei;
1099
1100 FOR_EACH_EDGE (e, ei, bb->preds)
1101 if (EDGE_COUNT (e->src->succs) > 1)
1102 return true;
1103 return false;
1104 }
1105
/* Return true when BB is if-convertible.  This routine does not check
   basic block's statements and phis.

   A basic block is not if-convertible if:
   - it is non-empty and it is after the exit block (in BFS order),
   - it is after the exit block but before the latch,
   - its edges are not normal.

   EXIT_BB is the basic block containing the exit of the LOOP.  BB is
   inside LOOP.  The caller walks blocks in if-conversion order and
   passes EXIT_BB as NULL until the exit block has been seen.  */

static bool
if_convertible_bb_p (struct loop *loop, basic_block bb, basic_block exit_bb)
{
  edge e;
  edge_iterator ei;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "----------[%d]-------------\n", bb->index);

  /* More than two successors (e.g. a computed goto) cannot be
     expressed by predication.  */
  if (EDGE_COUNT (bb->succs) > 2)
    return false;

  /* Non-NULL EXIT_BB means the exit block was already visited, so BB
     comes after it in the traversal order.  */
  if (exit_bb)
    {
      if (bb != loop->latch)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "basic block after exit bb but before latch\n");
	  return false;
	}
      else if (!empty_block_p (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "non empty basic block after exit bb\n");
	  return false;
	}
      /* At this point BB is necessarily the (empty) latch; it must be
	 dominated by the exit block, otherwise it could be reached
	 bypassing the exit test.  */
      else if (bb == loop->latch
	       && bb != exit_bb
	       && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (dump_file, "latch is not dominated by exit_block\n");
	    return false;
	  }
    }

  /* Be less adventurous and handle only normal edges.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "Difficult to handle edges\n");
	return false;
      }

  return true;
}
1164
1165 /* Return true when all predecessor blocks of BB are visited. The
1166 VISITED bitmap keeps track of the visited blocks. */
1167
1168 static bool
pred_blocks_visited_p(basic_block bb,bitmap * visited)1169 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1170 {
1171 edge e;
1172 edge_iterator ei;
1173 FOR_EACH_EDGE (e, ei, bb->preds)
1174 if (!bitmap_bit_p (*visited, e->src->index))
1175 return false;
1176
1177 return true;
1178 }
1179
/* Get body of a LOOP in suitable order for if-conversion.  It is
   caller's responsibility to deallocate basic block list.
   If-conversion suitable order is, breadth first sort (BFS) order
   with an additional constraint: select a block only if all its
   predecessors are already selected.

   Returns NULL when the loop contains a block in an irreducible
   region, which cannot be if-converted.  */

static basic_block *
get_loop_body_in_if_conv_order (const struct loop *loop)
{
  basic_block *blocks, *blocks_in_bfs_order;
  basic_block bb;
  bitmap visited;
  unsigned int index = 0;
  unsigned int visited_count = 0;

  gcc_assert (loop->num_nodes);
  gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));

  blocks = XCNEWVEC (basic_block, loop->num_nodes);
  visited = BITMAP_ALLOC (NULL);

  blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);

  index = 0;
  /* Repeatedly sweep the BFS-ordered list, appending each block whose
     predecessors were all appended already (the header is always
     eligible); restart the sweep until every block is placed.  */
  while (index < loop->num_nodes)
    {
      bb = blocks_in_bfs_order [index];

      /* Irreducible regions cannot be if-converted: clean up and
	 bail out.  */
      if (bb->flags & BB_IRREDUCIBLE_LOOP)
	{
	  free (blocks_in_bfs_order);
	  BITMAP_FREE (visited);
	  free (blocks);
	  return NULL;
	}

      if (!bitmap_bit_p (visited, bb->index))
	{
	  if (pred_blocks_visited_p (bb, &visited)
	      || bb == loop->header)
	    {
	      /* This block is now visited.  */
	      bitmap_set_bit (visited, bb->index);
	      blocks[visited_count++] = bb;
	    }
	}

      index++;

      if (index == loop->num_nodes
	  && visited_count != loop->num_nodes)
	/* Not done yet.  */
	index = 0;
    }
  free (blocks_in_bfs_order);
  BITMAP_FREE (visited);
  return blocks;
}
1238
/* Returns true when the analysis of the predicates for all the basic
   blocks in LOOP succeeded.

   predicate_bbs first allocates the predicates of the basic blocks.
   These fields are then initialized with the tree expressions
   representing the predicates under which a basic block is executed
   in the LOOP.  As the loop->header is executed at each iteration, it
   has the "true" predicate.  Other statements executed under a
   condition are predicated with that condition, for example

   | if (x)
   |   S1;
   | else
   |   S2;

   S1 will be predicated with "x", and
   S2 will be predicated with "!x".  */

static void
predicate_bbs (loop_p loop)
{
  unsigned int i;

  for (i = 0; i < loop->num_nodes; i++)
    init_bb_predicate (ifc_bbs[i]);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      tree cond;
      gimple *stmt;

      /* The loop latch and loop exit block are always executed and
	 have no extra conditions to be processed: skip them.  */
      if (bb == loop->latch
	  || bb_with_exit_edge_p (loop, bb))
	{
	  reset_bb_predicate (bb);
	  continue;
	}

      cond = bb_predicate (bb);
      stmt = last_stmt (bb);
      if (stmt && gimple_code (stmt) == GIMPLE_COND)
	{
	  tree c2;
	  edge true_edge, false_edge;
	  location_t loc = gimple_location (stmt);
	  /* Rebuild the GIMPLE_COND comparison as a tree expression
	     so it can be conjoined into the successors' predicates.  */
	  tree c = build2_loc (loc, gimple_cond_code (stmt),
			       boolean_type_node,
			       gimple_cond_lhs (stmt),
			       gimple_cond_rhs (stmt));

	  /* Add new condition into destination's predicate list.  */
	  extract_true_false_edges_from_block (gimple_bb (stmt),
					       &true_edge, &false_edge);

	  /* If C is true, then TRUE_EDGE is taken.  */
	  add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
				     unshare_expr (c));

	  /* If C is false, then FALSE_EDGE is taken.  */
	  c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
			   unshare_expr (c));
	  add_to_dst_predicate_list (loop, false_edge,
				     unshare_expr (cond), c2);

	  cond = NULL_TREE;
	}

      /* If current bb has only one successor, then consider it as an
	 unconditional goto.  */
      if (single_succ_p (bb))
	{
	  basic_block bb_n = single_succ (bb);

	  /* The successor bb inherits the predicate of its
	     predecessor.  If there is no predicate in the predecessor
	     bb, then consider the successor bb as always executed.  */
	  if (cond == NULL_TREE)
	    cond = boolean_true_node;

	  add_to_predicate_list (loop, bb_n, cond);
	}
    }

  /* The loop header is always executed.  */
  reset_bb_predicate (loop->header);
  gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
	      && bb_predicate_gimplified_stmts (loop->latch) == NULL);
}
1330
1331 /* Build region by adding loop pre-header and post-header blocks. */
1332
1333 static vec<basic_block>
build_region(struct loop * loop)1334 build_region (struct loop *loop)
1335 {
1336 vec<basic_block> region = vNULL;
1337 basic_block exit_bb = NULL;
1338
1339 gcc_assert (ifc_bbs);
1340 /* The first element is loop pre-header. */
1341 region.safe_push (loop_preheader_edge (loop)->src);
1342
1343 for (unsigned int i = 0; i < loop->num_nodes; i++)
1344 {
1345 basic_block bb = ifc_bbs[i];
1346 region.safe_push (bb);
1347 /* Find loop postheader. */
1348 edge e;
1349 edge_iterator ei;
1350 FOR_EACH_EDGE (e, ei, bb->succs)
1351 if (loop_exit_edge_p (loop, e))
1352 {
1353 exit_bb = e->dest;
1354 break;
1355 }
1356 }
1357 /* The last element is loop post-header. */
1358 gcc_assert (exit_bb);
1359 region.safe_push (exit_bb);
1360 return region;
1361 }
1362
/* Return true when LOOP is if-convertible.  This is a helper function
   for if_convertible_loop_p.  REFS and DDRS are initialized and freed
   in if_convertible_loop_p.

   Side effects: allocates ifc_bbs, innermost_DR_map and
   baseref_DR_map, and annotates each data reference with an ifc_dr
   in dr->aux; the caller is responsible for freeing those.  */

static bool
if_convertible_loop_p_1 (struct loop *loop, vec<data_reference_p> *refs)
{
  unsigned int i;
  basic_block exit_bb = NULL;
  vec<basic_block> region;

  if (find_data_references_in_loop (loop, refs) == chrec_dont_know)
    return false;

  calculate_dominance_info (CDI_DOMINATORS);

  /* Allow statements that can be handled during if-conversion.  */
  ifc_bbs = get_loop_body_in_if_conv_order (loop);
  if (!ifc_bbs)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Irreducible loop\n");
      return false;
    }

  /* Check the shape of every block; EXIT_BB stays NULL until the
     exit block has been seen, which if_convertible_bb_p relies on.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];

      if (!if_convertible_bb_p (loop, bb, exit_bb))
	return false;

      if (bb_with_exit_edge_p (loop, bb))
	exit_bb = bb;
    }

  /* Reject statement kinds we cannot handle, and clear the uids that
     the data-reference annotation below will reuse.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	switch (gimple_code (gsi_stmt (gsi)))
	  {
	  case GIMPLE_LABEL:
	  case GIMPLE_ASSIGN:
	  case GIMPLE_CALL:
	  case GIMPLE_DEBUG:
	  case GIMPLE_COND:
	    gimple_set_uid (gsi_stmt (gsi), 0);
	    break;
	  default:
	    return false;
	  }
    }

  data_reference_p dr;

  innermost_DR_map
	 = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
  baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;

  /* Compute post-dominator tree locally.  */
  region = build_region (loop);
  calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);

  predicate_bbs (loop);

  /* Free post-dominator tree since it is not used after predication.  */
  free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
  region.release ();

  /* Annotate each data reference with an ifc_dr recording under which
     predicates it is read/written, for the won't-trap analysis.  */
  for (i = 0; refs->iterate (i, &dr); i++)
    {
      tree ref = DR_REF (dr);

      dr->aux = XNEW (struct ifc_dr);
      DR_BASE_W_UNCONDITIONALLY (dr) = false;
      DR_RW_UNCONDITIONALLY (dr) = false;
      DR_W_UNCONDITIONALLY (dr) = false;
      IFC_DR (dr)->rw_predicate = boolean_false_node;
      IFC_DR (dr)->w_predicate = boolean_false_node;
      IFC_DR (dr)->base_w_predicate = boolean_false_node;
      /* Stmt uids (cleared above) link statements back to their DR
	 index + 1; 0 means "no data reference".  */
      if (gimple_uid (DR_STMT (dr)) == 0)
	gimple_set_uid (DR_STMT (dr), i + 1);

      /* If DR doesn't have innermost loop behavior or it's a compound
	 memory reference, we synthesize its innermost loop behavior
	 for hashing.  */
      if (TREE_CODE (ref) == COMPONENT_REF
	  || TREE_CODE (ref) == IMAGPART_EXPR
	  || TREE_CODE (ref) == REALPART_EXPR
	  || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
	      || DR_INIT (dr) || DR_STEP (dr)))
        {
	  while (TREE_CODE (ref) == COMPONENT_REF
	         || TREE_CODE (ref) == IMAGPART_EXPR
	         || TREE_CODE (ref) == REALPART_EXPR)
	    ref = TREE_OPERAND (ref, 0);

	  memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
	  DR_BASE_ADDRESS (dr) = ref;
        }
      hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_stmt_iterator itr;

      /* Check the if-convertibility of statements in predicated BBs.  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
	  if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
	    return false;
    }

  /* Checking PHIs needs to be done after stmts, as the fact whether there
     are any masked loads or stores affects the tests.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gphi_iterator itr;

      for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
	if (!if_convertible_phi_p (loop, bb, itr.phi ()))
	  return false;
    }

  if (dump_file)
    fprintf (dump_file, "Applying if-conversion\n");

  return true;
}
1498
1499 /* Return true when LOOP is if-convertible.
1500 LOOP is if-convertible if:
1501 - it is innermost,
1502 - it has two or more basic blocks,
1503 - it has only one exit,
1504 - loop header is not the exit edge,
1505 - if its basic blocks and phi nodes are if convertible. */
1506
1507 static bool
if_convertible_loop_p(struct loop * loop)1508 if_convertible_loop_p (struct loop *loop)
1509 {
1510 edge e;
1511 edge_iterator ei;
1512 bool res = false;
1513 vec<data_reference_p> refs;
1514
1515 /* Handle only innermost loop. */
1516 if (!loop || loop->inner)
1517 {
1518 if (dump_file && (dump_flags & TDF_DETAILS))
1519 fprintf (dump_file, "not innermost loop\n");
1520 return false;
1521 }
1522
1523 /* If only one block, no need for if-conversion. */
1524 if (loop->num_nodes <= 2)
1525 {
1526 if (dump_file && (dump_flags & TDF_DETAILS))
1527 fprintf (dump_file, "less than 2 basic blocks\n");
1528 return false;
1529 }
1530
1531 /* More than one loop exit is too much to handle. */
1532 if (!single_exit (loop))
1533 {
1534 if (dump_file && (dump_flags & TDF_DETAILS))
1535 fprintf (dump_file, "multiple exits\n");
1536 return false;
1537 }
1538
1539 /* If one of the loop header's edge is an exit edge then do not
1540 apply if-conversion. */
1541 FOR_EACH_EDGE (e, ei, loop->header->succs)
1542 if (loop_exit_edge_p (loop, e))
1543 return false;
1544
1545 refs.create (5);
1546 res = if_convertible_loop_p_1 (loop, &refs);
1547
1548 data_reference_p dr;
1549 unsigned int i;
1550 for (i = 0; refs.iterate (i, &dr); i++)
1551 free (dr->aux);
1552
1553 free_data_refs (refs);
1554
1555 delete innermost_DR_map;
1556 innermost_DR_map = NULL;
1557
1558 delete baseref_DR_map;
1559 baseref_DR_map = NULL;
1560
1561 return res;
1562 }
1563
/* Returns true if def-stmt for phi argument ARG is simple increment/decrement
   which is in predicated basic block.
   In fact, the following PHI pattern is searching:
      loop-header:
	reduc_1 = PHI <..., reduc_2>
      ...
	if (...)
	  reduc_3 = ...
	reduc_2 = PHI <reduc_1, reduc_3>

   ARG_0 and ARG_1 are correspondent PHI arguments.
   REDUC, OP0 and OP1 contain reduction stmt and its operands.
   EXTENDED is true if PHI has > 2 arguments.

   On success, *REDUC is the conditional increment statement, *OP0 the
   reduction variable (the header PHI result) and *OP1 the addend.  */

static bool
is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
			  tree *op0, tree *op1, bool extended)
{
  tree lhs, r_op1, r_op2;
  gimple *stmt;
  gimple *header_phi = NULL;
  enum tree_code reduction_op;
  basic_block bb = gimple_bb (phi);
  struct loop *loop = bb->loop_father;
  edge latch_e = loop_latch_edge (loop);
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  edge e;
  edge_iterator ei;
  bool result = false;
  if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
    return false;

  /* Identify which argument is defined by the loop-header PHI and
     which by the (candidate) reduction statement.  For extended PHIs
     only the ARG_1 position may be the header PHI.  */
  if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
    {
      lhs = arg_1;
      header_phi = SSA_NAME_DEF_STMT (arg_0);
      stmt = SSA_NAME_DEF_STMT (arg_1);
    }
  else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
    {
      lhs = arg_0;
      header_phi = SSA_NAME_DEF_STMT (arg_1);
      stmt = SSA_NAME_DEF_STMT (arg_0);
    }
  else
    return false;
  if (gimple_bb (header_phi) != loop->header)
    return false;

  /* The header PHI must be fed from the latch by this PHI's result,
     closing the reduction cycle.  */
  if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
    return false;

  if (gimple_code (stmt) != GIMPLE_ASSIGN
      || gimple_has_volatile_ops (stmt))
    return false;

  if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return false;

  if (!is_predicated (gimple_bb (stmt)))
    return false;

  /* Check that stmt-block is predecessor of phi-block.  */
  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
    if (e->dest == bb)
      {
	result = true;
	break;
      }
  if (!result)
    return false;

  if (!has_single_use (lhs))
    return false;

  /* Only simple plus/minus reductions are handled.  */
  reduction_op = gimple_assign_rhs_code (stmt);
  if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
    return false;
  r_op1 = gimple_assign_rhs1 (stmt);
  r_op2 = gimple_assign_rhs2 (stmt);

  /* Make R_OP1 to hold reduction variable.  */
  if (r_op2 == PHI_RESULT (header_phi)
      && reduction_op == PLUS_EXPR)
    std::swap (r_op1, r_op2);
  else if (r_op1 != PHI_RESULT (header_phi))
    return false;

  /* Check that R_OP1 is used in reduction stmt or in PHI only.  */
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
	continue;
      if (use_stmt == stmt)
	continue;
      if (gimple_code (use_stmt) != GIMPLE_PHI)
	return false;
    }

  *op0 = r_op1; *op1 = r_op2;
  *reduc = stmt;
  return true;
}
1669
/* Converts conditional scalar reduction into unconditional form, e.g.
     bb_4
       if (_5 != 0) goto bb_5 else goto bb_6
     end_bb_4
     bb_5
       res_6 = res_13 + 1;
     end_bb_5
     bb_6
       # res_2 = PHI <res_13(4), res_6(5)>
     end_bb_6

   will be converted into sequence
    _ifc__1 = _5 != 0 ? 1 : 0;
    res_2 = res_13 + _ifc__1;
  Argument SWAP tells that arguments of conditional expression should be
  swapped.
  Returns rhs of resulting PHI assignment.

  REDUC is the conditional increment statement (found by
  is_cond_scalar_reduction), GSI the insertion point, COND the block
  predicate, and OP0/OP1 the reduction variable and addend.  REDUC is
  removed as a side effect.  */

static tree
convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
			       tree cond, tree op0, tree op1, bool swap)
{
  gimple_stmt_iterator stmt_it;
  gimple *new_assign;
  tree rhs;
  tree rhs1 = gimple_assign_rhs1 (reduc);
  tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
  tree c;
  tree zero = build_zero_cst (TREE_TYPE (rhs1));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Found cond scalar reduction.\n");
      print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
    }

  /* Build cond expression using COND and constant operand
     of reduction rhs.  */
  c = fold_build_cond_expr (TREE_TYPE (rhs1),
			    unshare_expr (cond),
			    swap ? zero : op1,
			    swap ? op1 : zero);

  /* Create assignment stmt and insert it at GSI.  */
  new_assign = gimple_build_assign (tmp, c);
  gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
  /* Build rhs for unconditional increment/decrement.  */
  rhs = fold_build2 (gimple_assign_rhs_code (reduc),
		     TREE_TYPE (rhs1), op0, tmp);

  /* Delete original reduction stmt.  */
  stmt_it = gsi_for_stmt (reduc);
  gsi_remove (&stmt_it, true);
  release_defs (reduc);
  return rhs;
}
1726
/* Produce condition for all occurrences of ARG in PHI node.  OCCUR
   holds the argument indices of PHI that carry the same value; the
   result is the OR of the predicates of the corresponding incoming
   blocks, gimplified at GSI.  */

static tree
gen_phi_arg_condition (gphi *phi, vec<int> *occur,
		       gimple_stmt_iterator *gsi)
{
  int len;
  int i;
  tree cond = NULL_TREE;
  tree c;
  edge e;

  len = occur->length ();
  gcc_assert (len > 0);
  for (i = 0; i < len; i++)
    {
      e = gimple_phi_arg_edge (phi, (*occur)[i]);
      c = bb_predicate (e->src);
      /* A true predicate subsumes everything else: short-circuit.  */
      if (is_true_predicate (c))
	{
	  cond = c;
	  break;
	}
      c = force_gimple_operand_gsi_1 (gsi, unshare_expr (c),
				      is_gimple_condexpr, NULL_TREE,
				      true, GSI_SAME_STMT);
      if (cond != NULL_TREE)
	{
	  /* Must build OR expression.  */
	  cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
	  cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
					     is_gimple_condexpr, NULL_TREE,
					     true, GSI_SAME_STMT);
	}
      else
	cond = c;
    }
  gcc_assert (cond != NULL_TREE);
  return cond;
}
1767
1768 /* Local valueization callback that follows all-use SSA edges. */
1769
1770 static tree
ifcvt_follow_ssa_use_edges(tree val)1771 ifcvt_follow_ssa_use_edges (tree val)
1772 {
1773 return val;
1774 }
1775
/* Replace a scalar PHI node with a COND_EXPR using COND as condition.
   This routine can handle PHI nodes with more than two arguments.

   For example,
     S1: A = PHI <x1(1), x2(5)>
   is converted into,
     S2: A = cond ? x1 : x2;

   The generated code is inserted at GSI that points to the top of
   basic block's statement list.
   If PHI node has more than two arguments a chain of conditional
   expression is produced.  */


static void
predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
{
  gimple *new_stmt = NULL, *reduc;
  tree rhs, res, arg0, arg1, op0, op1, scev;
  tree cond;
  unsigned int index0;
  unsigned int max, args_len;
  edge e;
  basic_block bb;
  unsigned int i;

  res = gimple_phi_result (phi);
  /* Virtual PHIs are handled by the caller; nothing to do.  */
  if (virtual_operand_p (res))
    return;

  /* A degenerate PHI (all arguments equal) or a result fully
     described by scalar evolution needs no COND_EXPR: emit a plain
     copy.  */
  if ((rhs = degenerate_phi_result (phi))
      || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
					    res))
	  && !chrec_contains_undetermined (scev)
	  && scev != res
	  && (rhs = gimple_phi_arg_def (phi, 0))))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Degenerate phi!\n");
	  print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
	}
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      update_stmt (new_stmt);
      return;
    }

  bb = gimple_bb (phi);
  if (EDGE_COUNT (bb->preds) == 2)
    {
      /* Predicate ordinary PHI node with 2 arguments.  */
      edge first_edge, second_edge;
      basic_block true_bb;
      first_edge = EDGE_PRED (bb, 0);
      second_edge = EDGE_PRED (bb, 1);
      cond = bb_predicate (first_edge->src);
      /* Prefer the edge whose source predicate is not negated, so the
	 generated COND_EXPR uses the positive form.  */
      if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	std::swap (first_edge, second_edge);
      if (EDGE_COUNT (first_edge->src->succs) > 1)
	{
	  cond = bb_predicate (second_edge->src);
	  if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	    cond = TREE_OPERAND (cond, 0);
	  else
	    first_edge = second_edge;
	}
      else
	cond = bb_predicate (first_edge->src);
      /* Gimplify the condition to a valid cond-expr conditonal operand.  */
      cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
					 is_gimple_condexpr, NULL_TREE,
					 true, GSI_SAME_STMT);
      true_bb = first_edge->src;
      /* ARG0 is the value flowing in from TRUE_BB, ARG1 the other.  */
      if (EDGE_PRED (bb, 1)->src == true_bb)
	{
	  arg0 = gimple_phi_arg_def (phi, 1);
	  arg1 = gimple_phi_arg_def (phi, 0);
	}
      else
	{
	  arg0 = gimple_phi_arg_def (phi, 0);
	  arg1 = gimple_phi_arg_def (phi, 1);
	}
      if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
				    &op0, &op1, false))
	/* Convert reduction stmt into vectorizable form.  */
	rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
					     true_bb != gimple_bb (reduc));
      else
	/* Build new RHS using selected condition and arguments.  */
	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
				    arg0, arg1);
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
      if (fold_stmt (&new_gsi, ifcvt_follow_ssa_use_edges))
	{
	  new_stmt = gsi_stmt (new_gsi);
	  update_stmt (new_stmt);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "new phi replacement stmt\n");
	  print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
	}
      return;
    }

  /* Create hashmap for PHI node which contain vector of argument indexes
     having the same value.  */
  bool swap = false;
  hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
  unsigned int num_args = gimple_phi_num_args (phi);
  int max_ind = -1;
  /* Vector of different PHI argument values.  */
  auto_vec<tree> args (num_args);

  /* Compute phi_arg_map.  */
  for (i = 0; i < num_args; i++)
    {
      tree arg;

      arg = gimple_phi_arg_def (phi, i);
      if (!phi_arg_map.get (arg))
	args.quick_push (arg);
      phi_arg_map.get_or_insert (arg).safe_push (i);
    }

  /* Determine element with max number of occurrences.  */
  max_ind = -1;
  max = 1;
  args_len = args.length ();
  for (i = 0; i < args_len; i++)
    {
      unsigned int len;
      if ((len = phi_arg_map.get (args[i])->length ()) > max)
	{
	  max_ind = (int) i;
	  max = len;
	}
    }

  /* Put element with max number of occurences to the end of ARGS.
     It becomes the fall-through value of the COND_EXPR chain, which
     needs no predicate of its own.  */
  if (max_ind != -1 && max_ind +1 != (int) args_len)
    std::swap (args[args_len - 1], args[max_ind]);

  /* Handle one special case when number of arguments with different values
     is equal 2 and one argument has the only occurrence.  Such PHI can be
     handled as if would have only 2 arguments.  */
  if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
    {
      vec<int> *indexes;
      indexes = phi_arg_map.get (args[0]);
      index0 = (*indexes)[0];
      arg0 = args[0];
      arg1 = args[1];
      e = gimple_phi_arg_edge (phi, index0);
      cond = bb_predicate (e->src);
      if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	{
	  swap = true;
	  cond = TREE_OPERAND (cond, 0);
	}
      /* Gimplify the condition to a valid cond-expr conditonal operand.  */
      cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
					 is_gimple_condexpr, NULL_TREE,
					 true, GSI_SAME_STMT);
      if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
				      &op0, &op1, true)))
	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
				    swap? arg1 : arg0,
				    swap? arg0 : arg1);
      else
	/* Convert reduction stmt into vectorizable form.  */
	rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
					     swap);
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      update_stmt (new_stmt);
    }
  else
    {
      /* Common case: emit a chain of COND_EXPRs, one per distinct
	 argument value, with the most frequent value as the final
	 fall-through.  */
      vec<int> *indexes;
      tree type = TREE_TYPE (gimple_phi_result (phi));
      tree lhs;
      arg1 = args[1];
      for (i = 0; i < args_len; i++)
	{
	  arg0 = args[i];
	  indexes = phi_arg_map.get (args[i]);
	  if (i != args_len - 1)
	    lhs = make_temp_ssa_name (type, NULL, "_ifc_");
	  else
	    lhs = res;
	  cond = gen_phi_arg_condition (phi, indexes, gsi);
	  rhs = fold_build_cond_expr (type, unshare_expr (cond),
				      arg0, arg1);
	  new_stmt = gimple_build_assign (lhs, rhs);
	  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
	  update_stmt (new_stmt);
	  arg1 = lhs;
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "new extended phi replacement stmt\n");
      print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
    }
}
1989
/* Replaces in LOOP all the scalar phi nodes other than those in the
   LOOP->header block with conditional modify expressions.
   Virtual PHIs are kept; converted scalar PHIs are removed.  */

static void
predicate_all_scalar_phis (struct loop *loop)
{
  basic_block bb;
  unsigned int orig_loop_num_nodes = loop->num_nodes;
  unsigned int i;

  /* Start at 1: ifc_bbs[0] is the header, whose PHIs are kept.  */
  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      gphi *phi;
      gimple_stmt_iterator gsi;
      gphi_iterator phi_gsi;
      bb = ifc_bbs[i];

      if (bb == loop->header)
	continue;

      phi_gsi = gsi_start_phis (bb);
      if (gsi_end_p (phi_gsi))
	continue;

      gsi = gsi_after_labels (bb);
      /* remove_phi_node advances PHI_GSI, so only step explicitly
	 over the virtual PHIs we keep.  */
      while (!gsi_end_p (phi_gsi))
	{
	  phi = phi_gsi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    gsi_next (&phi_gsi);
	  else
	    {
	      predicate_scalar_phi (phi, &gsi);
	      remove_phi_node (&phi_gsi, false);
	    }
	}
    }
}
2028
2029 /* Insert in each basic block of LOOP the statements produced by the
2030 gimplification of the predicates. */
2031
2032 static void
insert_gimplified_predicates(loop_p loop)2033 insert_gimplified_predicates (loop_p loop)
2034 {
2035 unsigned int i;
2036
2037 for (i = 0; i < loop->num_nodes; i++)
2038 {
2039 basic_block bb = ifc_bbs[i];
2040 gimple_seq stmts;
2041 if (!is_predicated (bb))
2042 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
2043 if (!is_predicated (bb))
2044 {
2045 /* Do not insert statements for a basic block that is not
2046 predicated. Also make sure that the predicate of the
2047 basic block is set to true. */
2048 reset_bb_predicate (bb);
2049 continue;
2050 }
2051
2052 stmts = bb_predicate_gimplified_stmts (bb);
2053 if (stmts)
2054 {
2055 if (any_pred_load_store)
2056 {
2057 /* Insert the predicate of the BB just after the label,
2058 as the if-conversion of memory writes will use this
2059 predicate. */
2060 gimple_stmt_iterator gsi = gsi_after_labels (bb);
2061 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2062 }
2063 else
2064 {
2065 /* Insert the predicate of the BB at the end of the BB
2066 as this would reduce the register pressure: the only
2067 use of this predicate will be in successor BBs. */
2068 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2069
2070 if (gsi_end_p (gsi)
2071 || stmt_ends_bb_p (gsi_stmt (gsi)))
2072 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2073 else
2074 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2075 }
2076
2077 /* Once the sequence is code generated, set it to NULL. */
2078 set_bb_predicate_gimplified_stmts (bb, NULL);
2079 }
2080 }
2081 }
2082
2083 /* Helper function for predicate_mem_writes. Returns index of existent
2084 mask if it was created for given SIZE and -1 otherwise. */
2085
2086 static int
mask_exists(int size,vec<int> vec)2087 mask_exists (int size, vec<int> vec)
2088 {
2089 unsigned int ix;
2090 int v;
2091 FOR_EACH_VEC_ELT (vec, ix, v)
2092 if (v == size)
2093 return (int) ix;
2094 return -1;
2095 }
2096
/* Predicate each write to memory in LOOP.

   This function transforms control flow constructs containing memory
   writes of the form:

   | for (i = 0; i < N; i++)
   |   if (cond)
   |     A[i] = expr;

   into the following form that does not contain control flow:

   | for (i = 0; i < N; i++)
   |   A[i] = cond ? expr : A[i];

   The original CFG looks like this:

   | bb_0
   |   i = 0
   | end_bb_0
   |
   | bb_1
   |   if (i < N) goto bb_5 else goto bb_2
   | end_bb_1
   |
   | bb_2
   |   cond = some_computation;
   |   if (cond) goto bb_3 else goto bb_4
   | end_bb_2
   |
   | bb_3
   |   A[i] = expr;
   |   goto bb_4
   | end_bb_3
   |
   | bb_4
   |   goto bb_1
   | end_bb_4

   insert_gimplified_predicates inserts the computation of the COND
   expression at the beginning of the destination basic block:

   | bb_0
   |   i = 0
   | end_bb_0
   |
   | bb_1
   |   if (i < N) goto bb_5 else goto bb_2
   | end_bb_1
   |
   | bb_2
   |   cond = some_computation;
   |   if (cond) goto bb_3 else goto bb_4
   | end_bb_2
   |
   | bb_3
   |   cond = some_computation;
   |   A[i] = expr;
   |   goto bb_4
   | end_bb_3
   |
   | bb_4
   |   goto bb_1
   | end_bb_4

   predicate_mem_writes is then predicating the memory write as follows:

   | bb_0
   |   i = 0
   | end_bb_0
   |
   | bb_1
   |   if (i < N) goto bb_5 else goto bb_2
   | end_bb_1
   |
   | bb_2
   |   if (cond) goto bb_3 else goto bb_4
   | end_bb_2
   |
   | bb_3
   |   cond = some_computation;
   |   A[i] = cond ? expr : A[i];
   |   goto bb_4
   | end_bb_3
   |
   | bb_4
   |   goto bb_1
   | end_bb_4

   and finally combine_blocks removes the basic block boundaries making
   the loop vectorizable:

   | bb_0
   |   i = 0
   |   if (i < N) goto bb_5 else goto bb_1
   | end_bb_0
   |
   | bb_1
   |   cond = some_computation;
   |   A[i] = cond ? expr : A[i];
   |   if (i < N) goto bb_5 else goto bb_4
   | end_bb_1
   |
   | bb_4
   |   goto bb_1
   | end_bb_4
*/

static void
predicate_mem_writes (loop_p loop)
{
  unsigned int i, orig_loop_num_nodes = loop->num_nodes;
  auto_vec<int, 1> vect_sizes;
  auto_vec<tree, 1> vect_masks;

  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      gimple_stmt_iterator gsi;
      basic_block bb = ifc_bbs[i];
      tree cond = bb_predicate (bb);
      bool swap;
      gimple *stmt;
      int index;

      /* Nothing to predicate in an unconditionally-executed block.  */
      if (is_true_predicate (cond))
	continue;

      /* A negated predicate is handled by swapping the COND_EXPR arms
	 (or XOR-ing the mask) rather than materializing the NOT.  */
      swap = false;
      if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	{
	  swap = true;
	  cond = TREE_OPERAND (cond, 0);
	}

      /* Masks are cached per bit size, per basic block.  */
      vect_sizes.truncate (0);
      vect_masks.truncate (0);

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	{
	  if (!gimple_assign_single_p (stmt = gsi_stmt (gsi)))
	    /* Only single-rhs assignments are rewritten here.  */
	    ;
	  else if (is_false_predicate (cond)
		   && gimple_vdef (stmt))
	    {
	      /* A store under a provably-false predicate can never
		 execute: delete it outright.  */
	      unlink_stmt_vdef (stmt);
	      gsi_remove (&gsi, true);
	      release_defs (stmt);
	      /* gsi_remove already advanced the iterator.  */
	      continue;
	    }
	  else if (gimple_plf (stmt, GF_PLF_2))
	    {
	      /* GF_PLF_2 marks a load/store that was earlier selected
		 for conversion into an IFN_MASK_LOAD/IFN_MASK_STORE
		 internal call.  */
	      tree lhs = gimple_assign_lhs (stmt);
	      tree rhs = gimple_assign_rhs1 (stmt);
	      tree ref, addr, ptr, mask;
	      gcall *new_stmt;
	      gimple_seq stmts = NULL;
	      machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
	      /* We checked before setting GF_PLF_2 that an equivalent
		 integer mode exists.  */
	      int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
	      /* For a load the memory reference is the RHS, for a
		 store it is the LHS.  */
	      ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
	      mark_addressable (ref);
	      addr = force_gimple_operand_gsi (&gsi, build_fold_addr_expr (ref),
					       true, NULL_TREE, true,
					       GSI_SAME_STMT);
	      if (!vect_sizes.is_empty ()
		  && (index = mask_exists (bitsize, vect_sizes)) != -1)
		/* Use created mask.  */
		mask = vect_masks[index];
	      else
		{
		  if (COMPARISON_CLASS_P (cond))
		    mask = gimple_build (&stmts, TREE_CODE (cond),
					 boolean_type_node,
					 TREE_OPERAND (cond, 0),
					 TREE_OPERAND (cond, 1));
		  else
		    mask = cond;

		  if (swap)
		    {
		      /* Invert the mask (mask ^ true) to account for
			 the TRUTH_NOT_EXPR stripped above.  */
		      tree true_val
			= constant_boolean_node (true, TREE_TYPE (mask));
		      mask = gimple_build (&stmts, BIT_XOR_EXPR,
					   TREE_TYPE (mask), mask, true_val);
		    }
		  gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);

		  /* Save mask and its size for further use.  */
		  vect_sizes.safe_push (bitsize);
		  vect_masks.safe_push (mask);
		}
	      /* The alias pointer argument encodes the alignment of
		 the original reference.  */
	      ptr = build_int_cst (reference_alias_ptr_type (ref),
				   get_object_alignment (ref));
	      /* Copy points-to info if possible.  */
	      if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
		copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
			       ref);
	      if (TREE_CODE (lhs) == SSA_NAME)
		{
		  /* Load: lhs = MASK_LOAD (addr, align, mask).  */
		  new_stmt
		    = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
						  ptr, mask);
		  gimple_call_set_lhs (new_stmt, lhs);
		  gimple_set_vuse (new_stmt, gimple_vuse (stmt));
		}
	      else
		{
		  /* Store: MASK_STORE (addr, align, mask, rhs).  */
		  new_stmt
		    = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
						  mask, rhs);
		  gimple_set_vuse (new_stmt, gimple_vuse (stmt));
		  gimple_set_vdef (new_stmt, gimple_vdef (stmt));
		  SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt;
		}
	      gimple_call_set_nothrow (new_stmt, true);

	      gsi_replace (&gsi, new_stmt, true);
	    }
	  else if (gimple_vdef (stmt))
	    {
	      /* Remaining store: rewrite as a read-modify-write
		 "lhs = cond ? rhs : lhs" so the store becomes
		 unconditional.  */
	      tree lhs = gimple_assign_lhs (stmt);
	      tree rhs = gimple_assign_rhs1 (stmt);
	      tree type = TREE_TYPE (lhs);

	      lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
	      rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
	      if (swap)
		std::swap (lhs, rhs);
	      cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
						 is_gimple_condexpr, NULL_TREE,
						 true, GSI_SAME_STMT);
	      rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
	      gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
	      update_stmt (stmt);
	    }
	  gsi_next (&gsi);
	}
    }
}
2336
2337 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2338 other than the exit and latch of the LOOP. Also resets the
2339 GIMPLE_DEBUG information. */
2340
2341 static void
remove_conditions_and_labels(loop_p loop)2342 remove_conditions_and_labels (loop_p loop)
2343 {
2344 gimple_stmt_iterator gsi;
2345 unsigned int i;
2346
2347 for (i = 0; i < loop->num_nodes; i++)
2348 {
2349 basic_block bb = ifc_bbs[i];
2350
2351 if (bb_with_exit_edge_p (loop, bb)
2352 || bb == loop->latch)
2353 continue;
2354
2355 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2356 switch (gimple_code (gsi_stmt (gsi)))
2357 {
2358 case GIMPLE_COND:
2359 case GIMPLE_LABEL:
2360 gsi_remove (&gsi, true);
2361 break;
2362
2363 case GIMPLE_DEBUG:
2364 /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
2365 if (gimple_debug_bind_p (gsi_stmt (gsi)))
2366 {
2367 gimple_debug_bind_reset_value (gsi_stmt (gsi));
2368 update_stmt (gsi_stmt (gsi));
2369 }
2370 gsi_next (&gsi);
2371 break;
2372
2373 default:
2374 gsi_next (&gsi);
2375 }
2376 }
2377 }
2378
/* Combine all the basic blocks from LOOP into one or two super basic
   blocks.  Replace PHI nodes with conditional modify expressions.  */

static void
combine_blocks (struct loop *loop)
{
  basic_block bb, exit_bb, merge_target_bb;
  unsigned int orig_loop_num_nodes = loop->num_nodes;
  unsigned int i;
  edge e;
  edge_iterator ei;

  /* First flatten the control flow: drop conditions/labels, emit the
     gimplified predicates and rewrite scalar PHIs and (if needed)
     memory writes into predicated form.  */
  remove_conditions_and_labels (loop);
  insert_gimplified_predicates (loop);
  predicate_all_scalar_phis (loop);

  if (any_pred_load_store)
    predicate_mem_writes (loop);

  /* Merge basic blocks: first remove all the edges in the loop,
     except for those from the exit block.  */
  exit_bb = NULL;
  bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
  for (i = 0; i < orig_loop_num_nodes; i++)
    {
      bb = ifc_bbs[i];
      /* Remember which blocks ran under a non-trivial predicate; the
	 flow-sensitive info of their defs is dropped below.  */
      predicated[i] = !is_true_predicate (bb_predicate (bb));
      free_bb_predicate (bb);
      if (bb_with_exit_edge_p (loop, bb))
	{
	  gcc_assert (exit_bb == NULL);
	  exit_bb = bb;
	}
    }
  gcc_assert (exit_bb != loop->latch);

  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      bb = ifc_bbs[i];

      for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
	{
	  if (e->src == exit_bb)
	    ei_next (&ei);
	  else
	    remove_edge (e);
	}
    }

  if (exit_bb != NULL)
    {
      if (exit_bb != loop->header)
	{
	  /* Connect this node to loop header.  */
	  make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
	  set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
	}

      /* Redirect non-exit edges to loop->latch.  */
      FOR_EACH_EDGE (e, ei, exit_bb->succs)
	{
	  if (!loop_exit_edge_p (loop, e))
	    redirect_edge_and_branch (e, loop->latch);
	}
      set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
    }
  else
    {
      /* If the loop does not have an exit, reconnect header and latch.  */
      make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
      set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
    }

  merge_target_bb = loop->header;

  /* Get at the virtual def valid for uses starting at the first block
     we merge into the header.  Without a virtual PHI the loop has the
     same virtual use on all stmts.  */
  gphi *vphi = get_virtual_phi (loop->header);
  tree last_vdef = NULL_TREE;
  if (vphi)
    {
      last_vdef = gimple_phi_result (vphi);
      for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
	   ! gsi_end_p (gsi); gsi_next (&gsi))
	if (gimple_vdef (gsi_stmt (gsi)))
	  last_vdef = gimple_vdef (gsi_stmt (gsi));
    }
  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      gimple_stmt_iterator gsi;
      gimple_stmt_iterator last;

      bb = ifc_bbs[i];

      /* Exit block and latch survive the merge.  */
      if (bb == exit_bb || bb == loop->latch)
	continue;

      /* We release virtual PHIs late because we have to propagate them
         out using the current VUSE.  The def might be the one used
	 after the loop.  */
      vphi = get_virtual_phi (bb);
      if (vphi)
	{
	  imm_use_iterator iter;
	  use_operand_p use_p;
	  gimple *use_stmt;
	  FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
		SET_USE (use_p, last_vdef);
	    }
	  gsi = gsi_for_stmt (vphi);
	  remove_phi_node (&gsi, true);
	}

      /* Make stmts member of loop->header and clear range info from all stmts
	 in BB which is now no longer executed conditional on a predicate we
	 could have derived it from.  */
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  gimple_set_bb (stmt, merge_target_bb);
	  /* Update virtual operands.  */
	  if (last_vdef)
	    {
	      use_operand_p use_p = ssa_vuse_operand (stmt);
	      if (use_p
		  && USE_FROM_PTR (use_p) != last_vdef)
		SET_USE (use_p, last_vdef);
	      if (gimple_vdef (stmt))
		last_vdef = gimple_vdef (stmt);
	    }
	  if (predicated[i])
	    {
	      /* The info on the defs was only valid when the stmt was
		 guarded by its predicate; reset it now that the stmt
		 executes unconditionally.  */
	      ssa_op_iter i;
	      tree op;
	      FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
		reset_flow_sensitive_info (op);
	    }
	}

      /* Update stmt list.  */
      last = gsi_last_bb (merge_target_bb);
      gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
      set_bb_seq (bb, NULL);

      delete_basic_block (bb);
    }

  /* If possible, merge loop header to the block with the exit edge.
     This reduces the number of basic blocks to two, to please the
     vectorizer that handles only loops with two nodes.  */
  if (exit_bb
      && exit_bb != loop->header)
    {
      /* We release virtual PHIs late because we have to propagate them
         out using the current VUSE.  The def might be the one used
	 after the loop.  */
      vphi = get_virtual_phi (exit_bb);
      if (vphi)
	{
	  imm_use_iterator iter;
	  use_operand_p use_p;
	  gimple *use_stmt;
	  FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
		SET_USE (use_p, last_vdef);
	    }
	  gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
	  remove_phi_node (&gsi, true);
	}

      if (can_merge_blocks_p (loop->header, exit_bb))
	merge_blocks (loop->header, exit_bb);
    }

  free (ifc_bbs);
  ifc_bbs = NULL;
  free (predicated);
}
2561
/* Version LOOP before if-converting it; the original loop
   will be if-converted, the new copy of the loop will not,
   and the LOOP_VECTORIZED internal call will be guarding which
   loop to execute.  The vectorizer pass will fold this
   internal call into either true or false.

   Note that this function intentionally invalidates profile.  Both edges
   out of LOOP_VECTORIZED must have 100% probability so the profile remains
   consistent after the condition is folded in the vectorizer.  */

static struct loop *
version_loop_for_if_conversion (struct loop *loop)
{
  basic_block cond_bb;
  tree cond = make_ssa_name (boolean_type_node);
  struct loop *new_loop;
  gimple *g;
  gimple_stmt_iterator gsi;
  unsigned int save_length;

  /* Build IFN_LOOP_VECTORIZED (loop->num, 0); the second argument is
     filled in with the copy's number once it exists.  */
  g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
				  build_int_cst (integer_type_node, loop->num),
				  integer_zero_node);
  gimple_call_set_lhs (g, cond);

  /* Save BB->aux around loop_version as that uses the same field.  */
  save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
  void **saved_preds = XALLOCAVEC (void *, save_length);
  for (unsigned i = 0; i < save_length; i++)
    saved_preds[i] = ifc_bbs[i]->aux;

  initialize_original_copy_tables ();
  /* At this point we invalidate profile consistency until
     IFN_LOOP_VECTORIZED is re-merged in the vectorizer.  */
  new_loop = loop_version (loop, cond, &cond_bb,
			   profile_probability::always (),
			   profile_probability::always (),
			   profile_probability::always (),
			   profile_probability::always (), true);
  free_original_copy_tables ();

  /* Restore the saved BB->aux values.  */
  for (unsigned i = 0; i < save_length; i++)
    ifc_bbs[i]->aux = saved_preds[i];

  if (new_loop == NULL)
    return NULL;

  /* The non-if-converted copy must never be vectorized.  */
  new_loop->dont_vectorize = true;
  new_loop->force_vectorize = false;
  gsi = gsi_last_bb (cond_bb);
  /* Now the copy's loop number is known: complete and insert the
     guarding LOOP_VECTORIZED call.  */
  gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
  update_ssa (TODO_update_ssa);
  return new_loop;
}
2617
2618 /* Return true when LOOP satisfies the follow conditions that will
2619 allow it to be recognized by the vectorizer for outer-loop
2620 vectorization:
2621 - The loop is not the root node of the loop tree.
2622 - The loop has exactly one inner loop.
2623 - The loop has a single exit.
2624 - The loop header has a single successor, which is the inner
2625 loop header.
2626 - Each of the inner and outer loop latches have a single
2627 predecessor.
2628 - The loop exit block has a single predecessor, which is the
2629 inner loop's exit block. */
2630
2631 static bool
versionable_outer_loop_p(struct loop * loop)2632 versionable_outer_loop_p (struct loop *loop)
2633 {
2634 if (!loop_outer (loop)
2635 || loop->dont_vectorize
2636 || !loop->inner
2637 || loop->inner->next
2638 || !single_exit (loop)
2639 || !single_succ_p (loop->header)
2640 || single_succ (loop->header) != loop->inner->header
2641 || !single_pred_p (loop->latch)
2642 || !single_pred_p (loop->inner->latch))
2643 return false;
2644
2645 basic_block outer_exit = single_pred (loop->latch);
2646 basic_block inner_exit = single_pred (loop->inner->latch);
2647
2648 if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
2649 return false;
2650
2651 if (dump_file)
2652 fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
2653
2654 return true;
2655 }
2656
/* Performs splitting of critical edges.  Skip splitting and return false
   if LOOP will not be converted because:

   - LOOP is not well formed.
   - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.

   Last restriction is valid only if AGGRESSIVE_IF_CONV is false.  */

static bool
ifcvt_split_critical_edges (struct loop *loop, bool aggressive_if_conv)
{
  basic_block *body;
  basic_block bb;
  unsigned int num = loop->num_nodes;
  unsigned int i;
  gimple *stmt;
  edge e;
  edge_iterator ei;
  auto_vec<edge> critical_edges;

  /* Loop is not well formed.  */
  if (num <= 2 || loop->inner || !single_exit (loop))
    return false;

  body = get_loop_body (loop);
  for (i = 0; i < num; i++)
    {
      bb = body[i];
      /* Unless aggressive (simd-pragma) mode, give up on blocks whose
	 PHIs have too many arguments to predicate cheaply.  */
      if (!aggressive_if_conv
	  && phi_nodes (bb)
	  && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "BB %d has complicated PHI with more than %u args.\n",
		     bb->index, MAX_PHI_ARG_NUM);

	  free (body);
	  return false;
	}
      if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
	continue;

      stmt = last_stmt (bb);
      /* Skip basic blocks not ending with conditional branch.  */
      if (!stmt || gimple_code (stmt) != GIMPLE_COND)
	continue;

      /* Collect critical successor edges that stay within the loop;
	 they are split after the whole body has been scanned.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
	  critical_edges.safe_push (e);
    }
  free (body);

  while (critical_edges.length () > 0)
    {
      e = critical_edges.pop ();
      /* Don't split if bb can be predicated along non-critical edge.  */
      if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
	split_edge (e);
    }

  return true;
}
2721
2722 /* Delete redundant statements produced by predication which prevents
2723 loop vectorization. */
2724
2725 static void
ifcvt_local_dce(basic_block bb)2726 ifcvt_local_dce (basic_block bb)
2727 {
2728 gimple *stmt;
2729 gimple *stmt1;
2730 gimple *phi;
2731 gimple_stmt_iterator gsi;
2732 auto_vec<gimple *> worklist;
2733 enum gimple_code code;
2734 use_operand_p use_p;
2735 imm_use_iterator imm_iter;
2736
2737 worklist.create (64);
2738 /* Consider all phi as live statements. */
2739 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2740 {
2741 phi = gsi_stmt (gsi);
2742 gimple_set_plf (phi, GF_PLF_2, true);
2743 worklist.safe_push (phi);
2744 }
2745 /* Consider load/store statements, CALL and COND as live. */
2746 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2747 {
2748 stmt = gsi_stmt (gsi);
2749 if (gimple_store_p (stmt)
2750 || gimple_assign_load_p (stmt)
2751 || is_gimple_debug (stmt))
2752 {
2753 gimple_set_plf (stmt, GF_PLF_2, true);
2754 worklist.safe_push (stmt);
2755 continue;
2756 }
2757 code = gimple_code (stmt);
2758 if (code == GIMPLE_COND || code == GIMPLE_CALL)
2759 {
2760 gimple_set_plf (stmt, GF_PLF_2, true);
2761 worklist.safe_push (stmt);
2762 continue;
2763 }
2764 gimple_set_plf (stmt, GF_PLF_2, false);
2765
2766 if (code == GIMPLE_ASSIGN)
2767 {
2768 tree lhs = gimple_assign_lhs (stmt);
2769 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2770 {
2771 stmt1 = USE_STMT (use_p);
2772 if (gimple_bb (stmt1) != bb)
2773 {
2774 gimple_set_plf (stmt, GF_PLF_2, true);
2775 worklist.safe_push (stmt);
2776 break;
2777 }
2778 }
2779 }
2780 }
2781 /* Propagate liveness through arguments of live stmt. */
2782 while (worklist.length () > 0)
2783 {
2784 ssa_op_iter iter;
2785 use_operand_p use_p;
2786 tree use;
2787
2788 stmt = worklist.pop ();
2789 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2790 {
2791 use = USE_FROM_PTR (use_p);
2792 if (TREE_CODE (use) != SSA_NAME)
2793 continue;
2794 stmt1 = SSA_NAME_DEF_STMT (use);
2795 if (gimple_bb (stmt1) != bb
2796 || gimple_plf (stmt1, GF_PLF_2))
2797 continue;
2798 gimple_set_plf (stmt1, GF_PLF_2, true);
2799 worklist.safe_push (stmt1);
2800 }
2801 }
2802 /* Delete dead statements. */
2803 gsi = gsi_start_bb (bb);
2804 while (!gsi_end_p (gsi))
2805 {
2806 stmt = gsi_stmt (gsi);
2807 if (gimple_plf (stmt, GF_PLF_2))
2808 {
2809 gsi_next (&gsi);
2810 continue;
2811 }
2812 if (dump_file && (dump_flags & TDF_DETAILS))
2813 {
2814 fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
2815 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2816 }
2817 gsi_remove (&gsi, true);
2818 release_defs (stmt);
2819 }
2820 }
2821
/* If-convert LOOP when it is legal.  For the moment this pass has no
   profitability analysis.  Returns non-zero todo flags when something
   changed.  */

unsigned int
tree_if_conversion (struct loop *loop)
{
  unsigned int todo = 0;
  bool aggressive_if_conv;
  struct loop *rloop;

 again:
  rloop = NULL;
  ifc_bbs = NULL;
  any_pred_load_store = false;
  any_complicated_phi = false;

  /* Apply more aggressive if-conversion when loop or its outer loop were
     marked with simd pragma.  When that's the case, we try to if-convert
     loop containing PHIs with more than MAX_PHI_ARG_NUM arguments.  */
  aggressive_if_conv = loop->force_vectorize;
  if (!aggressive_if_conv)
    {
      struct loop *outer_loop = loop_outer (loop);
      if (outer_loop && outer_loop->force_vectorize)
	aggressive_if_conv = true;
    }

  if (!ifcvt_split_critical_edges (loop, aggressive_if_conv))
    goto cleanup;

  if (!if_convertible_loop_p (loop)
      || !dbg_cnt (if_conversion_tree))
    goto cleanup;

  /* Predicated memory accesses or complicated PHIs only pay off if the
     loop is actually going to be vectorized; otherwise bail out.  */
  if ((any_pred_load_store || any_complicated_phi)
      && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
	  || loop->dont_vectorize))
    goto cleanup;

  /* Since we have no cost model, always version loops unless the user
     specified -ftree-loop-if-convert or unless versioning is required.
     Either version this loop, or if the pattern is right for outer-loop
     vectorization, version the outer loop.  In the latter case we will
     still if-convert the original inner loop.  */
  if (any_pred_load_store
      || any_complicated_phi
      || flag_tree_loop_if_convert != 1)
    {
      struct loop *vloop
	= (versionable_outer_loop_p (loop_outer (loop))
	   ? loop_outer (loop) : loop);
      struct loop *nloop = version_loop_for_if_conversion (vloop);
      if (nloop == NULL)
	goto cleanup;
      if (vloop != loop)
	{
	  /* If versionable_outer_loop_p decided to version the
	     outer loop, version also the inner loop of the non-vectorized
	     loop copy.  So we transform:
	      loop1
		loop2
	     into:
	      if (LOOP_VECTORIZED (1, 3))
		{
		  loop1
		    loop2
		}
	      else
		loop3 (copy of loop1)
		  if (LOOP_VECTORIZED (4, 5))
		    loop4 (copy of loop2)
		  else
		    loop5 (copy of loop4)  */
	  gcc_assert (nloop->inner && nloop->inner->next == NULL);
	  rloop = nloop->inner;
	}
    }

  /* Now all statements are if-convertible.  Combine all the basic
     blocks into one huge basic block doing the if-conversion
     on-the-fly.  */
  combine_blocks (loop);

  /* Delete dead predicate computations.  */
  ifcvt_local_dce (loop->header);

  todo |= TODO_cleanup_cfg;

 cleanup:
  if (ifc_bbs)
    {
      unsigned int i;

      for (i = 0; i < loop->num_nodes; i++)
	free_bb_predicate (ifc_bbs[i]);

      free (ifc_bbs);
      ifc_bbs = NULL;
    }
  /* When the outer loop was versioned above, the inner loop of the
     non-vectorized copy still needs processing: restart on it.  */
  if (rloop != NULL)
    {
      loop = rloop;
      goto again;
    }

  return todo;
}
2930
2931 /* Tree if-conversion pass management. */
2932
2933 namespace {
2934
/* Descriptor for the "ifcvt" GIMPLE pass; requires a CFG in SSA form
   and neither provides nor destroys any IL properties itself.  */
const pass_data pass_data_if_conversion =
{
  GIMPLE_PASS, /* type */
  "ifcvt", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_LOOP_IFCVT, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
2947
/* Pass wrapper running tree_if_conversion on the loops of a function;
   gate and execute are defined below.  */
class pass_if_conversion : public gimple_opt_pass
{
public:
  pass_if_conversion (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_if_conversion, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *);
  virtual unsigned int execute (function *);

}; // class pass_if_conversion
2960
2961 bool
gate(function * fun)2962 pass_if_conversion::gate (function *fun)
2963 {
2964 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
2965 && flag_tree_loop_if_convert != 0)
2966 || flag_tree_loop_if_convert == 1);
2967 }
2968
2969 unsigned int
execute(function * fun)2970 pass_if_conversion::execute (function *fun)
2971 {
2972 struct loop *loop;
2973 unsigned todo = 0;
2974
2975 if (number_of_loops (fun) <= 1)
2976 return 0;
2977
2978 FOR_EACH_LOOP (loop, 0)
2979 if (flag_tree_loop_if_convert == 1
2980 || ((flag_tree_loop_vectorize || loop->force_vectorize)
2981 && !loop->dont_vectorize))
2982 todo |= tree_if_conversion (loop);
2983
2984 if (todo)
2985 {
2986 free_numbers_of_iterations_estimates (fun);
2987 scev_reset ();
2988 }
2989
2990 if (flag_checking)
2991 {
2992 basic_block bb;
2993 FOR_EACH_BB_FN (bb, fun)
2994 gcc_assert (!bb->aux);
2995 }
2996
2997 return todo;
2998 }
2999
3000 } // anon namespace
3001
3002 gimple_opt_pass *
make_pass_if_conversion(gcc::context * ctxt)3003 make_pass_if_conversion (gcc::context *ctxt)
3004 {
3005 return new pass_if_conversion (ctxt);
3006 }
3007