1 /* If-conversion for vectorizer. 2 Copyright (C) 2004-2018 Free Software Foundation, Inc. 3 Contributed by Devang Patel <dpatel@apple.com> 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify it under 8 the terms of the GNU General Public License as published by the Free 9 Software Foundation; either version 3, or (at your option) any later 10 version. 11 12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13 WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 /* This pass implements a tree level if-conversion of loops. Its 22 initial goal is to help the vectorizer to vectorize loops with 23 conditions. 24 25 A short description of if-conversion: 26 27 o Decide if a loop is if-convertible or not. 28 o Walk all loop basic blocks in breadth first order (BFS order). 29 o Remove conditional statements (at the end of basic block) 30 and propagate condition into destination basic blocks' 31 predicate list. 32 o Replace modify expression with conditional modify expression 33 using current basic block's condition. 
34 o Merge all basic blocks 35 o Replace phi nodes with conditional modify expr 36 o Merge all basic blocks into header 37 38 Sample transformation: 39 40 INPUT 41 ----- 42 43 # i_23 = PHI <0(0), i_18(10)>; 44 <L0>:; 45 j_15 = A[i_23]; 46 if (j_15 > 41) goto <L1>; else goto <L17>; 47 48 <L17>:; 49 goto <bb 3> (<L3>); 50 51 <L1>:; 52 53 # iftmp.2_4 = PHI <0(8), 42(2)>; 54 <L3>:; 55 A[i_23] = iftmp.2_4; 56 i_18 = i_23 + 1; 57 if (i_18 <= 15) goto <L19>; else goto <L18>; 58 59 <L19>:; 60 goto <bb 1> (<L0>); 61 62 <L18>:; 63 64 OUTPUT 65 ------ 66 67 # i_23 = PHI <0(0), i_18(10)>; 68 <L0>:; 69 j_15 = A[i_23]; 70 71 <L3>:; 72 iftmp.2_4 = j_15 > 41 ? 42 : 0; 73 A[i_23] = iftmp.2_4; 74 i_18 = i_23 + 1; 75 if (i_18 <= 15) goto <L19>; else goto <L18>; 76 77 <L19>:; 78 goto <bb 1> (<L0>); 79 80 <L18>:; 81 */ 82 83 #include "config.h" 84 #include "system.h" 85 #include "coretypes.h" 86 #include "backend.h" 87 #include "rtl.h" 88 #include "tree.h" 89 #include "gimple.h" 90 #include "cfghooks.h" 91 #include "tree-pass.h" 92 #include "ssa.h" 93 #include "expmed.h" 94 #include "optabs-query.h" 95 #include "gimple-pretty-print.h" 96 #include "alias.h" 97 #include "fold-const.h" 98 #include "stor-layout.h" 99 #include "gimple-fold.h" 100 #include "gimplify.h" 101 #include "gimple-iterator.h" 102 #include "gimplify-me.h" 103 #include "tree-cfg.h" 104 #include "tree-into-ssa.h" 105 #include "tree-ssa.h" 106 #include "cfgloop.h" 107 #include "tree-data-ref.h" 108 #include "tree-scalar-evolution.h" 109 #include "tree-ssa-loop.h" 110 #include "tree-ssa-loop-niter.h" 111 #include "tree-ssa-loop-ivopts.h" 112 #include "tree-ssa-address.h" 113 #include "dbgcnt.h" 114 #include "tree-hash-traits.h" 115 #include "varasm.h" 116 #include "builtins.h" 117 #include "params.h" 118 #include "cfganal.h" 119 120 /* Only handle PHIs with no more arguments unless we are asked to by 121 simd pragma. 
*/
#define MAX_PHI_ARG_NUM \
  ((unsigned) PARAM_VALUE (PARAM_MAX_TREE_IF_CONVERSION_PHI_ARGS))

/* Indicate if new load/store that needs to be predicated is introduced
   during if conversion.  */
static bool any_pred_load_store;

/* Indicate if there are any complicated PHIs that need to be handled in
   if-conversion.  Complicated PHI has more than two arguments and can't
   be degenerated to two arguments PHI.  See more information in comment
   before phi_convertible_by_degenerating_args.  */
static bool any_complicated_phi;

/* Hash for struct innermost_loop_behavior.  It depends on the user to
   free the memory.  */

struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
{
  static inline hashval_t hash (const value_type &);
  static inline bool equal (const value_type &,
			    const compare_type &);
};

/* Hash the four innermost-behavior fields together; a NULL_TREE field
   contributes the seed unchanged via iterative_hash_expr.  */

inline hashval_t
innermost_loop_behavior_hash::hash (const value_type &e)
{
  hashval_t hash;

  hash = iterative_hash_expr (e->base_address, 0);
  hash = iterative_hash_expr (e->offset, hash);
  hash = iterative_hash_expr (e->init, hash);
  return iterative_hash_expr (e->step, hash);
}

/* Two behaviors are equal when each of the four fields is either NULL
   in both or operand_equal_p in both.  */

inline bool
innermost_loop_behavior_hash::equal (const value_type &e1,
				     const compare_type &e2)
{
  /* A field present on only one side means "not equal".  */
  if ((e1->base_address && !e2->base_address)
      || (!e1->base_address && e2->base_address)
      || (!e1->offset && e2->offset)
      || (e1->offset && !e2->offset)
      || (!e1->init && e2->init)
      || (e1->init && !e2->init)
      || (!e1->step && e2->step)
      || (e1->step && !e2->step))
    return false;

  /* Both sides have the field: compare structurally.  */
  if (e1->base_address && e2->base_address
      && !operand_equal_p (e1->base_address, e2->base_address, 0))
    return false;
  if (e1->offset && e2->offset
      && !operand_equal_p (e1->offset, e2->offset, 0))
    return false;
  if (e1->init && e2->init
      && !operand_equal_p (e1->init, e2->init, 0))
    return false;
  if (e1->step && e2->step
      && !operand_equal_p (e1->step, e2->step, 0))
    return false;

  return true;
}

/* List of basic blocks in if-conversion-suitable order.  */
static basic_block *ifc_bbs;

/* Hash table to store <DR's innermost loop behavior, DR> pairs.  */
static hash_map<innermost_loop_behavior_hash,
		data_reference_p> *innermost_DR_map;

/* Hash table to store <base reference, DR> pairs.  */
static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;

/* Structure used to predicate basic blocks.  This is attached to the
   ->aux field of the BBs in the loop to be if-converted.  */
struct bb_predicate {

  /* The condition under which this basic block is executed.  */
  tree predicate;

  /* PREDICATE is gimplified, and the sequence of statements is
     recorded here, in order to avoid the duplication of computations
     that occur in previous conditions.  See PR44483.  */
  gimple_seq predicate_gimplified_stmts;
};

/* Returns true when the basic block BB has a predicate.  */

static inline bool
bb_has_predicate (basic_block bb)
{
  return bb->aux != NULL;
}

/* Returns the gimplified predicate for basic block BB.  */

static inline tree
bb_predicate (basic_block bb)
{
  return ((struct bb_predicate *) bb->aux)->predicate;
}

/* Sets the gimplified predicate COND for basic block BB.  */

static inline void
set_bb_predicate (basic_block bb, tree cond)
{
  /* COND must be a valid GIMPLE condition, possibly wrapped in one
     TRUTH_NOT_EXPR.  */
  gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
	       && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
	      || is_gimple_condexpr (cond));
  ((struct bb_predicate *) bb->aux)->predicate = cond;
}

/* Returns the sequence of statements of the gimplification of the
   predicate for basic block BB.
*/

static inline gimple_seq
bb_predicate_gimplified_stmts (basic_block bb)
{
  return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
}

/* Sets the sequence of statements STMTS of the gimplification of the
   predicate for basic block BB.  */

static inline void
set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
{
  ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
}

/* Adds the sequence of statements STMTS to the sequence of statements
   of the predicate for basic block BB.  */

static inline void
add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
{
  /* We might have updated some stmts in STMTS via force_gimple_operand
     calling fold_stmt and that producing multiple stmts.  Delink immediate
     uses so update_ssa after loop versioning doesn't get confused for
     the not yet inserted predicates.
     ??? This should go away once we reliably avoid updating stmts
     not in any BB.  */
  for (gimple_stmt_iterator gsi = gsi_start (stmts);
       !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      delink_stmt_imm_use (stmt);
      gimple_set_modified (stmt, true);
    }
  gimple_seq_add_seq_without_update
    (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
}

/* Initializes to TRUE the predicate of basic block BB.  */

static inline void
init_bb_predicate (basic_block bb)
{
  bb->aux = XNEW (struct bb_predicate);
  set_bb_predicate_gimplified_stmts (bb, NULL);
  set_bb_predicate (bb, boolean_true_node);
}

/* Release the SSA_NAMEs associated with the predicate of basic block BB.  */

static inline void
release_bb_predicate (basic_block bb)
{
  gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
  if (stmts)
    {
      /* Ensure that these stmts haven't yet been added to a bb.  */
      if (flag_checking)
	for (gimple_stmt_iterator i = gsi_start (stmts);
	     !gsi_end_p (i); gsi_next (&i))
	  gcc_assert (! gimple_bb (gsi_stmt (i)));

      /* Discard them.  */
      gimple_seq_discard (stmts);
      set_bb_predicate_gimplified_stmts (bb, NULL);
    }
}

/* Free the predicate of basic block BB.  */

static inline void
free_bb_predicate (basic_block bb)
{
  if (!bb_has_predicate (bb))
    return;

  release_bb_predicate (bb);
  free (bb->aux);
  bb->aux = NULL;
}

/* Reinitialize predicate of BB with the true predicate.  */

static inline void
reset_bb_predicate (basic_block bb)
{
  if (!bb_has_predicate (bb))
    init_bb_predicate (bb);
  else
    {
      release_bb_predicate (bb);
      set_bb_predicate (bb, boolean_true_node);
    }
}

/* Returns a new SSA_NAME of type TYPE that is assigned the value of
   the expression EXPR.  Inserts the statement created for this
   computation before GSI and leaves the iterator GSI at the same
   statement.  */

static tree
ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
{
  tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
  gimple *stmt = gimple_build_assign (new_name, expr);
  /* Copy the virtual use from the statement we insert before, so an
     EXPR that reads memory stays linked into the virtual SSA web.  */
  gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
  return new_name;
}

/* Return true when COND is a false predicate.  */

static inline bool
is_false_predicate (tree cond)
{
  return (cond != NULL_TREE
	  && (cond == boolean_false_node
	      || integer_zerop (cond)));
}

/* Return true when COND is a true predicate.  */

static inline bool
is_true_predicate (tree cond)
{
  /* NULL_TREE counts as "always executed".  */
  return (cond == NULL_TREE
	  || cond == boolean_true_node
	  || integer_onep (cond));
}

/* Returns true when BB has a predicate that is not trivial: true or
   NULL_TREE.
*/ 371 372 static inline bool 373 is_predicated (basic_block bb) 374 { 375 return !is_true_predicate (bb_predicate (bb)); 376 } 377 378 /* Parses the predicate COND and returns its comparison code and 379 operands OP0 and OP1. */ 380 381 static enum tree_code 382 parse_predicate (tree cond, tree *op0, tree *op1) 383 { 384 gimple *s; 385 386 if (TREE_CODE (cond) == SSA_NAME 387 && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond))) 388 { 389 if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison) 390 { 391 *op0 = gimple_assign_rhs1 (s); 392 *op1 = gimple_assign_rhs2 (s); 393 return gimple_assign_rhs_code (s); 394 } 395 396 else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR) 397 { 398 tree op = gimple_assign_rhs1 (s); 399 tree type = TREE_TYPE (op); 400 enum tree_code code = parse_predicate (op, op0, op1); 401 402 return code == ERROR_MARK ? ERROR_MARK 403 : invert_tree_comparison (code, HONOR_NANS (type)); 404 } 405 406 return ERROR_MARK; 407 } 408 409 if (COMPARISON_CLASS_P (cond)) 410 { 411 *op0 = TREE_OPERAND (cond, 0); 412 *op1 = TREE_OPERAND (cond, 1); 413 return TREE_CODE (cond); 414 } 415 416 return ERROR_MARK; 417 } 418 419 /* Returns the fold of predicate C1 OR C2 at location LOC. */ 420 421 static tree 422 fold_or_predicates (location_t loc, tree c1, tree c2) 423 { 424 tree op1a, op1b, op2a, op2b; 425 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b); 426 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b); 427 428 if (code1 != ERROR_MARK && code2 != ERROR_MARK) 429 { 430 tree t = maybe_fold_or_comparisons (code1, op1a, op1b, 431 code2, op2a, op2b); 432 if (t) 433 return t; 434 } 435 436 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2); 437 } 438 439 /* Returns either a COND_EXPR or the folded expression if the folded 440 expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR, 441 a constant or a SSA_NAME. 
*/

static tree
fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
{
  tree rhs1, lhs1, cond_expr;

  /* If COND is comparison r != 0 and r has boolean type, convert COND
     to SSA_NAME to accept by vect bool pattern.  */
  if (TREE_CODE (cond) == NE_EXPR)
    {
      tree op0 = TREE_OPERAND (cond, 0);
      tree op1 = TREE_OPERAND (cond, 1);
      if (TREE_CODE (op0) == SSA_NAME
	  && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
	  && (integer_zerop (op1)))
	cond = op0;
    }
  cond_expr = fold_ternary (COND_EXPR, type, cond, rhs, lhs);

  /* fold_ternary gave up: build the COND_EXPR unfolded.  */
  if (cond_expr == NULL_TREE)
    return build3 (COND_EXPR, type, cond, rhs, lhs);

  STRIP_USELESS_TYPE_CONVERSION (cond_expr);

  /* A constant or SSA_NAME result can be used directly.  */
  if (is_gimple_val (cond_expr))
    return cond_expr;

  if (TREE_CODE (cond_expr) == ABS_EXPR)
    {
      /* NOTE(review): ABS_EXPR is a unary tree, so operand index 1
	 looks suspicious here — confirm this should not be operand 0.  */
      rhs1 = TREE_OPERAND (cond_expr, 1);
      STRIP_USELESS_TYPE_CONVERSION (rhs1);
      if (is_gimple_val (rhs1))
	return build1 (ABS_EXPR, type, rhs1);
    }

  /* Keep a folded MIN/MAX only when both stripped operands are valid
     gimple values; otherwise fall back to the plain COND_EXPR.  */
  if (TREE_CODE (cond_expr) == MIN_EXPR
      || TREE_CODE (cond_expr) == MAX_EXPR)
    {
      lhs1 = TREE_OPERAND (cond_expr, 0);
      STRIP_USELESS_TYPE_CONVERSION (lhs1);
      rhs1 = TREE_OPERAND (cond_expr, 1);
      STRIP_USELESS_TYPE_CONVERSION (rhs1);
      if (is_gimple_val (rhs1) && is_gimple_val (lhs1))
	return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
    }
  return build3 (COND_EXPR, type, cond, rhs, lhs);
}

/* Add condition NC to the predicate list of basic block BB.  LOOP is
   the loop to be if-converted.  Use predicate of cd-equivalent block
   for join bb if it exists: we call basic blocks bb1 and bb2
   cd-equivalent if they are executed under the same condition.
*/

static inline void
add_to_predicate_list (struct loop *loop, basic_block bb, tree nc)
{
  tree bc, *tp;
  basic_block dom_bb;

  /* A true condition adds no information.  */
  if (is_true_predicate (nc))
    return;

  /* If dominance tells us this basic block is always executed,
     don't record any predicates for it.  */
  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    return;

  dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
  /* We use notion of cd equivalence to get simpler predicate for
     join block, e.g. if join block has 2 predecessors with predicates
     p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
     p1 & p2 | p1 & !p2.  */
  if (dom_bb != loop->header
      && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
    {
      gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
      bc = bb_predicate (dom_bb);
      if (!is_true_predicate (bc))
	set_bb_predicate (bb, bc);
      else
	gcc_assert (is_true_predicate (bb_predicate (bb)));
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
		 dom_bb->index, bb->index);
      return;
    }

  /* OR the new condition into whatever predicate BB already has.  */
  if (!is_predicated (bb))
    bc = nc;
  else
    {
      bc = bb_predicate (bb);
      bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
      if (is_true_predicate (bc))
	{
	  reset_bb_predicate (bb);
	  return;
	}
    }

  /* Allow a TRUTH_NOT_EXPR around the main predicate.  */
  if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
    tp = &TREE_OPERAND (bc, 0);
  else
    tp = &bc;
  if (!is_gimple_condexpr (*tp))
    {
      gimple_seq stmts;
      *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
      /* Keep the gimplification statements for later insertion.  */
      add_bb_predicate_gimplified_stmts (bb, stmts);
    }
  set_bb_predicate (bb, bc);
}

/* Add the condition COND to the previous condition PREV_COND, and add
   this to the predicate list of the destination of edge E.  LOOP is
   the loop to be if-converted.
*/ 559 560 static void 561 add_to_dst_predicate_list (struct loop *loop, edge e, 562 tree prev_cond, tree cond) 563 { 564 if (!flow_bb_inside_loop_p (loop, e->dest)) 565 return; 566 567 if (!is_true_predicate (prev_cond)) 568 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, 569 prev_cond, cond); 570 571 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest)) 572 add_to_predicate_list (loop, e->dest, cond); 573 } 574 575 /* Return true if one of the successor edges of BB exits LOOP. */ 576 577 static bool 578 bb_with_exit_edge_p (struct loop *loop, basic_block bb) 579 { 580 edge e; 581 edge_iterator ei; 582 583 FOR_EACH_EDGE (e, ei, bb->succs) 584 if (loop_exit_edge_p (loop, e)) 585 return true; 586 587 return false; 588 } 589 590 /* Given PHI which has more than two arguments, this function checks if 591 it's if-convertible by degenerating its arguments. Specifically, if 592 below two conditions are satisfied: 593 594 1) Number of PHI arguments with different values equals to 2 and one 595 argument has the only occurrence. 596 2) The edge corresponding to the unique argument isn't critical edge. 597 598 Such PHI can be handled as PHIs have only two arguments. For example, 599 below PHI: 600 601 res = PHI <A_1(e1), A_1(e2), A_2(e3)>; 602 603 can be transformed into: 604 605 res = (predicate of e3) ? A_2 : A_1; 606 607 Return TRUE if it is the case, FALSE otherwise. 
*/

static bool
phi_convertible_by_degenerating_args (gphi *phi)
{
  edge e;
  tree arg, t1 = NULL, t2 = NULL;
  unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
  unsigned int num_args = gimple_phi_num_args (phi);

  gcc_assert (num_args > 2);

  /* Bucket the arguments into at most two distinct values T1/T2,
     counting occurrences (N1/N2) and remembering the last argument
     index of each (I1/I2).  More than two distinct values fails.  */
  for (i = 0; i < num_args; i++)
    {
      arg = gimple_phi_arg_def (phi, i);
      if (t1 == NULL || operand_equal_p (t1, arg, 0))
	{
	  n1++;
	  i1 = i;
	  t1 = arg;
	}
      else if (t2 == NULL || operand_equal_p (t2, arg, 0))
	{
	  n2++;
	  i2 = i;
	  t2 = arg;
	}
      else
	return false;
    }

  /* One of the two values must occur exactly once.  */
  if (n1 != 1 && n2 != 1)
    return false;

  /* Check if the edge corresponding to the unique arg is critical.  */
  e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
  if (EDGE_COUNT (e->src->succs) > 1)
    return false;

  return true;
}

/* Return true when PHI is if-convertible.  PHI is part of loop LOOP
   and it belongs to basic block BB.  Note at this point, it is sure
   that PHI is if-convertible.  This function updates global variable
   ANY_COMPLICATED_PHI if PHI is complicated.  */

static bool
if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "-------------------------\n");
      print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
    }

  /* A non-header PHI with more than two arguments that can't be
     degenerated to two is "complicated" — record that globally.  */
  if (bb != loop->header
      && gimple_phi_num_args (phi) > 2
      && !phi_convertible_by_degenerating_args (phi))
    any_complicated_phi = true;

  return true;
}

/* Records the status of a data reference.  This struct is attached to
   each DR->aux field.
*/

struct ifc_dr {
  /* True when the reference is read or written on every iteration,
     i.e. its rw_predicate ORs up to true.  */
  bool rw_unconditionally;
  /* True when the reference is written on every iteration.  */
  bool w_unconditionally;
  /* True when the base object is written at least once
     unconditionally.  */
  bool written_at_least_once;

  /* OR of the predicates of all reads and writes of this reference.  */
  tree rw_predicate;
  /* OR of the predicates of all writes of this reference.  */
  tree w_predicate;
  /* OR of the predicates of all writes to the base object.  */
  tree base_w_predicate;
};

#define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
#define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
#define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
#define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)

/* Iterates over DR's and stores refs, DR and base refs, DR pairs in
   HASH tables.  While storing them in HASH table, it checks if the
   reference is unconditionally read or written and stores that as a flag
   information.  For base reference it checks if it is written at least
   once unconditionally and stores it as flag information along with DR.
   In other words for every data reference A in STMT there exist other
   accesses to a data reference with the same base with predicates that
   add up (OR-up) to the true predicate: this ensures that the data
   reference A is touched (read or written) on every iteration of the
   if-converted loop.
*/
static void
hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
{

  data_reference_p *master_dr, *base_master_dr;
  tree base_ref = DR_BASE_OBJECT (a);
  innermost_loop_behavior *innermost = &DR_INNERMOST (a);
  /* Predicate of the block containing the access.  */
  tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
  bool exist1, exist2;

  /* The first DR seen for a given innermost behavior becomes the
     "master" that accumulates the predicates of all aliases.  */
  master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
  if (!exist1)
    *master_dr = a;

  if (DR_IS_WRITE (a))
    {
      IFC_DR (*master_dr)->w_predicate
	= fold_or_predicates (UNKNOWN_LOCATION, ca,
			      IFC_DR (*master_dr)->w_predicate);
      if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
	DR_W_UNCONDITIONALLY (*master_dr) = true;
    }
  IFC_DR (*master_dr)->rw_predicate
    = fold_or_predicates (UNKNOWN_LOCATION, ca,
			  IFC_DR (*master_dr)->rw_predicate);
  if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
    DR_RW_UNCONDITIONALLY (*master_dr) = true;

  /* Likewise accumulate write predicates per base object.  */
  if (DR_IS_WRITE (a))
    {
      base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
      if (!exist2)
	*base_master_dr = a;
      IFC_DR (*base_master_dr)->base_w_predicate
	= fold_or_predicates (UNKNOWN_LOCATION, ca,
			      IFC_DR (*base_master_dr)->base_w_predicate);
      if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
	DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
    }
}

/* Return TRUE if can prove the index IDX of an array reference REF is
   within array bound.  Return false otherwise.  */

static bool
idx_within_array_bound (tree ref, tree *idx, void *dta)
{
  bool overflow;
  widest_int niter, valid_niter, delta, wi_step;
  tree ev, init, step;
  tree low, high;
  struct loop *loop = (struct loop*) dta;

  /* Only support within-bound access for array references.  */
  if (TREE_CODE (ref) != ARRAY_REF)
    return false;

  /* For arrays at the end of the structure, we are not guaranteed that they
     do not really extend over their declared size.  However, for arrays of
     size greater than one, this is unlikely to be intended.  */
  if (array_at_struct_end_p (ref))
    return false;

  /* Describe the index as INIT + STEP * iteration via scalar
     evolution.  */
  ev = analyze_scalar_evolution (loop, *idx);
  ev = instantiate_parameters (loop, ev);
  init = initial_condition (ev);
  step = evolution_part_in_loop_num (ev, loop->num);

  if (!init || TREE_CODE (init) != INTEGER_CST
      || (step && TREE_CODE (step) != INTEGER_CST))
    return false;

  low = array_ref_low_bound (ref);
  high = array_ref_up_bound (ref);

  /* The case of nonconstant bounds could be handled, but it would be
     complicated.  */
  if (TREE_CODE (low) != INTEGER_CST
      || !high || TREE_CODE (high) != INTEGER_CST)
    return false;

  /* Check if the initial idx is within bound.  */
  if (wi::to_widest (init) < wi::to_widest (low)
      || wi::to_widest (init) > wi::to_widest (high))
    return false;

  /* The idx is always within bound.  */
  if (!step || integer_zerop (step))
    return true;

  if (!max_loop_iterations (loop, &niter))
    return false;

  /* DELTA is the room between INIT and the bound approached by STEP;
     WI_STEP is |STEP|.  */
  if (wi::to_widest (step) < 0)
    {
      delta = wi::to_widest (init) - wi::to_widest (low);
      wi_step = -wi::to_widest (step);
    }
  else
    {
      delta = wi::to_widest (high) - wi::to_widest (init);
      wi_step = wi::to_widest (step);
    }

  valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
  /* The iteration space of idx is within array bound.  */
  if (!overflow && niter <= valid_niter)
    return true;

  return false;
}

/* Return TRUE if ref is a within bound array reference.
*/

static bool
ref_within_array_bound (gimple *stmt, tree ref)
{
  struct loop *loop = loop_containing_stmt (stmt);

  gcc_assert (loop != NULL);
  /* Every index of REF must pass idx_within_array_bound.  */
  return for_each_index (&ref, idx_within_array_bound, loop);
}


/* Given a memory reference expression T, return TRUE if base object
   it refers to is writable.  The base object of a memory reference
   is the main object being referenced, which is returned by function
   get_base_address.  */

static bool
base_object_writable (tree ref)
{
  tree base_tree = get_base_address (ref);

  return (base_tree
	  && DECL_P (base_tree)
	  && decl_binds_to_current_def_p (base_tree)
	  && !TREE_READONLY (base_tree));
}

/* Return true when the memory references of STMT won't trap in the
   if-converted code.  There are two things that we have to check for:

   - writes to memory occur to writable memory: if-conversion of
   memory writes transforms the conditional memory writes into
   unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
   into "A[i] = cond ? foo : A[i]", and as the write to memory may not
   be executed at all in the original code, it may be a readonly
   memory.  To check that A is not const-qualified, we check that
   there exists at least an unconditional write to A in the current
   function.

   - reads or writes to memory are valid memory accesses for every
   iteration.  To check that the memory accesses are correctly formed
   and that we are allowed to read and write in these locations, we
   check that the memory accesses to be if-converted occur at every
   iteration unconditionally.

   Returns true for the memory reference in STMT, same memory reference
   is read or written unconditionally at least once and the base memory
   reference is written unconditionally once.  This is to check reference
   will not write fault.
   Also returns true if the memory reference is
   unconditionally read once then we are conditionally writing to memory
   which is defined as read and write and is bound to the definition
   we are seeing.  */
static bool
ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
{
  /* If DR didn't see a reference here we can't use it to tell
     whether the ref traps or not.  */
  if (gimple_uid (stmt) == 0)
    return false;

  data_reference_p *master_dr, *base_master_dr;
  /* The uid was set to 1 + index into DRS by the analysis phase.  */
  data_reference_p a = drs[gimple_uid (stmt) - 1];

  tree base = DR_BASE_OBJECT (a);
  innermost_loop_behavior *innermost = &DR_INNERMOST (a);

  gcc_assert (DR_STMT (a) == stmt);
  gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
	      || DR_INIT (a) || DR_STEP (a));

  master_dr = innermost_DR_map->get (innermost);
  gcc_assert (master_dr != NULL);

  base_master_dr = baseref_DR_map->get (base);

  /* If a is unconditionally written to it doesn't trap.  */
  if (DR_W_UNCONDITIONALLY (*master_dr))
    return true;

  /* If a is unconditionally accessed then ...

     Even a is conditional access, we can treat it as an unconditional
     one if it's an array reference and all its index are within array
     bound.  */
  if (DR_RW_UNCONDITIONALLY (*master_dr)
      || ref_within_array_bound (stmt, DR_REF (a)))
    {
      /* an unconditional read won't trap.  */
      if (DR_IS_READ (a))
	return true;

      /* an unconditional write won't trap if the base is written
	 to unconditionally.  */
      if (base_master_dr
	  && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
	return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES);
      /* or the base is known to be not readonly.  */
      else if (base_object_writable (DR_REF (a)))
	return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES);
    }

  return false;
}

/* Return true if STMT could be converted into a masked load or store
   (conditional load or store based on a mask computed from bb predicate).  */

static bool
ifcvt_can_use_mask_load_store (gimple *stmt)
{
  tree lhs, ref;
  machine_mode mode;
  basic_block bb = gimple_bb (stmt);
  bool is_load;

  /* Masked load/store is only worthwhile (and only later expanded)
     when the loop is going to be vectorized.  */
  if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
      || bb->loop_father->dont_vectorize
      || !gimple_assign_single_p (stmt)
      || gimple_has_volatile_ops (stmt))
    return false;

  /* Check whether this is a load or store.  */
  lhs = gimple_assign_lhs (stmt);
  if (gimple_store_p (stmt))
    {
      if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
	return false;
      is_load = false;
      ref = lhs;
    }
  else if (gimple_assign_load_p (stmt))
    {
      is_load = true;
      ref = gimple_assign_rhs1 (stmt);
    }
  else
    return false;

  if (may_be_nonaddressable_p (ref))
    return false;

  /* Mask should be integer mode of the same size as the load/store
     mode.  */
  mode = TYPE_MODE (TREE_TYPE (lhs));
  if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
    return false;

  /* Finally ask the target whether it has the masked optab.  */
  if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
    return true;

  return false;
}

/* Return true when STMT is if-convertible.

   GIMPLE_ASSIGN statement is not if-convertible if,
   - it is not movable,
   - it could trap,
   - LHS is not var decl.
*/

static bool
if_convertible_gimple_assign_stmt_p (gimple *stmt,
				     vec<data_reference_p> refs)
{
  tree lhs = gimple_assign_lhs (stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "-------------------------\n");
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
    }

  if (!is_gimple_reg_type (TREE_TYPE (lhs)))
    return false;

  /* Some of these constraints might be too conservative.  */
  if (stmt_ends_bb_p (stmt)
      || gimple_has_volatile_ops (stmt)
      || (TREE_CODE (lhs) == SSA_NAME
	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
      || gimple_has_side_effects (stmt))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "stmt not suitable for ifcvt\n");
      return false;
    }

  /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because
     in between if_convertible_loop_p and combine_blocks
     we can perform loop versioning.  */
  gimple_set_plf (stmt, GF_PLF_2, false);

  /* The statement may trap unless the data-reference analysis can
     prove its memory accesses safe; a maskable load/store is still
     acceptable and is flagged for predication via GF_PLF_2.  */
  if ((! gimple_vuse (stmt)
       || gimple_could_trap_p_1 (stmt, false, false)
       || ! ifcvt_memrefs_wont_trap (stmt, refs))
      && gimple_could_trap_p (stmt))
    {
      if (ifcvt_can_use_mask_load_store (stmt))
	{
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  any_pred_load_store = true;
	  return true;
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "tree could trap...\n");
      return false;
    }

  /* When if-converting stores force versioning, likewise if we
     ended up generating store data races.  */
  if (gimple_vdef (stmt))
    any_pred_load_store = true;

  return true;
}

/* Return true when STMT is if-convertible.

   A statement is if-convertible if:
   - it is an if-convertible GIMPLE_ASSIGN,
   - it is a GIMPLE_LABEL or a GIMPLE_COND,
   - it is builtins call.
*/

static bool
if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
{
  switch (gimple_code (stmt))
    {
    case GIMPLE_LABEL:
    case GIMPLE_DEBUG:
    case GIMPLE_COND:
      return true;

    case GIMPLE_ASSIGN:
      return if_convertible_gimple_assign_stmt_p (stmt, refs);

    case GIMPLE_CALL:
      {
	tree fndecl = gimple_call_fndecl (stmt);
	if (fndecl)
	  {
	    /* Only const (and non-looping) builtin calls are safe to
	       execute speculatively under a merged block.  */
	    int flags = gimple_call_flags (stmt);
	    if ((flags & ECF_CONST)
		&& !(flags & ECF_LOOPING_CONST_OR_PURE)
		/* We can only vectorize some builtins at the moment,
		   so restrict if-conversion to those.  */
		&& DECL_BUILT_IN (fndecl))
	      return true;
	  }
	return false;
      }

    default:
      /* Don't know what to do with 'em so don't do anything.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "don't know what to do\n");
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      return false;
    }

  return true;
}

/* Assumes that BB has more than 1 predecessors.
   Returns false if at least one predecessor is not on a critical edge
   and true otherwise.  */

static inline bool
all_preds_critical_p (basic_block bb)
{
  edge e;
  edge_iterator ei;

  /* An incoming edge is non-critical when its source has a single
     successor.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    if (EDGE_COUNT (e->src->succs) == 1)
      return false;
  return true;
}

/* Returns true if at least one predecessor of BB is on a critical
   edge.  */
static inline bool
has_pred_critical_p (basic_block bb)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->preds)
    if (EDGE_COUNT (e->src->succs) > 1)
      return true;
  return false;
}

/* Return true when BB is if-convertible.  This routine does not check
   basic block's statements and phis.

   A basic block is not if-convertible if:
   - it is non-empty and it is after the exit block (in BFS order),
   - it is after the exit block but before the latch,
   - its edges are not normal.

   EXIT_BB is the basic block containing the exit of the LOOP.  BB is
   inside LOOP.  */

static bool
if_convertible_bb_p (struct loop *loop, basic_block bb, basic_block exit_bb)
{
  edge e;
  edge_iterator ei;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "----------[%d]-------------\n", bb->index);

  /* Only blocks ending in nothing or a two-way conditional can be
     predicated.  */
  if (EDGE_COUNT (bb->succs) > 2)
    return false;

  /* EXIT_BB non-NULL means the exit block has already been seen in the
     BFS walk; after it only an empty latch dominated by the exit is
     acceptable.  */
  if (exit_bb)
    {
      if (bb != loop->latch)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "basic block after exit bb but before latch\n");
	  return false;
	}
      else if (!empty_block_p (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "non empty basic block after exit bb\n");
	  return false;
	}
      /* NOTE(review): the first conjunct is always true here since the
	 branch above already returned when BB != LOOP->latch.  */
      else if (bb == loop->latch
	       && bb != exit_bb
	       && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "latch is not dominated by exit_block\n");
	  return false;
	}
    }

  /* Be less adventurous and handle only normal edges.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "Difficult to handle edges\n");
	return false;
      }

  return true;
}

/* Return true when all predecessor blocks of BB are visited.  The
   VISITED bitmap keeps track of the visited blocks.
*/

static bool
pred_blocks_visited_p (basic_block bb, bitmap *visited)
{
  edge e;
  edge_iterator ei;
  FOR_EACH_EDGE (e, ei, bb->preds)
    if (!bitmap_bit_p (*visited, e->src->index))
      return false;

  return true;
}

/* Get body of a LOOP in suitable order for if-conversion.  It is
   caller's responsibility to deallocate basic block list.
   If-conversion suitable order is, breadth first sort (BFS) order
   with an additional constraint: select a block only if all its
   predecessors are already selected.  */

static basic_block *
get_loop_body_in_if_conv_order (const struct loop *loop)
{
  basic_block *blocks, *blocks_in_bfs_order;
  basic_block bb;
  bitmap visited;
  unsigned int index = 0;
  unsigned int visited_count = 0;

  gcc_assert (loop->num_nodes);
  gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));

  blocks = XCNEWVEC (basic_block, loop->num_nodes);
  visited = BITMAP_ALLOC (NULL);

  blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);

  index = 0;
  while (index < loop->num_nodes)
    {
      bb = blocks_in_bfs_order [index];

      /* Irreducible regions cannot be if-converted; give up and let
	 the caller see a NULL result.  */
      if (bb->flags & BB_IRREDUCIBLE_LOOP)
	{
	  free (blocks_in_bfs_order);
	  BITMAP_FREE (visited);
	  free (blocks);
	  return NULL;
	}

      if (!bitmap_bit_p (visited, bb->index))
	{
	  /* Select BB only once all its predecessors were selected
	     (the loop header is picked unconditionally since its
	     latch predecessor is inside the loop).  */
	  if (pred_blocks_visited_p (bb, &visited)
	      || bb == loop->header)
	    {
	      /* This block is now visited.  */
	      bitmap_set_bit (visited, bb->index);
	      blocks[visited_count++] = bb;
	    }
	}

      index++;

      /* Restart the BFS scan until every block has been selected;
	 blocks skipped earlier become eligible once their
	 predecessors are in.  */
      if (index == loop->num_nodes
	  && visited_count != loop->num_nodes)
	/* Not done yet.  */
	index = 0;
    }
  free (blocks_in_bfs_order);
  BITMAP_FREE (visited);
  return blocks;
}

/* Analyze and compute the predicates for all the basic blocks in
   LOOP.

   predicate_bbs first allocates the predicates of the basic blocks.
   These fields are then initialized with the tree expressions
   representing the predicates under which a basic block is executed
   in the LOOP.  As the loop->header is executed at each iteration, it
   has the "true" predicate.  Other statements executed under a
   condition are predicated with that condition, for example

   | if (x)
   |   S1;
   | else
   |   S2;

   S1 will be predicated with "x", and
   S2 will be predicated with "!x".  */

static void
predicate_bbs (loop_p loop)
{
  unsigned int i;

  for (i = 0; i < loop->num_nodes; i++)
    init_bb_predicate (ifc_bbs[i]);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      tree cond;
      gimple *stmt;

      /* The loop latch and loop exit block are always executed and
	 have no extra conditions to be processed: skip them.  */
      if (bb == loop->latch
	  || bb_with_exit_edge_p (loop, bb))
	{
	  reset_bb_predicate (bb);
	  continue;
	}

      cond = bb_predicate (bb);
      stmt = last_stmt (bb);
      if (stmt && gimple_code (stmt) == GIMPLE_COND)
	{
	  tree c2;
	  edge true_edge, false_edge;
	  location_t loc = gimple_location (stmt);
	  /* Rebuild the comparison of the GIMPLE_COND as a tree
	     expression that can be propagated to the successors.  */
	  tree c = build2_loc (loc, gimple_cond_code (stmt),
			       boolean_type_node,
			       gimple_cond_lhs (stmt),
			       gimple_cond_rhs (stmt));

	  /* Add new condition into destination's predicate list.  */
	  extract_true_false_edges_from_block (gimple_bb (stmt),
					       &true_edge, &false_edge);

	  /* If C is true, then TRUE_EDGE is taken.  */
	  add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
				     unshare_expr (c));

	  /* If C is false, then FALSE_EDGE is taken.  */
	  c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
			   unshare_expr (c));
	  add_to_dst_predicate_list (loop, false_edge,
				     unshare_expr (cond), c2);

	  cond = NULL_TREE;
	}

      /* If current bb has only one successor, then consider it as an
	 unconditional goto.  */
      if (single_succ_p (bb))
	{
	  basic_block bb_n = single_succ (bb);

	  /* The successor bb inherits the predicate of its
	     predecessor.  If there is no predicate in the predecessor
	     bb, then consider the successor bb as always executed.  */
	  if (cond == NULL_TREE)
	    cond = boolean_true_node;

	  add_to_predicate_list (loop, bb_n, cond);
	}
    }

  /* The loop header is always executed.  */
  reset_bb_predicate (loop->header);
  gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
	      && bb_predicate_gimplified_stmts (loop->latch) == NULL);
}

/* Build region by adding loop pre-header and post-header blocks.  */

static vec<basic_block>
build_region (struct loop *loop)
{
  vec<basic_block> region = vNULL;
  basic_block exit_bb = NULL;

  gcc_assert (ifc_bbs);
  /* The first element is loop pre-header.  */
  region.safe_push (loop_preheader_edge (loop)->src);

  for (unsigned int i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      region.safe_push (bb);
      /* Find loop postheader.  */
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (loop_exit_edge_p (loop, e))
	  {
	    exit_bb = e->dest;
	    break;
	  }
    }
  /* The last element is loop post-header.  */
  gcc_assert (exit_bb);
  region.safe_push (exit_bb);
  return region;
}

/* Return true when LOOP is if-convertible.  This is a helper function
   for if_convertible_loop_p.  REFS and DDRS are initialized and freed
   in if_convertible_loop_p.
*/

static bool
if_convertible_loop_p_1 (struct loop *loop, vec<data_reference_p> *refs)
{
  unsigned int i;
  basic_block exit_bb = NULL;
  vec<basic_block> region;

  if (find_data_references_in_loop (loop, refs) == chrec_dont_know)
    return false;

  calculate_dominance_info (CDI_DOMINATORS);

  /* Allow statements that can be handled during if-conversion.  */
  ifc_bbs = get_loop_body_in_if_conv_order (loop);
  if (!ifc_bbs)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Irreducible loop\n");
      return false;
    }

  /* First pass: check CFG shape of every block; remember the block
     with the loop exit edge so blocks after it can be rejected.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];

      if (!if_convertible_bb_p (loop, bb, exit_bb))
	return false;

      if (bb_with_exit_edge_p (loop, bb))
	exit_bb = bb;
    }

  /* Second pass: reject statement kinds we cannot handle at all and
     clear the uids we later use to map statements to data refs.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	switch (gimple_code (gsi_stmt (gsi)))
	  {
	  case GIMPLE_LABEL:
	  case GIMPLE_ASSIGN:
	  case GIMPLE_CALL:
	  case GIMPLE_DEBUG:
	  case GIMPLE_COND:
	    gimple_set_uid (gsi_stmt (gsi), 0);
	    break;
	  default:
	    return false;
	  }
    }

  data_reference_p dr;

  /* These global maps are freed by if_convertible_loop_p.  */
  innermost_DR_map
    = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
  baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;

  /* Compute post-dominator tree locally.  */
  region = build_region (loop);
  calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);

  predicate_bbs (loop);

  /* Free post-dominator tree since it is not used after predication.  */
  free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
  region.release ();

  /* Record, for each data reference, under which predicates it is
     read and written, keyed by its innermost behavior and base.  */
  for (i = 0; refs->iterate (i, &dr); i++)
    {
      tree ref = DR_REF (dr);

      dr->aux = XNEW (struct ifc_dr);
      DR_BASE_W_UNCONDITIONALLY (dr) = false;
      DR_RW_UNCONDITIONALLY (dr) = false;
      DR_W_UNCONDITIONALLY (dr) = false;
      IFC_DR (dr)->rw_predicate = boolean_false_node;
      IFC_DR (dr)->w_predicate = boolean_false_node;
      IFC_DR (dr)->base_w_predicate = boolean_false_node;
      /* Uid I + 1 links DR_STMT back to this data reference; 0 means
	 "no data reference" (set in the pass above).  */
      if (gimple_uid (DR_STMT (dr)) == 0)
	gimple_set_uid (DR_STMT (dr), i + 1);

      /* If DR doesn't have innermost loop behavior or it's a compound
	 memory reference, we synthesize its innermost loop behavior
	 for hashing.  */
      if (TREE_CODE (ref) == COMPONENT_REF
	  || TREE_CODE (ref) == IMAGPART_EXPR
	  || TREE_CODE (ref) == REALPART_EXPR
	  || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
	       || DR_INIT (dr) || DR_STEP (dr)))
	{
	  while (TREE_CODE (ref) == COMPONENT_REF
		 || TREE_CODE (ref) == IMAGPART_EXPR
		 || TREE_CODE (ref) == REALPART_EXPR)
	    ref = TREE_OPERAND (ref, 0);

	  memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
	  DR_BASE_ADDRESS (dr) = ref;
	}
      hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_stmt_iterator itr;

      /* Check the if-convertibility of statements in predicated BBs.  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
	  if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
	    return false;
    }

  /* Checking PHIs needs to be done after stmts, as the fact whether there
     are any masked loads or stores affects the tests.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gphi_iterator itr;

      for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
	if (!if_convertible_phi_p (loop, bb, itr.phi ()))
	  return false;
    }

  if (dump_file)
    fprintf (dump_file, "Applying if-conversion\n");

  return true;
}

/* Return true when LOOP is if-convertible.
   LOOP is if-convertible if:
   - it is innermost,
   - it has two or more basic blocks,
   - it has only one exit,
   - loop header is not the exit edge,
   - if its basic blocks and phi nodes are if convertible.  */

static bool
if_convertible_loop_p (struct loop *loop)
{
  edge e;
  edge_iterator ei;
  bool res = false;
  vec<data_reference_p> refs;

  /* Handle only innermost loop.  */
  if (!loop || loop->inner)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "not innermost loop\n");
      return false;
    }

  /* If only one block, no need for if-conversion.  */
  if (loop->num_nodes <= 2)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "less than 2 basic blocks\n");
      return false;
    }

  /* More than one loop exit is too much to handle.  */
  if (!single_exit (loop))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "multiple exits\n");
      return false;
    }

  /* If one of the loop header's edge is an exit edge then do not
     apply if-conversion.  */
  FOR_EACH_EDGE (e, ei, loop->header->succs)
    if (loop_exit_edge_p (loop, e))
      return false;

  refs.create (5);
  res = if_convertible_loop_p_1 (loop, &refs);

  /* Free the per-DR scratch data allocated by the analysis, then the
     data references and the global DR hash maps themselves.  */
  data_reference_p dr;
  unsigned int i;
  for (i = 0; refs.iterate (i, &dr); i++)
    free (dr->aux);

  free_data_refs (refs);

  delete innermost_DR_map;
  innermost_DR_map = NULL;

  delete baseref_DR_map;
  baseref_DR_map = NULL;

  return res;
}

/* Returns true if def-stmt for phi argument ARG is simple increment/decrement
   which is in predicated basic block.
   In fact, the following PHI pattern is searching:
      loop-header:
	reduc_1 = PHI <..., reduc_2>
      ...
	if (...)
	  reduc_3 = ...
	reduc_2 = PHI <reduc_1, reduc_3>

   ARG_0 and ARG_1 are correspondent PHI arguments.
   REDUC, OP0 and OP1 contain reduction stmt and its operands.
   EXTENDED is true if PHI has > 2 arguments.  */

static bool
is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
			  tree *op0, tree *op1, bool extended)
{
  tree lhs, r_op1, r_op2;
  gimple *stmt;
  gimple *header_phi = NULL;
  enum tree_code reduction_op;
  basic_block bb = gimple_bb (phi);
  struct loop *loop = bb->loop_father;
  edge latch_e = loop_latch_edge (loop);
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  edge e;
  edge_iterator ei;
  bool result = false;
  if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
    return false;

  /* Figure out which argument is defined by the loop-header PHI and
     which by the (candidate) reduction statement.  */
  if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
    {
      lhs = arg_1;
      header_phi = SSA_NAME_DEF_STMT (arg_0);
      stmt = SSA_NAME_DEF_STMT (arg_1);
    }
  else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
    {
      lhs = arg_0;
      header_phi = SSA_NAME_DEF_STMT (arg_1);
      stmt = SSA_NAME_DEF_STMT (arg_0);
    }
  else
    return false;
  if (gimple_bb (header_phi) != loop->header)
    return false;

  /* PHI must feed the header PHI back through the latch edge.  */
  if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
    return false;

  if (gimple_code (stmt) != GIMPLE_ASSIGN
      || gimple_has_volatile_ops (stmt))
    return false;

  if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return false;

  if (!is_predicated (gimple_bb (stmt)))
    return false;

  /* Check that stmt-block is predecessor of phi-block.  */
  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
    if (e->dest == bb)
      {
	result = true;
	break;
      }
  if (!result)
    return false;

  if (!has_single_use (lhs))
    return false;

  reduction_op = gimple_assign_rhs_code (stmt);
  if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
    return false;
  r_op1 = gimple_assign_rhs1 (stmt);
  r_op2 = gimple_assign_rhs2 (stmt);

  /* Make R_OP1 to hold reduction variable.  */
  if (r_op2 == PHI_RESULT (header_phi)
      && reduction_op == PLUS_EXPR)
    std::swap (r_op1, r_op2);
  else if (r_op1 != PHI_RESULT (header_phi))
    return false;

  /* Check that R_OP1 is used in reduction stmt or in PHI only.  */
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
	continue;
      if (use_stmt == stmt)
	continue;
      if (gimple_code (use_stmt) != GIMPLE_PHI)
	return false;
    }

  *op0 = r_op1; *op1 = r_op2;
  *reduc = stmt;
  return true;
}

/* Converts conditional scalar reduction into unconditional form, e.g.
     bb_4
       if (_5 != 0) goto bb_5 else goto bb_6
     end_bb_4
     bb_5
       res_6 = res_13 + 1;
     end_bb_5
     bb_6
       # res_2 = PHI <res_13(4), res_6(5)>
     end_bb_6

   will be converted into sequence
    _ifc__1 = _5 != 0 ?
 1 : 0;
    res_2 = res_13 + _ifc__1;
  Argument SWAP tells that arguments of conditional expression should be
  swapped.
  Returns rhs of resulting PHI assignment.  */

static tree
convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
			       tree cond, tree op0, tree op1, bool swap)
{
  gimple_stmt_iterator stmt_it;
  gimple *new_assign;
  tree rhs;
  tree rhs1 = gimple_assign_rhs1 (reduc);
  tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
  tree c;
  tree zero = build_zero_cst (TREE_TYPE (rhs1));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Found cond scalar reduction.\n");
      print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
    }

  /* Build cond expression using COND and constant operand
     of reduction rhs.  */
  c = fold_build_cond_expr (TREE_TYPE (rhs1),
			    unshare_expr (cond),
			    swap ? zero : op1,
			    swap ? op1 : zero);

  /* Create assignment stmt and insert it at GSI.  */
  new_assign = gimple_build_assign (tmp, c);
  gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
  /* Build rhs for unconditional increment/decrement.  */
  rhs = fold_build2 (gimple_assign_rhs_code (reduc),
		     TREE_TYPE (rhs1), op0, tmp);

  /* Delete original reduction stmt.  */
  stmt_it = gsi_for_stmt (reduc);
  gsi_remove (&stmt_it, true);
  release_defs (reduc);
  return rhs;
}

/* Produce condition for all occurrences of ARG in PHI node.  OCCUR
   holds the PHI argument indexes for ARG; the result is the OR of the
   predicates of the corresponding predecessor blocks, gimplified at
   GSI.  */

static tree
gen_phi_arg_condition (gphi *phi, vec<int> *occur,
		       gimple_stmt_iterator *gsi)
{
  int len;
  int i;
  tree cond = NULL_TREE;
  tree c;
  edge e;

  len = occur->length ();
  gcc_assert (len > 0);
  for (i = 0; i < len; i++)
    {
      e = gimple_phi_arg_edge (phi, (*occur)[i]);
      c = bb_predicate (e->src);
      /* A true predicate absorbs the whole disjunction; stop early.  */
      if (is_true_predicate (c))
	{
	  cond = c;
	  break;
	}
      c = force_gimple_operand_gsi_1 (gsi, unshare_expr (c),
				      is_gimple_condexpr, NULL_TREE,
				      true, GSI_SAME_STMT);
      if (cond != NULL_TREE)
	{
	  /* Must build OR expression.  */
	  cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
	  cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
					     is_gimple_condexpr, NULL_TREE,
					     true, GSI_SAME_STMT);
	}
      else
	cond = c;
    }
  gcc_assert (cond != NULL_TREE);
  return cond;
}

/* Local valueization callback that follows all-use SSA edges.
   Returning VAL unchanged tells the gimple folder to keep following
   SSA def chains unconditionally.  */

static tree
ifcvt_follow_ssa_use_edges (tree val)
{
  return val;
}

/* Replace a scalar PHI node with a COND_EXPR using COND as condition.
   This routine can handle PHI nodes with more than two arguments.

   For example,
     S1: A = PHI <x1(1), x2(5)>
   is converted into,
     S2: A = cond ? x1 : x2;

   The generated code is inserted at GSI that points to the top of
   basic block's statement list.
   If PHI node has more than two arguments a chain of conditional
   expression is produced.
*/


static void
predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
{
  gimple *new_stmt = NULL, *reduc;
  tree rhs, res, arg0, arg1, op0, op1, scev;
  tree cond;
  unsigned int index0;
  unsigned int max, args_len;
  edge e;
  basic_block bb;
  unsigned int i;

  res = gimple_phi_result (phi);
  /* Virtual PHIs are handled by the memory SSA machinery, not here.  */
  if (virtual_operand_p (res))
    return;

  /* Degenerate PHIs (all arguments equal, or a result whose scalar
     evolution is fully known) become a plain copy.  */
  if ((rhs = degenerate_phi_result (phi))
      || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
					    res))
	  && !chrec_contains_undetermined (scev)
	  && scev != res
	  && (rhs = gimple_phi_arg_def (phi, 0))))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Degenerate phi!\n");
	  print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
	}
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      update_stmt (new_stmt);
      return;
    }

  bb = gimple_bb (phi);
  if (EDGE_COUNT (bb->preds) == 2)
    {
      /* Predicate ordinary PHI node with 2 arguments.  */
      edge first_edge, second_edge;
      basic_block true_bb;
      first_edge = EDGE_PRED (bb, 0);
      second_edge = EDGE_PRED (bb, 1);
      cond = bb_predicate (first_edge->src);
      /* Prefer an edge whose predicate is not negated; when the first
	 edge's source predicate is a negation, look at the other.  */
      if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	std::swap (first_edge, second_edge);
      if (EDGE_COUNT (first_edge->src->succs) > 1)
	{
	  cond = bb_predicate (second_edge->src);
	  if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	    cond = TREE_OPERAND (cond, 0);
	  else
	    first_edge = second_edge;
	}
      else
	cond = bb_predicate (first_edge->src);
      /* Gimplify the condition to a valid cond-expr conditonal operand.  */
      cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
					 is_gimple_condexpr, NULL_TREE,
					 true, GSI_SAME_STMT);
      true_bb = first_edge->src;
      /* ARG0 is the value selected when COND (the predicate of
	 TRUE_BB) holds.  */
      if (EDGE_PRED (bb, 1)->src == true_bb)
	{
	  arg0 = gimple_phi_arg_def (phi, 1);
	  arg1 = gimple_phi_arg_def (phi, 0);
	}
      else
	{
	  arg0 = gimple_phi_arg_def (phi, 0);
	  arg1 = gimple_phi_arg_def (phi, 1);
	}
      if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
				    &op0, &op1, false))
	/* Convert reduction stmt into vectorizable form.  */
	rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
					     true_bb != gimple_bb (reduc));
      else
	/* Build new RHS using selected condition and arguments.  */
	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
				    arg0, arg1);
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
      if (fold_stmt (&new_gsi, ifcvt_follow_ssa_use_edges))
	{
	  new_stmt = gsi_stmt (new_gsi);
	  update_stmt (new_stmt);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "new phi replacement stmt\n");
	  print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
	}
      return;
    }

  /* Create hashmap for PHI node which contain vector of argument indexes
     having the same value.  */
  bool swap = false;
  hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
  unsigned int num_args = gimple_phi_num_args (phi);
  int max_ind = -1;
  /* Vector of different PHI argument values.  */
  auto_vec<tree> args (num_args);

  /* Compute phi_arg_map.  */
  for (i = 0; i < num_args; i++)
    {
      tree arg;

      arg = gimple_phi_arg_def (phi, i);
      if (!phi_arg_map.get (arg))
	args.quick_push (arg);
      phi_arg_map.get_or_insert (arg).safe_push (i);
    }

  /* Determine element with max number of occurrences.  */
  max_ind = -1;
  max = 1;
  args_len = args.length ();
  for (i = 0; i < args_len; i++)
    {
      unsigned int len;
      if ((len = phi_arg_map.get (args[i])->length ()) > max)
	{
	  max_ind = (int) i;
	  max = len;
	}
    }

  /* Put element with max number of occurences to the end of ARGS.
     It becomes the default value of the COND_EXPR chain built below,
     so its (largest) condition never needs to be generated.  */
  if (max_ind != -1 && max_ind + 1 != (int) args_len)
    std::swap (args[args_len - 1], args[max_ind]);

  /* Handle one special case when number of arguments with different values
     is equal 2 and one argument has the only occurrence.  Such PHI can be
     handled as if would have only 2 arguments.  */
  if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
    {
      vec<int> *indexes;
      indexes = phi_arg_map.get (args[0]);
      index0 = (*indexes)[0];
      arg0 = args[0];
      arg1 = args[1];
      e = gimple_phi_arg_edge (phi, index0);
      cond = bb_predicate (e->src);
      if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	{
	  swap = true;
	  cond = TREE_OPERAND (cond, 0);
	}
      /* Gimplify the condition to a valid cond-expr conditonal operand.  */
      cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
					 is_gimple_condexpr, NULL_TREE,
					 true, GSI_SAME_STMT);
      if (!(is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
				      &op0, &op1, true)))
	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
				    swap ? arg1 : arg0,
				    swap ? arg0 : arg1);
      else
	/* Convert reduction stmt into vectorizable form.  */
	rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
					     swap);
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      update_stmt (new_stmt);
    }
  else
    {
      /* Common case.  Build a chain of COND_EXPRs, one per distinct
	 argument value; the final link assigns to the PHI result.  */
      vec<int> *indexes;
      tree type = TREE_TYPE (gimple_phi_result (phi));
      tree lhs;
      arg1 = args[1];
      for (i = 0; i < args_len; i++)
	{
	  arg0 = args[i];
	  indexes = phi_arg_map.get (args[i]);
	  if (i != args_len - 1)
	    lhs = make_temp_ssa_name (type, NULL, "_ifc_");
	  else
	    lhs = res;
	  cond = gen_phi_arg_condition (phi, indexes, gsi);
	  rhs = fold_build_cond_expr (type, unshare_expr (cond),
				      arg0, arg1);
	  new_stmt = gimple_build_assign (lhs, rhs);
	  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
	  update_stmt (new_stmt);
	  arg1 = lhs;
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "new extended phi replacement stmt\n");
      print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
    }
}

/* Replaces in LOOP all the scalar phi nodes other than those in the
   LOOP->header block with conditional modify expressions.
*/ 1992 1993 static void 1994 predicate_all_scalar_phis (struct loop *loop) 1995 { 1996 basic_block bb; 1997 unsigned int orig_loop_num_nodes = loop->num_nodes; 1998 unsigned int i; 1999 2000 for (i = 1; i < orig_loop_num_nodes; i++) 2001 { 2002 gphi *phi; 2003 gimple_stmt_iterator gsi; 2004 gphi_iterator phi_gsi; 2005 bb = ifc_bbs[i]; 2006 2007 if (bb == loop->header) 2008 continue; 2009 2010 phi_gsi = gsi_start_phis (bb); 2011 if (gsi_end_p (phi_gsi)) 2012 continue; 2013 2014 gsi = gsi_after_labels (bb); 2015 while (!gsi_end_p (phi_gsi)) 2016 { 2017 phi = phi_gsi.phi (); 2018 if (virtual_operand_p (gimple_phi_result (phi))) 2019 gsi_next (&phi_gsi); 2020 else 2021 { 2022 predicate_scalar_phi (phi, &gsi); 2023 remove_phi_node (&phi_gsi, false); 2024 } 2025 } 2026 } 2027 } 2028 2029 /* Insert in each basic block of LOOP the statements produced by the 2030 gimplification of the predicates. */ 2031 2032 static void 2033 insert_gimplified_predicates (loop_p loop) 2034 { 2035 unsigned int i; 2036 2037 for (i = 0; i < loop->num_nodes; i++) 2038 { 2039 basic_block bb = ifc_bbs[i]; 2040 gimple_seq stmts; 2041 if (!is_predicated (bb)) 2042 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL); 2043 if (!is_predicated (bb)) 2044 { 2045 /* Do not insert statements for a basic block that is not 2046 predicated. Also make sure that the predicate of the 2047 basic block is set to true. */ 2048 reset_bb_predicate (bb); 2049 continue; 2050 } 2051 2052 stmts = bb_predicate_gimplified_stmts (bb); 2053 if (stmts) 2054 { 2055 if (any_pred_load_store) 2056 { 2057 /* Insert the predicate of the BB just after the label, 2058 as the if-conversion of memory writes will use this 2059 predicate. 
*/ 2060 gimple_stmt_iterator gsi = gsi_after_labels (bb); 2061 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); 2062 } 2063 else 2064 { 2065 /* Insert the predicate of the BB at the end of the BB 2066 as this would reduce the register pressure: the only 2067 use of this predicate will be in successor BBs. */ 2068 gimple_stmt_iterator gsi = gsi_last_bb (bb); 2069 2070 if (gsi_end_p (gsi) 2071 || stmt_ends_bb_p (gsi_stmt (gsi))) 2072 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); 2073 else 2074 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT); 2075 } 2076 2077 /* Once the sequence is code generated, set it to NULL. */ 2078 set_bb_predicate_gimplified_stmts (bb, NULL); 2079 } 2080 } 2081 } 2082 2083 /* Helper function for predicate_mem_writes. Returns index of existent 2084 mask if it was created for given SIZE and -1 otherwise. */ 2085 2086 static int 2087 mask_exists (int size, vec<int> vec) 2088 { 2089 unsigned int ix; 2090 int v; 2091 FOR_EACH_VEC_ELT (vec, ix, v) 2092 if (v == size) 2093 return (int) ix; 2094 return -1; 2095 } 2096 2097 /* Predicate each write to memory in LOOP. 2098 2099 This function transforms control flow constructs containing memory 2100 writes of the form: 2101 2102 | for (i = 0; i < N; i++) 2103 | if (cond) 2104 | A[i] = expr; 2105 2106 into the following form that does not contain control flow: 2107 2108 | for (i = 0; i < N; i++) 2109 | A[i] = cond ? 
expr : A[i]; 2110 2111 The original CFG looks like this: 2112 2113 | bb_0 2114 | i = 0 2115 | end_bb_0 2116 | 2117 | bb_1 2118 | if (i < N) goto bb_5 else goto bb_2 2119 | end_bb_1 2120 | 2121 | bb_2 2122 | cond = some_computation; 2123 | if (cond) goto bb_3 else goto bb_4 2124 | end_bb_2 2125 | 2126 | bb_3 2127 | A[i] = expr; 2128 | goto bb_4 2129 | end_bb_3 2130 | 2131 | bb_4 2132 | goto bb_1 2133 | end_bb_4 2134 2135 insert_gimplified_predicates inserts the computation of the COND 2136 expression at the beginning of the destination basic block: 2137 2138 | bb_0 2139 | i = 0 2140 | end_bb_0 2141 | 2142 | bb_1 2143 | if (i < N) goto bb_5 else goto bb_2 2144 | end_bb_1 2145 | 2146 | bb_2 2147 | cond = some_computation; 2148 | if (cond) goto bb_3 else goto bb_4 2149 | end_bb_2 2150 | 2151 | bb_3 2152 | cond = some_computation; 2153 | A[i] = expr; 2154 | goto bb_4 2155 | end_bb_3 2156 | 2157 | bb_4 2158 | goto bb_1 2159 | end_bb_4 2160 2161 predicate_mem_writes is then predicating the memory write as follows: 2162 2163 | bb_0 2164 | i = 0 2165 | end_bb_0 2166 | 2167 | bb_1 2168 | if (i < N) goto bb_5 else goto bb_2 2169 | end_bb_1 2170 | 2171 | bb_2 2172 | if (cond) goto bb_3 else goto bb_4 2173 | end_bb_2 2174 | 2175 | bb_3 2176 | cond = some_computation; 2177 | A[i] = cond ? expr : A[i]; 2178 | goto bb_4 2179 | end_bb_3 2180 | 2181 | bb_4 2182 | goto bb_1 2183 | end_bb_4 2184 2185 and finally combine_blocks removes the basic block boundaries making 2186 the loop vectorizable: 2187 2188 | bb_0 2189 | i = 0 2190 | if (i < N) goto bb_5 else goto bb_1 2191 | end_bb_0 2192 | 2193 | bb_1 2194 | cond = some_computation; 2195 | A[i] = cond ? 
     expr : A[i];
   |   if (i < N) goto bb_5 else goto bb_4
   | end_bb_1
   |
   | bb_4
   |   goto bb_1
   | end_bb_4
*/

static void
predicate_mem_writes (loop_p loop)
{
  unsigned int i, orig_loop_num_nodes = loop->num_nodes;
  auto_vec<int, 1> vect_sizes;
  auto_vec<tree, 1> vect_masks;

  /* Walk every block of the (unmodified) loop body except the header
     (index 0), predicating each memory access under the block's
     predicate.  */
  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      gimple_stmt_iterator gsi;
      basic_block bb = ifc_bbs[i];
      tree cond = bb_predicate (bb);
      bool swap;
      gimple *stmt;
      int index;

      /* An always-true predicate means the block executes
	 unconditionally; nothing to predicate.  */
      if (is_true_predicate (cond))
	continue;

      /* Strip a negation off the predicate; SWAP records that the
	 then/else operands (or the mask) must be inverted below.  */
      swap = false;
      if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	{
	  swap = true;
	  cond = TREE_OPERAND (cond, 0);
	}

      /* Masks are cached per access bitsize, but only within a single
	 basic block.  */
      vect_sizes.truncate (0);
      vect_masks.truncate (0);

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	{
	  if (!gimple_assign_single_p (stmt = gsi_stmt (gsi)))
	    ;
	  else if (is_false_predicate (cond)
		   && gimple_vdef (stmt))
	    {
	      /* A never-executed store is simply deleted.  */
	      unlink_stmt_vdef (stmt);
	      gsi_remove (&gsi, true);
	      release_defs (stmt);
	      continue;
	    }
	  else if (gimple_plf (stmt, GF_PLF_2))
	    {
	      /* GF_PLF_2 was set earlier on loads/stores that should
		 become IFN_MASK_LOAD / IFN_MASK_STORE internal calls.  */
	      tree lhs = gimple_assign_lhs (stmt);
	      tree rhs = gimple_assign_rhs1 (stmt);
	      tree ref, addr, ptr, mask;
	      gcall *new_stmt;
	      gimple_seq stmts = NULL;
	      machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
	      /* We checked before setting GF_PLF_2 that an equivalent
		 integer mode exists.  */
	      int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
	      /* For a load the memory reference is the RHS, for a store
		 the LHS.  */
	      ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
	      mark_addressable (ref);
	      addr = force_gimple_operand_gsi (&gsi, build_fold_addr_expr (ref),
					       true, NULL_TREE, true,
					       GSI_SAME_STMT);
	      if (!vect_sizes.is_empty ()
		  && (index = mask_exists (bitsize, vect_sizes)) != -1)
		/* Use created mask.  */
		mask = vect_masks[index];
	      else
		{
		  /* Materialize the predicate as a boolean mask SSA
		     name.  */
		  if (COMPARISON_CLASS_P (cond))
		    mask = gimple_build (&stmts, TREE_CODE (cond),
					 boolean_type_node,
					 TREE_OPERAND (cond, 0),
					 TREE_OPERAND (cond, 1));
		  else
		    mask = cond;

		  if (swap)
		    {
		      /* Invert the mask (XOR with true) to undo the
			 TRUTH_NOT_EXPR stripped above.  */
		      tree true_val
			= constant_boolean_node (true, TREE_TYPE (mask));
		      mask = gimple_build (&stmts, BIT_XOR_EXPR,
					   TREE_TYPE (mask), mask, true_val);
		    }
		  gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);

		  /* Save mask and its size for further use.  */
		  vect_sizes.safe_push (bitsize);
		  vect_masks.safe_push (mask);
		}
	      /* Second argument of the masked load/store encodes the
		 alignment of the reference in its type.  */
	      ptr = build_int_cst (reference_alias_ptr_type (ref),
				   get_object_alignment (ref));
	      /* Copy points-to info if possible.  */
	      if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
		copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
			       ref);
	      if (TREE_CODE (lhs) == SSA_NAME)
		{
		  /* Load: lhs = MASK_LOAD (addr, align, mask).  */
		  new_stmt
		    = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
						  ptr, mask);
		  gimple_call_set_lhs (new_stmt, lhs);
		  gimple_set_vuse (new_stmt, gimple_vuse (stmt));
		}
	      else
		{
		  /* Store: MASK_STORE (addr, align, mask, rhs); carry
		     over the virtual def/use chain.  */
		  new_stmt
		    = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
						  mask, rhs);
		  gimple_set_vuse (new_stmt, gimple_vuse (stmt));
		  gimple_set_vdef (new_stmt, gimple_vdef (stmt));
		  SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt;
		}
	      gimple_call_set_nothrow (new_stmt, true);

	      gsi_replace (&gsi, new_stmt, true);
	    }
	  else if (gimple_vdef (stmt))
	    {
	      /* Any other predicated store becomes a read-modify-write:
		 A[i] = cond ? rhs : A[i];  (operands swapped when the
		 original predicate was negated).  */
	      tree lhs = gimple_assign_lhs (stmt);
	      tree rhs = gimple_assign_rhs1 (stmt);
	      tree type = TREE_TYPE (lhs);

	      lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
	      rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
	      if (swap)
		std::swap (lhs, rhs);
	      cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
						 is_gimple_condexpr, NULL_TREE,
						 true, GSI_SAME_STMT);
	      rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
	      gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
	      update_stmt (stmt);
	    }
	  gsi_next (&gsi);
	}
    }
}

/* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
   other than the exit and latch of the LOOP.  Also resets the
   GIMPLE_DEBUG information.  */

static void
remove_conditions_and_labels (loop_p loop)
{
  gimple_stmt_iterator gsi;
  unsigned int i;

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];

      /* The exit block keeps its GIMPLE_COND (it still ends the loop)
	 and the latch is left untouched.  */
      if (bb_with_exit_edge_p (loop, bb)
	  || bb == loop->latch)
	continue;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
	switch (gimple_code (gsi_stmt (gsi)))
	  {
	  case GIMPLE_COND:
	  case GIMPLE_LABEL:
	    gsi_remove (&gsi, true);
	    break;

	  case GIMPLE_DEBUG:
	    /* ??? Should there be conditional GIMPLE_DEBUG_BINDs?  */
	    if (gimple_debug_bind_p (gsi_stmt (gsi)))
	      {
		gimple_debug_bind_reset_value (gsi_stmt (gsi));
		update_stmt (gsi_stmt (gsi));
	      }
	    gsi_next (&gsi);
	    break;

	  default:
	    gsi_next (&gsi);
	  }
    }
}

/* Combine all the basic blocks from LOOP into one or two super basic
   blocks.  Replace PHI nodes with conditional modify expressions.  */

static void
combine_blocks (struct loop *loop)
{
  basic_block bb, exit_bb, merge_target_bb;
  unsigned int orig_loop_num_nodes = loop->num_nodes;
  unsigned int i;
  edge e;
  edge_iterator ei;

  remove_conditions_and_labels (loop);
  insert_gimplified_predicates (loop);
  predicate_all_scalar_phis (loop);

  if (any_pred_load_store)
    predicate_mem_writes (loop);

  /* Merge basic blocks: first remove all the edges in the loop,
     except for those from the exit block.  */
  exit_bb = NULL;
  /* Remember per block whether it had a non-trivial predicate, before
     the predicates are freed below; used later to drop flow-sensitive
     range info on defs that become unconditional.  */
  bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
  for (i = 0; i < orig_loop_num_nodes; i++)
    {
      bb = ifc_bbs[i];
      predicated[i] = !is_true_predicate (bb_predicate (bb));
      free_bb_predicate (bb);
      if (bb_with_exit_edge_p (loop, bb))
	{
	  gcc_assert (exit_bb == NULL);
	  exit_bb = bb;
	}
    }
  gcc_assert (exit_bb != loop->latch);

  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      bb = ifc_bbs[i];

      for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
	{
	  if (e->src == exit_bb)
	    ei_next (&ei);
	  else
	    remove_edge (e);
	}
    }

  if (exit_bb != NULL)
    {
      if (exit_bb != loop->header)
	{
	  /* Connect this node to loop header.  */
	  make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
	  set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
	}

      /* Redirect non-exit edges to loop->latch.  */
      FOR_EACH_EDGE (e, ei, exit_bb->succs)
	{
	  if (!loop_exit_edge_p (loop, e))
	    redirect_edge_and_branch (e, loop->latch);
	}
      set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
    }
  else
    {
      /* If the loop does not have an exit, reconnect header and latch.  */
      make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
      set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
    }

  merge_target_bb = loop->header;

  /* Get at the virtual def valid for uses starting at the first block
     we merge into the header.  Without a virtual PHI the loop has the
     same virtual use on all stmts.  */
  gphi *vphi = get_virtual_phi (loop->header);
  tree last_vdef = NULL_TREE;
  if (vphi)
    {
      last_vdef = gimple_phi_result (vphi);
      for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
	   ! gsi_end_p (gsi); gsi_next (&gsi))
	if (gimple_vdef (gsi_stmt (gsi)))
	  last_vdef = gimple_vdef (gsi_stmt (gsi));
    }
  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      gimple_stmt_iterator gsi;
      gimple_stmt_iterator last;

      bb = ifc_bbs[i];

      if (bb == exit_bb || bb == loop->latch)
	continue;

      /* We release virtual PHIs late because we have to propagate them
	 out using the current VUSE.  The def might be the one used
	 after the loop.  */
      vphi = get_virtual_phi (bb);
      if (vphi)
	{
	  imm_use_iterator iter;
	  use_operand_p use_p;
	  gimple *use_stmt;
	  FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
		SET_USE (use_p, last_vdef);
	    }
	  gsi = gsi_for_stmt (vphi);
	  remove_phi_node (&gsi, true);
	}

      /* Make stmts member of loop->header and clear range info from all stmts
	 in BB which is now no longer executed conditional on a predicate we
	 could have derived it from.  */
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  gimple_set_bb (stmt, merge_target_bb);
	  /* Update virtual operands.  */
	  if (last_vdef)
	    {
	      use_operand_p use_p = ssa_vuse_operand (stmt);
	      if (use_p
		  && USE_FROM_PTR (use_p) != last_vdef)
		SET_USE (use_p, last_vdef);
	      if (gimple_vdef (stmt))
		last_vdef = gimple_vdef (stmt);
	    }
	  if (predicated[i])
	    {
	      ssa_op_iter i;
	      tree op;
	      FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
		reset_flow_sensitive_info (op);
	    }
	}

      /* Update stmt list.  */
      last = gsi_last_bb (merge_target_bb);
      gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
      set_bb_seq (bb, NULL);

      delete_basic_block (bb);
    }

  /* If possible, merge loop header to the block with the exit edge.
     This reduces the number of basic blocks to two, to please the
     vectorizer that handles only loops with two nodes.  */
  if (exit_bb
      && exit_bb != loop->header)
    {
      /* We release virtual PHIs late because we have to propagate them
	 out using the current VUSE.  The def might be the one used
	 after the loop.  */
      vphi = get_virtual_phi (exit_bb);
      if (vphi)
	{
	  imm_use_iterator iter;
	  use_operand_p use_p;
	  gimple *use_stmt;
	  FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
		SET_USE (use_p, last_vdef);
	    }
	  gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
	  remove_phi_node (&gsi, true);
	}

      if (can_merge_blocks_p (loop->header, exit_bb))
	merge_blocks (loop->header, exit_bb);
    }

  free (ifc_bbs);
  ifc_bbs = NULL;
  free (predicated);
}

/* Version LOOP before if-converting it; the original loop
   will be if-converted, the new copy of the loop will not,
   and the LOOP_VECTORIZED internal call will be guarding which
   loop to execute.  The vectorizer pass will fold this
   internal call into either true or false.

   Note that this function intentionally invalidates profile.  Both edges
   out of LOOP_VECTORIZED must have 100% probability so the profile remains
   consistent after the condition is folded in the vectorizer.  */

static struct loop *
version_loop_for_if_conversion (struct loop *loop)
{
  basic_block cond_bb;
  tree cond = make_ssa_name (boolean_type_node);
  struct loop *new_loop;
  gimple *g;
  gimple_stmt_iterator gsi;
  unsigned int save_length;

  /* The second argument (the number of the scalar copy) is filled in
     below, once loop_version has created the copy.  */
  g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
				  build_int_cst (integer_type_node, loop->num),
				  integer_zero_node);
  gimple_call_set_lhs (g, cond);

  /* Save BB->aux around loop_version as that uses the same field.  */
  save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
  void **saved_preds = XALLOCAVEC (void *, save_length);
  for (unsigned i = 0; i < save_length; i++)
    saved_preds[i] = ifc_bbs[i]->aux;

  initialize_original_copy_tables ();
  /* At this point we invalidate profile consistency until IFN_LOOP_VECTORIZED
     is re-merged in the vectorizer.  */
  new_loop = loop_version (loop, cond, &cond_bb,
			   profile_probability::always (),
			   profile_probability::always (),
			   profile_probability::always (),
			   profile_probability::always (), true);
  free_original_copy_tables ();

  for (unsigned i = 0; i < save_length; i++)
    ifc_bbs[i]->aux = saved_preds[i];

  if (new_loop == NULL)
    return NULL;

  /* The copy must never be vectorized itself; it is the scalar
     fallback.  */
  new_loop->dont_vectorize = true;
  new_loop->force_vectorize = false;
  gsi = gsi_last_bb (cond_bb);
  gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
  update_ssa (TODO_update_ssa);
  return new_loop;
}

/* Return true when LOOP satisfies the following conditions that will
   allow it to be recognized by the vectorizer for outer-loop
   vectorization:
   - The loop is not the root node of the loop tree.
   - The loop has exactly one inner loop.
   - The loop has a single exit.
   - The loop header has a single successor, which is the inner
     loop header.
   - Each of the inner and outer loop latches have a single
     predecessor.
   - The loop exit block has a single predecessor, which is the
     inner loop's exit block.
*/

static bool
versionable_outer_loop_p (struct loop *loop)
{
  /* Check the loop-nest shape required for outer-loop vectorization:
     a non-root loop with exactly one inner loop, a single exit, and a
     header that falls straight into the inner loop's header.  */
  if (!loop_outer (loop)
      || loop->dont_vectorize
      || !loop->inner
      || loop->inner->next
      || !single_exit (loop)
      || !single_succ_p (loop->header)
      || single_succ (loop->header) != loop->inner->header
      || !single_pred_p (loop->latch)
      || !single_pred_p (loop->inner->latch))
    return false;

  basic_block outer_exit = single_pred (loop->latch);
  basic_block inner_exit = single_pred (loop->inner->latch);

  /* The outer exit block must be fed only by the inner loop's exit
     block.  */
  if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
    return false;

  if (dump_file)
    fprintf (dump_file, "Found vectorizable outer loop for versioning\n");

  return true;
}

/* Performs splitting of critical edges.  Skip splitting and return false
   if LOOP will not be converted because:

   - LOOP is not well formed.
   - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.

   Last restriction is valid only if AGGRESSIVE_IF_CONV is false.  */

static bool
ifcvt_split_critical_edges (struct loop *loop, bool aggressive_if_conv)
{
  basic_block *body;
  basic_block bb;
  unsigned int num = loop->num_nodes;
  unsigned int i;
  gimple *stmt;
  edge e;
  edge_iterator ei;
  auto_vec<edge> critical_edges;

  /* Loop is not well formed.  */
  if (num <= 2 || loop->inner || !single_exit (loop))
    return false;

  body = get_loop_body (loop);
  for (i = 0; i < num; i++)
    {
      bb = body[i];
      /* Without the simd pragma, give up on blocks whose PHIs have too
	 many arguments to predicate cheaply.  */
      if (!aggressive_if_conv
	  && phi_nodes (bb)
	  && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "BB %d has complicated PHI with more than %u args.\n",
		     bb->index, MAX_PHI_ARG_NUM);

	  free (body);
	  return false;
	}
      if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
	continue;

      stmt = last_stmt (bb);
      /* Skip basic blocks not ending with conditional branch.  */
      if (!stmt || gimple_code (stmt) != GIMPLE_COND)
	continue;

      /* Only critical edges that stay inside LOOP are candidates.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
	  critical_edges.safe_push (e);
    }
  free (body);

  while (critical_edges.length () > 0)
    {
      e = critical_edges.pop ();
      /* Don't split if bb can be predicated along non-critical edge.  */
      if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
	split_edge (e);
    }

  return true;
}

/* Delete redundant statements produced by predication which prevents
   loop vectorization.  */

static void
ifcvt_local_dce (basic_block bb)
{
  gimple *stmt;
  gimple *stmt1;
  gimple *phi;
  gimple_stmt_iterator gsi;
  auto_vec<gimple *> worklist;
  enum gimple_code code;
  use_operand_p use_p;
  imm_use_iterator imm_iter;

  /* Mark-and-sweep DCE restricted to the single block BB:
     GF_PLF_2 == true marks a statement live.  */
  worklist.create (64);
  /* Consider all phi as live statements.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      gimple_set_plf (phi, GF_PLF_2, true);
      worklist.safe_push (phi);
    }
  /* Consider load/store statements, CALL and COND as live.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      if (gimple_store_p (stmt)
	  || gimple_assign_load_p (stmt)
	  || is_gimple_debug (stmt))
	{
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  worklist.safe_push (stmt);
	  continue;
	}
      code = gimple_code (stmt);
      if (code == GIMPLE_COND || code == GIMPLE_CALL)
	{
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  worklist.safe_push (stmt);
	  continue;
	}
      gimple_set_plf (stmt, GF_PLF_2, false);

      if (code == GIMPLE_ASSIGN)
	{
	  /* An assignment whose result is used outside BB is live too:
	     the local analysis cannot prove it dead.  */
	  tree lhs = gimple_assign_lhs (stmt);
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	    {
	      stmt1 = USE_STMT (use_p);
	      if (gimple_bb (stmt1) != bb)
		{
		  gimple_set_plf (stmt, GF_PLF_2, true);
		  worklist.safe_push (stmt);
		  break;
		}
	    }
	}
    }
  /* Propagate liveness through arguments of live stmt.  */
  while (worklist.length () > 0)
    {
      ssa_op_iter iter;
      use_operand_p use_p;
      tree use;

      stmt = worklist.pop ();
      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	{
	  use = USE_FROM_PTR (use_p);
	  if (TREE_CODE (use) != SSA_NAME)
	    continue;
	  stmt1 = SSA_NAME_DEF_STMT (use);
	  /* Defs from other blocks or already-marked defs need no
	     further propagation.  */
	  if (gimple_bb (stmt1) != bb
	      || gimple_plf (stmt1, GF_PLF_2))
	    continue;
	  gimple_set_plf (stmt1, GF_PLF_2, true);
	  worklist.safe_push (stmt1);
	}
    }
  /* Delete dead statements.  */
  gsi = gsi_start_bb (bb);
  while (!gsi_end_p (gsi))
    {
      stmt = gsi_stmt (gsi);
      if (gimple_plf (stmt, GF_PLF_2))
	{
	  gsi_next (&gsi);
	  continue;
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      gsi_remove (&gsi, true);
      release_defs (stmt);
    }
}

/* If-convert LOOP when it is legal.
   For the moment this pass has no
   profitability analysis.  Returns non-zero todo flags when something
   changed.  */

unsigned int
tree_if_conversion (struct loop *loop)
{
  unsigned int todo = 0;
  bool aggressive_if_conv;
  struct loop *rloop;

  /* The AGAIN label re-enters the whole transform for the inner loop
     of a versioned outer loop (RLOOP set at the bottom).  */
 again:
  rloop = NULL;
  ifc_bbs = NULL;
  any_pred_load_store = false;
  any_complicated_phi = false;

  /* Apply more aggressive if-conversion when loop or its outer loop were
     marked with simd pragma.  When that's the case, we try to if-convert
     loop containing PHIs with more than MAX_PHI_ARG_NUM arguments.  */
  aggressive_if_conv = loop->force_vectorize;
  if (!aggressive_if_conv)
    {
      struct loop *outer_loop = loop_outer (loop);
      if (outer_loop && outer_loop->force_vectorize)
	aggressive_if_conv = true;
    }

  if (!ifcvt_split_critical_edges (loop, aggressive_if_conv))
    goto cleanup;

  if (!if_convertible_loop_p (loop)
      || !dbg_cnt (if_conversion_tree))
    goto cleanup;

  /* Transforms requiring versioning are pointless when the vectorizer
     will never look at this loop.  */
  if ((any_pred_load_store || any_complicated_phi)
      && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
	  || loop->dont_vectorize))
    goto cleanup;

  /* Since we have no cost model, always version loops unless the user
     specified -ftree-loop-if-convert or unless versioning is required.
     Either version this loop, or if the pattern is right for outer-loop
     vectorization, version the outer loop.  In the latter case we will
     still if-convert the original inner loop.  */
  if (any_pred_load_store
      || any_complicated_phi
      || flag_tree_loop_if_convert != 1)
    {
      struct loop *vloop
	= (versionable_outer_loop_p (loop_outer (loop))
	   ? loop_outer (loop) : loop);
      struct loop *nloop = version_loop_for_if_conversion (vloop);
      if (nloop == NULL)
	goto cleanup;
      if (vloop != loop)
	{
	  /* If versionable_outer_loop_p decided to version the
	     outer loop, version also the inner loop of the non-vectorized
	     loop copy.  So we transform:
	      loop1
		loop2
	     into:
	      if (LOOP_VECTORIZED (1, 3))
		{
		  loop1
		    loop2
		}
	      else
		loop3 (copy of loop1)
		  if (LOOP_VECTORIZED (4, 5))
		    loop4 (copy of loop2)
		  else
		    loop5 (copy of loop4)  */
	  gcc_assert (nloop->inner && nloop->inner->next == NULL);
	  rloop = nloop->inner;
	}
    }

  /* Now all statements are if-convertible.  Combine all the basic
     blocks into one huge basic block doing the if-conversion
     on-the-fly.  */
  combine_blocks (loop);

  /* Delete dead predicate computations.  */
  ifcvt_local_dce (loop->header);

  todo |= TODO_cleanup_cfg;

 cleanup:
  /* CLEANUP is reached on both success and failure; ifc_bbs is
     non-NULL only when combine_blocks did not already free it.  */
  if (ifc_bbs)
    {
      unsigned int i;

      for (i = 0; i < loop->num_nodes; i++)
	free_bb_predicate (ifc_bbs[i]);

      free (ifc_bbs);
      ifc_bbs = NULL;
    }
  if (rloop != NULL)
    {
      loop = rloop;
      goto again;
    }

  return todo;
}

/* Tree if-conversion pass management.  */

namespace {

const pass_data pass_data_if_conversion =
{
  GIMPLE_PASS, /* type */
  "ifcvt", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_LOOP_IFCVT, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_if_conversion : public gimple_opt_pass
{
public:
  pass_if_conversion (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_if_conversion, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *);
  virtual unsigned int execute (function *);

}; // class pass_if_conversion

/* Run the pass when vectorization is enabled (globally or via pragma)
   and not explicitly disabled, or when -ftree-loop-if-convert was
   given.  */

bool
pass_if_conversion::gate (function *fun)
{
  return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
	   && flag_tree_loop_if_convert != 0)
	  || flag_tree_loop_if_convert == 1);
}

/* Main entry: if-convert each candidate loop of FUN and return the
   accumulated todo flags.  */

unsigned int
pass_if_conversion::execute (function *fun)
{
  struct loop *loop;
  unsigned todo = 0;

  if (number_of_loops (fun) <= 1)
    return 0;

  FOR_EACH_LOOP (loop, 0)
    if (flag_tree_loop_if_convert == 1
	|| ((flag_tree_loop_vectorize || loop->force_vectorize)
	    && !loop->dont_vectorize))
      todo |= tree_if_conversion (loop);

  /* Niter estimates and SCEV data are stale after CFG surgery.  */
  if (todo)
    {
      free_numbers_of_iterations_estimates (fun);
      scev_reset ();
    }

  /* version_loop_for_if_conversion must have restored every BB->aux it
     borrowed.  */
  if (flag_checking)
    {
      basic_block bb;
      FOR_EACH_BB_FN (bb, fun)
	gcc_assert (!bb->aux);
    }

  return todo;
}

} // anon namespace

gimple_opt_pass *
make_pass_if_conversion (gcc::context *ctxt)
{
  return new pass_if_conversion (ctxt);
}