/* Reassociation for trees.
   Copyright (C) 2005-2018 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "memmodel.h"
#include "tm_p.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop.h"
#include "flags.h"
#include "tree-ssa.h"
#include "langhooks.h"
#include "cfgloop.h"
#include "params.h"
#include "builtins.h"
#include "gimplify.h"
#include "case-cfn-macros.h"

/* This is a simple global reassociation pass.  It is, in part, based
   on the LLVM pass of the same name (they do some things more/less
   than we do, in different orders, etc).

   It consists of five steps:

   1. Breaking up subtract operations into addition + negate, where
   it would promote the reassociation of adds; see the illustration
   after this list.

   2. Left linearization of the expression trees, so that (A+B)+(C+D)
   becomes (((A+B)+C)+D), which is easier for us to rewrite later.
   During linearization, we place the operands of the binary
   expressions into a vector of operand_entry_*.

   3. Optimization of the operand lists, eliminating things like a +
   -a, a & a, etc.

   3a. Combine repeated factors with the same occurrence counts
   into a __builtin_powi call that will later be optimized into
   an optimal number of multiplies.

   4. Rewrite the expression trees we linearized and optimized so
   they are in proper rank order.

   5. Repropagate negates, as nothing else will clean it up ATM.
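
   As an illustration of step 1, a sequence like

       t1 = a - b;
       t2 = t1 - c;

   is broken up into

       n1 = -b;   t1 = a + n1;
       n2 = -c;   t2 = t1 + n2;

   so that both additions become part of a single reassociable
   PLUS_EXPR chain.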

   A bit of theory on #4, since nobody seems to write anything down
   about why it makes sense to do it the way they do it:

   We could do this much nicer theoretically, but don't (for reasons
   explained after how to do it theoretically nice :P).

   In order to promote the most redundancy elimination, you want
   binary expressions whose operands are the same rank (or
   preferably, the same value) exposed to the redundancy eliminator,
   for possible elimination.

   So the way to do this if we really cared, is to build the new op
   tree from the leaves to the roots, merging as you go, and putting the
   new op on the end of the worklist, until you are left with one
   thing on the worklist.

   IE if you have to rewrite the following set of operands (listed with
   rank in parentheses), with opcode PLUS_EXPR:

   a (1),  b (1),  c (1),  d (2), e (2)


   We start with our merge worklist empty, and the ops list with all of
   those on it.

   You want to first merge all leaves of the same rank, as much as
   possible.

   So first build a binary op of

   mergetmp = a + b, and put "mergetmp" on the merge worklist.

   Because there is no three operand form of PLUS_EXPR, c is not going to
   be exposed to redundancy elimination as a rank 1 operand.

   So you might as well throw it on the merge worklist (you could also
   consider it to now be a rank two operand, and merge it with d and e,
   but in this case, you then have evicted e from a binary op.  So at
   least in this situation, you can't win.)

   Then build a binary op of d + e
   mergetmp2 = d + e

   and put mergetmp2 on the merge worklist.

   so merge worklist = {mergetmp, c, mergetmp2}

   Continue building binary ops of these operations until you have only
   one operation left on the worklist.

   So we have

   build binary op
   mergetmp3 = mergetmp + c

   worklist = {mergetmp2, mergetmp3}

   mergetmp4 = mergetmp2 + mergetmp3

   worklist = {mergetmp4}

   because we have one operation left, we can now just set the original
   statement equal to the result of that operation.

   This will at least expose a + b and d + e to redundancy elimination
   as binary operations.

   For extra points, you can reuse the old statements to build the
   mergetmps, since you shouldn't run out.

   So why don't we do this?

   Because it's expensive, and rarely will help.  Most trees we are
   reassociating have 3 or fewer ops.  If they have 2 ops, they already
   will be written into a nice single binary op.  If you have 3 ops, a
   single simple check suffices to tell you whether the first two are of
   the same rank.  If so, you know to order it

   mergetmp = op1 + op2
   newstmt = mergetmp + op3

   instead of
   mergetmp = op2 + op3
   newstmt = mergetmp + op1

   If all three are of the same rank, you can't expose them all in a
   single binary operator anyway, so the above is *still* the best you
   can do.

   Thus, this is what we do.  When we have three ops left, we check to see
   what order to put them in, and call it a day.  As a nod to vector sum
   reduction, we check if any of the ops are really a phi node that is a
   destructive update for the associating op, and keep the destructive
   update together for vector sum reduction recognition.  */

/* Enable insertion of __builtin_powi calls during execute_reassoc.  See
   point 3a in the pass header comment.  */
static bool reassoc_insert_powi_p;

/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;

/* Operator, rank pair.  */
struct operand_entry
{
  unsigned int rank;
  unsigned int id;
  tree op;
  unsigned int count;
  gimple *stmt_to_insert;
};

static object_allocator<operand_entry> operand_entry_pool
  ("operand entry pool");

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.
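   qsort is not guaranteed to be stable, and breaking ties on
   operand_entry addresses instead would depend on allocation order, so
   the comparator falls back to these monotonically increasing ids.  */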
static unsigned int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;

/* Operand->rank hashtable.  */
static hash_map<tree, long> *operand_rank;

/* Vector of SSA_NAMEs on which after reassociate_bb is done with
   all basic blocks the CFG should be adjusted - basic blocks
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;

/* Forward decls.  */
static long get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);

/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple *end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}

/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)

/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
static long
phi_rank (gimple *stmt)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple *use_stmt;

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias
     this phi.  */
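  /* The bias (PHI_LOOP_BIAS) raises the accumulator's rank above
     anything computed inside the loop body, so the rewrite step will
     place the loop-carried value last in the reassociated chain.  */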
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (arg))
        {
          gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
          if (gimple_bb (def_stmt)->loop_father == father)
            return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
        }
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}

/* If EXP is an SSA_NAME defined by a PHI statement that represents a
   loop-carried dependence of an innermost loop, return TRUE; else
   return FALSE.  */
static bool
loop_carried_phi (tree exp)
{
  gimple *phi_stmt;
  long block_rank;

  if (TREE_CODE (exp) != SSA_NAME
      || SSA_NAME_IS_DEFAULT_DEF (exp))
    return false;

  phi_stmt = SSA_NAME_DEF_STMT (exp);

  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
    return false;

  /* Non-loop-carried phis have block rank.  Loop-carried phis have
     an additional bias added in.  If this phi doesn't have block rank,
     it's biased and should not be propagated.  */
  block_rank = bb_rank[gimple_bb (phi_stmt)->index];

  if (phi_rank (phi_stmt) != block_rank)
    return true;

  return false;
}

/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */
static long
propagate_rank (long rank, tree op)
{
  long op_rank;

  if (loop_carried_phi (op))
    return rank;

  op_rank = get_rank (op);

  return MAX (rank, op_rank);
}

/* Look up the operand rank structure for expression E.  */

static inline long
find_operand_rank (tree e)
{
  long *slot = operand_rank->get (e);
  return slot ? *slot : -1;
}

/* Insert {E,RANK} into the operand rank hashtable.  */

static inline void
insert_operand_rank (tree e, long rank)
{
  gcc_assert (rank > 0);
  gcc_assert (!operand_rank->put (e, rank));
}

/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */
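
  /* For example, if a has rank 1 and b has rank 2, then for

       c = a + b;
       d = c + a;

     c receives rank MAX (1, 2) + 1 = 3 and d receives rank
     MAX (3, 1) + 1 = 4, so operands deeper in the expression tree
     rank higher.  */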

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      ssa_op_iter iter;
      gimple *stmt;
      long rank;
      tree op;

      if (SSA_NAME_IS_DEFAULT_DEF (e))
        return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
        return phi_rank (stmt);

      if (!is_gimple_assign (stmt))
        return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
        return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
         exception, remove the bias from loop-carried phis when propagating
         the rank so that dependent operations are not also biased.  */
      /* Simply walk over all SSA uses - this takes advantage of the
         fact that non-SSA operands are is_gimple_min_invariant and
         thus have rank 0.  */
      rank = 0;
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
        rank = propagate_rank (rank, op);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Rank for ");
          print_generic_expr (dump_file, e);
          fprintf (dump_file, " is %ld\n", (rank + 1));
        }

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Constants, globals, etc., are rank 0.  */
  return 0;
}


/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 4
#define FLOAT_ONE_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */
static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    {
      /* Sort -1.0 and 1.0 constants last: although const_binop can't
         optimize some inexact operations, multiplication by -1.0 or
         1.0 can always be merged with other operands.  */
      if (real_onep (t) || real_minus_onep (t))
        return FLOAT_ONE_CONST_TYPE;
      return FLOAT_CONST_TYPE;
    }
  else
    return OTHER_CONST_TYPE;
}

/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry *oea = *(const operand_entry *const *)pa;
  const operand_entry *oeb = *(const operand_entry *const *)pb;

  if (oeb->rank != oea->rank)
    return oeb->rank > oea->rank ? 1 : -1;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
        return constant_type (oea->op) - constant_type (oeb->op);
      else
        /* To make sorting result stable, we use unique IDs to determine
           order.  */
        return oeb->id > oea->id ? 1 : -1;
    }

  if (TREE_CODE (oea->op) != SSA_NAME)
    {
      if (TREE_CODE (oeb->op) != SSA_NAME)
        return oeb->id > oea->id ? 1 : -1;
      else
        return 1;
    }
  else if (TREE_CODE (oeb->op) != SSA_NAME)
    return -1;

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
    {
      /* As SSA_NAME_VERSION is assigned pretty randomly, because we
         reuse versions of removed SSA_NAMEs, prefer to sort based on
         basic block and gimple_uid of the SSA_NAME_DEF_STMT when
         possible.  See PR60418.  */
      gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
      gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
      basic_block bba = gimple_bb (stmta);
      basic_block bbb = gimple_bb (stmtb);
      if (bbb != bba)
        {
          /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
             but the other might not.  */
          if (!bba)
            return 1;
          if (!bbb)
            return -1;
          /* If neither is, compare bb_rank.  */
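          /* bb_rank values are basic-block ordinals shifted left by 16
             (see init_reassoc), and only a loop-carried phi bias can
             occupy the low bits, so comparing the high halves orders
             the blocks while ignoring any bias.  */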
          if (bb_rank[bbb->index] != bb_rank[bba->index])
            return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
        }

      bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
      bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
      if (da != db)
        return da ? 1 : -1;

      return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
    }

  return oeb->id > oea->id ? 1 : -1;
}

/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry *> *ops, tree op,
                gimple *stmt_to_insert = NULL)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  oe->stmt_to_insert = stmt_to_insert;
  ops->safe_push (oe);
}

/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
                       HOST_WIDE_INT repeat)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  oe->stmt_to_insert = NULL;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}

/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    {
      tree rhs1 = gimple_assign_rhs1 (stmt);
      tree rhs2 = gimple_assign_rhs2 (stmt);
      if (TREE_CODE (rhs1) == SSA_NAME
          && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
        return false;
      if (rhs2
          && TREE_CODE (rhs2) == SSA_NAME
          && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
        return false;
      return true;
    }

  return false;
}


/* Return true if STMT is a nop-conversion.
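   Such a conversion keeps the precision (and mode) of the value and at
   most changes the signedness of its type, so it can be looked through
   when comparing operand values; see tree_nop_conversion_p.  */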

static bool
gimple_nop_conversion_p (gimple *stmt)
{
  if (gassign *ass = dyn_cast <gassign *> (stmt))
    {
      if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
          && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
                                    TREE_TYPE (gimple_assign_rhs1 (ass))))
        return true;
    }
  return false;
}

/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of that operation.  Otherwise, return NULL.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple *stmt = SSA_NAME_DEF_STMT (name);

  /* Look through nop conversions (sign changes).  */
  if (gimple_nop_conversion_p (stmt)
      && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
    stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}

/* Return true if OP1 and OP2 have the same value if cast to either type.  */

static bool
ops_equal_values_p (tree op1, tree op2)
{
  if (op1 == op2)
    return true;

  tree orig_op1 = op1;
  if (TREE_CODE (op1) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op1);
      if (gimple_nop_conversion_p (stmt))
        {
          op1 = gimple_assign_rhs1 (stmt);
          if (op1 == op2)
            return true;
        }
    }

  if (TREE_CODE (op2) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op2);
      if (gimple_nop_conversion_p (stmt))
        {
          op2 = gimple_assign_rhs1 (stmt);
          if (op1 == op2
              || orig_op1 == op2)
            return true;
        }
    }

  return false;
}


/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
                          vec<operand_entry *> *ops,
                          bool *all_done,
                          unsigned int i,
                          operand_entry *curr,
                          operand_entry *last)
{

  /* If we have two of the same op, and the opcode is &, |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.
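
     For example, a & b & a drops one copy of a, leaving a & b, while
     a ^ b ^ a cancels both copies of a, leaving b (or a zero constant
     when nothing else remains).  */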

  if (last && last->op == curr->op)
    {
      switch (opcode)
        {
        case MAX_EXPR:
        case MIN_EXPR:
        case BIT_IOR_EXPR:
        case BIT_AND_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op);
              fprintf (dump_file, " [&|minmax] ");
              print_generic_expr (dump_file, last->op);
              fprintf (dump_file, " -> ");
              print_generic_stmt (dump_file, last->op);
            }

          ops->ordered_remove (i);
          reassociate_stats.ops_eliminated++;

          return true;

        case BIT_XOR_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op);
              fprintf (dump_file, " ^ ");
              print_generic_expr (dump_file, last->op);
              fprintf (dump_file, " -> nothing\n");
            }

          reassociate_stats.ops_eliminated += 2;

          if (ops->length () == 2)
            {
              ops->truncate (0);
              add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
              *all_done = true;
            }
          else
            {
              ops->ordered_remove (i-1);
              ops->ordered_remove (i-1);
            }

          return true;

        default:
          break;
        }
    }
  return false;
}

static vec<tree> plus_negates;

/* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
   not expression, look in OPS for a corresponding positive operation to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
                           vec<operand_entry *> *ops,
                           unsigned int currindex,
                           operand_entry *curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.
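
     (The NEGATE_EXPR or BIT_NOT_EXPR defining CURR->OP gets a rank one
     higher than its operand, and OPS is sorted by decreasing rank, so a
     matching positive operand can only occur while the rank is still
     within one of CURR's.)  */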

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (negateop
          && ops_equal_values_p (oe->op, negateop))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, negateop);
              fprintf (dump_file, " + -");
              print_generic_expr (dump_file, oe->op);
              fprintf (dump_file, " -> 0\n");
            }

          ops->ordered_remove (i);
          add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
          ops->ordered_remove (currindex);
          reassociate_stats.ops_eliminated++;

          return true;
        }
      else if (notop
               && ops_equal_values_p (oe->op, notop))
        {
          tree op_type = TREE_TYPE (oe->op);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, notop);
              fprintf (dump_file, " + ~");
              print_generic_expr (dump_file, oe->op);
              fprintf (dump_file, " -> -1\n");
            }

          ops->ordered_remove (i);
          add_to_ops_vec (ops, build_all_ones_cst (op_type));
          ops->ordered_remove (currindex);
          reassociate_stats.ops_eliminated++;

          return true;
        }
    }

  /* If CURR->OP is a negate expr without nop conversion in a plus expr:
     save it for later inspection in repropagate_negates ().  */
  if (negateop != NULL_TREE
      && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
    plus_negates.safe_push (curr->op);

  return false;
}

/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.
   Otherwise, return false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
                     vec<operand_entry *> *ops,
                     unsigned int currindex,
                     operand_entry *curr)
{
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, notop);
              if (opcode == BIT_AND_EXPR)
                fprintf (dump_file, " & ~");
              else if (opcode == BIT_IOR_EXPR)
                fprintf (dump_file, " | ~");
              print_generic_expr (dump_file, oe->op);
              if (opcode == BIT_AND_EXPR)
                fprintf (dump_file, " -> 0\n");
              else if (opcode == BIT_IOR_EXPR)
                fprintf (dump_file, " -> -1\n");
            }

          if (opcode == BIT_AND_EXPR)
            oe->op = build_zero_cst (TREE_TYPE (oe->op));
          else if (opcode == BIT_IOR_EXPR)
            oe->op = build_all_ones_cst (TREE_TYPE (oe->op));

          reassociate_stats.ops_eliminated += ops->length () - 1;
          ops->truncate (0);
          ops->quick_push (oe);
          return true;
        }
    }

  return false;
}

/* Use constant value that may be present in OPS to try to eliminate
   operands.
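   For example, a trailing 0 collapses a chain of &s to 0 and is
   simply dropped from a chain of +s, while a trailing -1 is dropped
   from a chain of &s and collapses a chain of |s to -1.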
   Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */

static void
eliminate_using_constants (enum tree_code opcode,
                           vec<operand_entry *> *ops)
{
  operand_entry *oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
        {
        case BIT_AND_EXPR:
          if (integer_zerop (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found & 0, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;

                  ops->truncate (0);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_all_onesp (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found & -1, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        case BIT_IOR_EXPR:
          if (integer_all_onesp (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found | -1, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;

                  ops->truncate (0);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_zerop (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found | 0, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        case MULT_EXPR:
          if (integer_zerop (oelast->op)
              || (FLOAT_TYPE_P (type)
                  && !HONOR_NANS (type)
                  && !HONOR_SIGNED_ZEROS (type)
                  && real_zerop (oelast->op)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found * 0, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;
                  ops->truncate (0);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_onep (oelast->op)
                   || (FLOAT_TYPE_P (type)
                       && !HONOR_SNANS (type)
                       && real_onep (oelast->op)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found * 1, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                  return;
                }
            }
          break;
        case BIT_XOR_EXPR:
        case PLUS_EXPR:
        case MINUS_EXPR:
          if (integer_zerop (oelast->op)
              || (FLOAT_TYPE_P (type)
                  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
                  && fold_real_zero_addition_p (type, oelast->op,
                                                opcode == MINUS_EXPR)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found [|^+] 0, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                  return;
                }
            }
          break;
        default:
          break;
        }
    }
}


static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
                                 bool, bool);

/* Structure for tracking and counting operands.  */
struct oecount {
  unsigned int cnt;
  unsigned int id;
  enum tree_code oecode;
  tree op;
};


/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;


/* Oecount hashtable helpers.  */
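/* The table stores ints: the entry for CVEC index I is stored as I + 42,
   because int_hash <int, 0, 1> reserves the values 0 and 1 for empty and
   deleted slots.  The hash and equal functions below undo that bias
   before indexing into CVEC.  */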

struct oecount_hasher : int_hash <int, 0, 1>
{
  static inline hashval_t hash (int);
  static inline bool equal (int, int);
};

/* Hash function for oecount.  */

inline hashval_t
oecount_hasher::hash (int p)
{
  const oecount *c = &cvec[p - 42];
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}

/* Comparison function for oecount.  */

inline bool
oecount_hasher::equal (int p1, int p2)
{
  const oecount *c1 = &cvec[p1 - 42];
  const oecount *c2 = &cvec[p2 - 42];
  return c1->oecode == c2->oecode && c1->op == c2->op;
}

/* Comparison function for qsort sorting oecount elements by count.  */

static int
oecount_cmp (const void *p1, const void *p2)
{
  const oecount *c1 = (const oecount *)p1;
  const oecount *c2 = (const oecount *)p2;
  if (c1->cnt != c2->cnt)
    return c1->cnt > c2->cnt ? 1 : -1;
  else
    /* If counts are identical, use unique IDs to stabilize qsort.  */
    return c1->id > c2->id ? 1 : -1;
}

/* Return TRUE iff STMT represents a builtin call that raises OP
   to some exponent.  */

static bool
stmt_is_power_of_op (gimple *stmt, tree op)
{
  if (!is_gimple_call (stmt))
    return false;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));

    default:
      return false;
    }
}

/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */

static HOST_WIDE_INT
decrement_power (gimple *stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, SIGNED);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_CFN_POWI:
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      gcc_unreachable ();
    }
}

/* Replace the SSA name defined by STMT with a new SSA name, rewrite
   all uses of the old name to the new one, and return the new name.  */

static tree
make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
{
  gimple *use_stmt;
  use_operand_p use;
  imm_use_iterator iter;
  tree new_lhs, new_debug_lhs = NULL_TREE;
  tree lhs = gimple_get_lhs (stmt);

  new_lhs = make_ssa_name (TREE_TYPE (lhs));
  gimple_set_lhs (stmt, new_lhs);

  /* Also need to update GIMPLE_DEBUGs.
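     A debug use cannot simply
     refer to the new LHS, since the value it described is no longer
     computed anywhere, so all debug uses are redirected to a fresh
     DEBUG_EXPR_DECL bound to OPCODE applied to the new LHS and OP.  */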
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      tree repl = new_lhs;
      if (is_gimple_debug (use_stmt))
        {
          if (new_debug_lhs == NULL_TREE)
            {
              new_debug_lhs = make_node (DEBUG_EXPR_DECL);
              gdebug *def_temp
                = gimple_build_debug_bind (new_debug_lhs,
                                           build2 (opcode, TREE_TYPE (lhs),
                                                   new_lhs, op),
                                           stmt);
              DECL_ARTIFICIAL (new_debug_lhs) = 1;
              TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
              SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
              gimple_set_uid (def_temp, gimple_uid (stmt));
              gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
              gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
            }
          repl = new_debug_lhs;
        }
      FOR_EACH_IMM_USE_ON_STMT (use, iter)
        SET_USE (use, repl);
      update_stmt (use_stmt);
    }
  return new_lhs;
}

/* Replace all SSA names defined in STMTS_TO_FIX with new SSA names and
   rewrite their uses.  Also do this for the stmt that defines DEF
   if *DEF is not OP.  */

static void
make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
                           vec<gimple *> &stmts_to_fix)
{
  unsigned i;
  gimple *stmt;

  if (*def != op
      && TREE_CODE (*def) == SSA_NAME
      && (stmt = SSA_NAME_DEF_STMT (*def))
      && gimple_code (stmt) != GIMPLE_NOP)
    *def = make_new_ssa_for_def (stmt, opcode, op);

  FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
    make_new_ssa_for_def (stmt, opcode, op);
}

/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
{
  tree lhs;
  gimple *use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  reassoc_remove_stmt (&gsi);
  release_defs (stmt);
}

/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  tree orig_def = *def;
  gimple *stmt = SSA_NAME_DEF_STMT (*def);
  /* PR72835 - Record the stmt chain that has to be updated such that
     we don't use the same LHS when the values computed are different.
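     (Once OP has been stripped out of the chain, every statement on
     the walked path computes a different value than before, so each of
     those statements must receive a fresh SSA name.)  */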
  auto_vec<gimple *, 64> stmts_to_fix;

  do
    {
      tree name;

      if (opcode == MULT_EXPR)
        {
          if (stmt_is_power_of_op (stmt, op))
            {
              if (decrement_power (stmt) == 1)
                {
                  if (stmts_to_fix.length () > 0)
                    stmts_to_fix.pop ();
                  propagate_op_to_single_use (op, stmt, def);
                }
              break;
            }
          else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
            {
              if (gimple_assign_rhs1 (stmt) == op)
                {
                  tree cst = build_minus_one_cst (TREE_TYPE (op));
                  if (stmts_to_fix.length () > 0)
                    stmts_to_fix.pop ();
                  propagate_op_to_single_use (cst, stmt, def);
                  break;
                }
              else if (integer_minus_onep (op)
                       || real_minus_onep (op))
                {
                  gimple_assign_set_rhs_code
                    (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
                  break;
                }
            }
        }

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
         is ours simply propagate the other operand into the stmts
         single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
          && (name == op
              || gimple_assign_rhs2 (stmt) == op))
        {
          if (name == op)
            name = gimple_assign_rhs2 (stmt);
          if (stmts_to_fix.length () > 0)
            stmts_to_fix.pop ();
          propagate_op_to_single_use (name, stmt, def);
          break;
        }

      /* We might have a multiply of two __builtin_pow* calls, and
         the operand might be hiding in the rightmost one.  Likewise
         this can happen for a negate.  */
      if (opcode == MULT_EXPR
          && gimple_assign_rhs_code (stmt) == opcode
          && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
          && has_single_use (gimple_assign_rhs2 (stmt)))
        {
          gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
          if (stmt_is_power_of_op (stmt2, op))
            {
              if (decrement_power (stmt2) == 1)
                propagate_op_to_single_use (op, stmt2, def);
              else
                stmts_to_fix.safe_push (stmt2);
              break;
            }
          else if (is_gimple_assign (stmt2)
                   && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
            {
              if (gimple_assign_rhs1 (stmt2) == op)
                {
                  tree cst = build_minus_one_cst (TREE_TYPE (op));
                  propagate_op_to_single_use (cst, stmt2, def);
                  break;
                }
              else if (integer_minus_onep (op)
                       || real_minus_onep (op))
                {
                  stmts_to_fix.safe_push (stmt2);
                  gimple_assign_set_rhs_code
                    (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
                  break;
                }
            }
        }

      /* Continue walking the chain.  */
      gcc_assert (name != op
                  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
      stmts_to_fix.safe_push (stmt);
    }
  while (1);

  if (stmts_to_fix.length () > 0 || *def == orig_def)
    make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
}

/* Returns true if statement S1 dominates statement S2.  Like
   stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */

static bool
reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     SSA_NAME.  Assume it lives at the beginning of the function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 == bb2)
    {
      /* PHIs in the same basic block are assumed to be
         executed all in parallel, if only one stmt is a PHI,
         it dominates the other stmt in the same basic block.  */
      if (gimple_code (s1) == GIMPLE_PHI)
        return true;

      if (gimple_code (s2) == GIMPLE_PHI)
        return false;

      gcc_assert (gimple_uid (s1) && gimple_uid (s2));

      if (gimple_uid (s1) < gimple_uid (s2))
        return true;

      if (gimple_uid (s1) > gimple_uid (s2))
        return false;

      gimple_stmt_iterator gsi = gsi_for_stmt (s1);
      unsigned int uid = gimple_uid (s1);
      for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *s = gsi_stmt (gsi);
          if (gimple_uid (s) != uid)
            break;
          if (s == s2)
            return true;
        }

      return false;
    }

  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}

/* Insert STMT after INSERT_POINT.  */

static void
insert_stmt_after (gimple *stmt, gimple *insert_point)
{
  gimple_stmt_iterator gsi;
  basic_block bb;

  if (gimple_code (insert_point) == GIMPLE_PHI)
    bb = gimple_bb (insert_point);
  else if (!stmt_ends_bb_p (insert_point))
    {
      gsi = gsi_for_stmt (insert_point);
      gimple_set_uid (stmt, gimple_uid (insert_point));
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
      return;
    }
  else
    /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
       thus if it must end a basic block, it should be a call that can
       throw, or some assignment that can throw.  If it throws, the LHS
       of it will not be initialized though, so only valid places using
       the SSA_NAME should be dominated by the fallthru edge.  */
    bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
  gsi = gsi_after_labels (bb);
  if (gsi_end_p (gsi))
    {
      gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
      gimple_set_uid (stmt,
                      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
    }
  else
    gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}

/* Builds one statement performing OP1 OPCODE OP2, with a new SSA name
   of type TYPE holding the result.  Places the statement after the
   definition of either OP1 or OP2.  Returns the new statement.  */

static gimple *
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
{
  gimple *op1def = NULL, *op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gassign *sum;

  /* Create the addition statement.  */
  op = make_ssa_name (type);
  sum = gimple_build_assign (op, opcode, op1, op2);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
      if (gsi_end_p (gsi))
        {
          gimple_stmt_iterator gsi2
            = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
          gimple_set_uid (sum,
                          gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
        }
      else
        gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else
    {
      gimple *insert_point;
      if ((!op1def || gimple_nop_p (op1def))
          || (op2def && !gimple_nop_p (op2def)
              && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
        insert_point = op2def;
      else
        insert_point = op1def;
      insert_stmt_after (sum, insert_point);
    }
  update_stmt (sum);

  return sum;
}

/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

   - First we walk the addition chain *OPS looking for summands that
     are defined by a multiplication or a real division.  This results
     in the candidates bitmap with relevant indices into *OPS.

   - Second we build the chains of multiplications or divisions for
     these candidates, counting the number of occurrences of (operand, code)
     pairs in all of the candidates chains.

   - Third we sort the (operand, code) pairs by number of occurrence and
     process them starting with the pair with the most uses.

     * For each such pair we walk the candidates again to build a
       second candidate bitmap noting all multiplication/division chains
       that have at least one occurrence of (operand, code).

     * We build an alternate addition chain only covering these
       candidates with one (operand, code) operation removed from their
       multiplication/division chain.

     * The first candidate gets replaced by the alternate addition chain
       multiplied/divided by the operand.

     * All candidate chains get disabled for further processing and
       processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b) * x * y in one invocation of the reassociation pass.  */

static bool
undistribute_ops_list (enum tree_code opcode,
                       vec<operand_entry *> *ops, struct loop *loop)
{
  unsigned int length = ops->length ();
  operand_entry *oe1;
  unsigned i, j;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  vec<operand_entry *> *subops;
  bool changed = false;
  unsigned int next_oecount_id = 0;

  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process.  */
  auto_sbitmap candidates (length);
  bitmap_clear (candidates);
  nr_candidates = 0;
  FOR_EACH_VEC_ELT (*ops, i, oe1)
    {
      enum tree_code dcode;
      gimple *oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
        continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
        continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
           && dcode != RDIV_EXPR)
          || !is_reassociable_op (oe1def, dcode, loop))
        continue;

      bitmap_set_bit (candidates, i);
      nr_candidates++;
    }

  if (nr_candidates < 2)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
                          (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.  */
  cvec.create (0);

  hash_table<oecount_hasher> ctable (15);

  /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to workaround that limitation.  */
  typedef vec<operand_entry *> vec_operand_entry_t_heap;
  subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
    {
      gimple *oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
      oecode = gimple_assign_rhs_code (oedef);
      linearize_expr_tree (&subops[i], oedef,
                           associative_tree_code (oecode), false);

      FOR_EACH_VEC_ELT (subops[i], j, oe1)
        {
          oecount c;
          int *slot;
          int idx;
          c.oecode = oecode;
          c.cnt = 1;
          c.id = next_oecount_id++;
          c.op = oe1->op;
          cvec.safe_push (c);
          idx = cvec.length () + 41;
          slot = ctable.find_slot (idx, INSERT);
          if (!*slot)
            {
              *slot = idx;
            }
          else
            {
              cvec.pop ();
              cvec[*slot - 42].cnt++;
            }
        }
    }

  /* Sort the counting table.  */
  cvec.qsort (oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      FOR_EACH_VEC_ELT (cvec, j, c)
        {
          fprintf (dump_file, " %u %s: ", c->cnt,
                   c->oecode == MULT_EXPR
                   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
          print_generic_expr (dump_file, c->op);
          fprintf (dump_file, "\n");
        }
    }

  /* Process the (operand, code) pairs in order of most occurrence.  */
  auto_sbitmap candidates2 (length);
  while (!cvec.is_empty ())
    {
      oecount *c = &cvec.last ();
      if (c->cnt < 2)
        break;

      /* Now collect the operands in the outer chain that contain
         the common operand in their inner chain.  */
      bitmap_clear (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
        {
          gimple *oedef;
          enum tree_code oecode;
          unsigned j;
          tree op = (*ops)[i]->op;

          /* If we undistributed in this chain already this may be
             a constant.  */
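          /* (The merging step below rewrites all candidate entries
             except the first one into zero constants.)  */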
          if (TREE_CODE (op) != SSA_NAME)
            continue;

          oedef = SSA_NAME_DEF_STMT (op);
          oecode = gimple_assign_rhs_code (oedef);
          if (oecode != c->oecode)
            continue;

          FOR_EACH_VEC_ELT (subops[i], j, oe1)
            {
              if (oe1->op == c->op)
                {
                  bitmap_set_bit (candidates2, i);
                  ++nr_candidates2;
                  break;
                }
            }
        }

      if (nr_candidates2 >= 2)
        {
          operand_entry *oe1, *oe2;
          gimple *prod;
          int first = bitmap_first_set_bit (candidates2);

          /* Build the new addition chain.  */
          oe1 = (*ops)[first];
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Building (");
              print_generic_expr (dump_file, oe1->op);
            }
          zero_one_operation (&oe1->op, c->oecode, c->op);
          EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
            {
              gimple *sum;
              oe2 = (*ops)[i];
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, " + ");
                  print_generic_expr (dump_file, oe2->op);
                }
              zero_one_operation (&oe2->op, c->oecode, c->op);
              sum = build_and_add_sum (TREE_TYPE (oe1->op),
                                       oe1->op, oe2->op, opcode);
              oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
              oe2->rank = 0;
              oe1->op = gimple_get_lhs (sum);
            }

          /* Apply the multiplication/division.  */
          prod = build_and_add_sum (TREE_TYPE (oe1->op),
                                    oe1->op, c->op, c->oecode);
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
              print_generic_expr (dump_file, c->op);
              fprintf (dump_file, "\n");
            }

          /* Record it in the addition chain and disable further
             undistribution with this op.  */
          oe1->op = gimple_assign_lhs (prod);
          oe1->rank = get_rank (oe1->op);
          subops[first].release ();

          changed = true;
        }

      cvec.pop ();
    }

  for (i = 0; i < ops->length (); ++i)
    subops[i].release ();
  free (subops);
  cvec.release ();

  return changed;
}

/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
   expression, examine the other OPS to see if any of them are comparisons
   of the same values, which we may be able to combine or eliminate.
   For example, we can rewrite (a < b) | (a == b) as (a <= b).  */

static bool
eliminate_redundant_comparison (enum tree_code opcode,
                                vec<operand_entry *> *ops,
                                unsigned int currindex,
                                operand_entry *curr)
{
  tree op1, op2;
  enum tree_code lcode, rcode;
  gimple *def1, *def2;
  int i;
  operand_entry *oe;

  if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
    return false;

  /* Check that CURR is a comparison.  */
  if (TREE_CODE (curr->op) != SSA_NAME)
    return false;
  def1 = SSA_NAME_DEF_STMT (curr->op);
  if (!is_gimple_assign (def1))
    return false;
  lcode = gimple_assign_rhs_code (def1);
  if (TREE_CODE_CLASS (lcode) != tcc_comparison)
    return false;
  op1 = gimple_assign_rhs1 (def1);
  op2 = gimple_assign_rhs2 (def1);

  /* Now look for a similar comparison in the remaining OPS.  */
  for (i = currindex + 1; ops->iterate (i, &oe); i++)
    {
      tree t;

      if (TREE_CODE (oe->op) != SSA_NAME)
        continue;
      def2 = SSA_NAME_DEF_STMT (oe->op);
      if (!is_gimple_assign (def2))
        continue;
      rcode = gimple_assign_rhs_code (def2);
      if (TREE_CODE_CLASS (rcode) != tcc_comparison)
        continue;

      /* If we got here, we have a match.  See if we can combine the
         two comparisons.  */
      if (opcode == BIT_IOR_EXPR)
        t = maybe_fold_or_comparisons (lcode, op1, op2,
                                       rcode, gimple_assign_rhs1 (def2),
                                       gimple_assign_rhs2 (def2));
      else
        t = maybe_fold_and_comparisons (lcode, op1, op2,
                                        rcode, gimple_assign_rhs1 (def2),
                                        gimple_assign_rhs2 (def2));
      if (!t)
        continue;

      /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
         always give us a boolean_type_node value back.  If the original
         BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
         we need to convert.  */
      if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
        t = fold_convert (TREE_TYPE (curr->op), t);

      if (TREE_CODE (t) != INTEGER_CST
          && !operand_equal_p (t, curr->op, 0))
        {
          enum tree_code subcode;
          tree newop1, newop2;
          if (!COMPARISON_CLASS_P (t))
            continue;
          extract_ops_from_tree (t, &subcode, &newop1, &newop2);
          STRIP_USELESS_TYPE_CONVERSION (newop1);
          STRIP_USELESS_TYPE_CONVERSION (newop2);
          if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
            continue;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Equivalence: ");
          print_generic_expr (dump_file, curr->op);
          fprintf (dump_file, " %s ", op_symbol_code (opcode));
          print_generic_expr (dump_file, oe->op);
          fprintf (dump_file, " -> ");
          print_generic_expr (dump_file, t);
          fprintf (dump_file, "\n");
        }

      /* Now we can delete oe, as it has been subsumed by the new combined
         expression t.  */
      ops->ordered_remove (i);
      reassociate_stats.ops_eliminated++;

      /* If t is the same as curr->op, we're done.  Otherwise we must
         replace curr->op with t.  Special case is if we got a constant
         back, in which case we add it to the end instead of in place of
         the current entry.  */
      if (TREE_CODE (t) == INTEGER_CST)
        {
          ops->ordered_remove (currindex);
          add_to_ops_vec (ops, t);
        }
      else if (!operand_equal_p (t, curr->op, 0))
        {
          gimple *sum;
          enum tree_code subcode;
          tree newop1;
          tree newop2;
          gcc_assert (COMPARISON_CLASS_P (t));
          extract_ops_from_tree (t, &subcode, &newop1, &newop2);
          STRIP_USELESS_TYPE_CONVERSION (newop1);
          STRIP_USELESS_TYPE_CONVERSION (newop2);
          gcc_checking_assert (is_gimple_val (newop1)
                               && is_gimple_val (newop2));
          sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
          curr->op = gimple_get_lhs (sum);
        }
      return true;
    }

  return false;
}


/* Transform repeated addition of same values into multiply with
   constant.
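   For example, x + y + x + x becomes y + x * 3; for
   floating-point types this is done only under
   -funsafe-math-optimizations.  */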
static bool
transform_add_to_multiply (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  tree op = NULL_TREE;
  int j;
  int i, start = -1, end = 0, count = 0;
  auto_vec<std::pair <int, int> > indxs;
  bool changed = false;

  if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
      && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
          || !flag_unsafe_math_optimizations))
    return false;

  /* Look for repeated operands.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (start == -1)
        {
          count = 1;
          op = oe->op;
          start = i;
        }
      else if (operand_equal_p (oe->op, op, 0))
        {
          count++;
          end = i;
        }
      else
        {
          if (count > 1)
            indxs.safe_push (std::make_pair (start, end));
          count = 1;
          op = oe->op;
          start = i;
        }
    }

  if (count > 1)
    indxs.safe_push (std::make_pair (start, end));

  for (j = indxs.length () - 1; j >= 0; --j)
    {
      /* Convert repeated operand addition to multiplication.  */
      start = indxs[j].first;
      end = indxs[j].second;
      op = (*ops)[start]->op;
      count = end - start + 1;
      for (i = end; i >= start; --i)
        ops->unordered_remove (i);
      tree tmp = make_ssa_name (TREE_TYPE (op));
      tree cst = build_int_cst (integer_type_node, count);
      gassign *mul_stmt
        = gimple_build_assign (tmp, MULT_EXPR,
                               op, fold_convert (TREE_TYPE (op), cst));
      gimple_set_visited (mul_stmt, true);
      add_to_ops_vec (ops, tmp, mul_stmt);
      changed = true;
    }

  return changed;
}


/* Perform various identities and other optimizations on the list of
   operand entries, stored in OPS.  The tree code for the binary
   operation between all the operands is OPCODE.  */

static void
optimize_ops_list (enum tree_code opcode,
                   vec<operand_entry *> *ops)
{
  unsigned int length = ops->length ();
  unsigned int i;
  operand_entry *oe;
  operand_entry *oelast = NULL;
  bool iterate = false;

  if (length == 1)
    return;

  oelast = ops->last ();

  /* If the last two are constants, pop the constants off, merge them
     and try the next two.
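     For example, for a + 3 + 5 the two
     constants sort last, fold to 8, and the list becomes a + 8.  */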
*/ 1990 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op)) 1991 { 1992 operand_entry *oelm1 = (*ops)[length - 2]; 1993 1994 if (oelm1->rank == 0 1995 && is_gimple_min_invariant (oelm1->op) 1996 && useless_type_conversion_p (TREE_TYPE (oelm1->op), 1997 TREE_TYPE (oelast->op))) 1998 { 1999 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op), 2000 oelm1->op, oelast->op); 2001 2002 if (folded && is_gimple_min_invariant (folded)) 2003 { 2004 if (dump_file && (dump_flags & TDF_DETAILS)) 2005 fprintf (dump_file, "Merging constants\n"); 2006 2007 ops->pop (); 2008 ops->pop (); 2009 2010 add_to_ops_vec (ops, folded); 2011 reassociate_stats.constants_eliminated++; 2012 2013 optimize_ops_list (opcode, ops); 2014 return; 2015 } 2016 } 2017 } 2018 2019 eliminate_using_constants (opcode, ops); 2020 oelast = NULL; 2021 2022 for (i = 0; ops->iterate (i, &oe);) 2023 { 2024 bool done = false; 2025 2026 if (eliminate_not_pairs (opcode, ops, i, oe)) 2027 return; 2028 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast) 2029 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe)) 2030 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe))) 2031 { 2032 if (done) 2033 return; 2034 iterate = true; 2035 oelast = NULL; 2036 continue; 2037 } 2038 oelast = oe; 2039 i++; 2040 } 2041 2042 length = ops->length (); 2043 oelast = ops->last (); 2044 2045 if (iterate) 2046 optimize_ops_list (opcode, ops); 2047 } 2048 2049 /* The following functions are subroutines to optimize_range_tests and allow 2050 it to try to change a logical combination of comparisons into a range 2051 test. 2052 2053 For example, both 2054 X == 2 || X == 5 || X == 3 || X == 4 2055 and 2056 X >= 2 && X <= 5 2057 are converted to 2058 (unsigned) (X - 2) <= 3 2059 2060 For more information see comments above fold_test_range in fold-const.c, 2061 this implementation is for GIMPLE. */ 2062 2063 struct range_entry 2064 { 2065 tree exp; 2066 tree low; 2067 tree high; 2068 bool in_p; 2069 bool strict_overflow_p; 2070 unsigned int idx, next; 2071 }; 2072 2073 /* This is similar to make_range in fold-const.c, but on top of 2074 GIMPLE instead of trees. If EXP is non-NULL, it should be 2075 an SSA_NAME and STMT argument is ignored, otherwise STMT 2076 argument should be a GIMPLE_COND. */ 2077 2078 static void 2079 init_range_entry (struct range_entry *r, tree exp, gimple *stmt) 2080 { 2081 int in_p; 2082 tree low, high; 2083 bool is_bool, strict_overflow_p; 2084 2085 r->exp = NULL_TREE; 2086 r->in_p = false; 2087 r->strict_overflow_p = false; 2088 r->low = NULL_TREE; 2089 r->high = NULL_TREE; 2090 if (exp != NULL_TREE 2091 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))) 2092 return; 2093 2094 /* Start with simply saying "EXP != 0" and then look at the code of EXP 2095 and see if we can refine the range. Some of the cases below may not 2096 happen, but it doesn't seem worth worrying about this. We "continue" 2097 the outer loop when we've changed something; otherwise we "break" 2098 the switch, which will "break" the while. */ 2099 low = exp ? 
build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node; 2100 high = low; 2101 in_p = 0; 2102 strict_overflow_p = false; 2103 is_bool = false; 2104 if (exp == NULL_TREE) 2105 is_bool = true; 2106 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1) 2107 { 2108 if (TYPE_UNSIGNED (TREE_TYPE (exp))) 2109 is_bool = true; 2110 else 2111 return; 2112 } 2113 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE) 2114 is_bool = true; 2115 2116 while (1) 2117 { 2118 enum tree_code code; 2119 tree arg0, arg1, exp_type; 2120 tree nexp; 2121 location_t loc; 2122 2123 if (exp != NULL_TREE) 2124 { 2125 if (TREE_CODE (exp) != SSA_NAME 2126 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp)) 2127 break; 2128 2129 stmt = SSA_NAME_DEF_STMT (exp); 2130 if (!is_gimple_assign (stmt)) 2131 break; 2132 2133 code = gimple_assign_rhs_code (stmt); 2134 arg0 = gimple_assign_rhs1 (stmt); 2135 arg1 = gimple_assign_rhs2 (stmt); 2136 exp_type = TREE_TYPE (exp); 2137 } 2138 else 2139 { 2140 code = gimple_cond_code (stmt); 2141 arg0 = gimple_cond_lhs (stmt); 2142 arg1 = gimple_cond_rhs (stmt); 2143 exp_type = boolean_type_node; 2144 } 2145 2146 if (TREE_CODE (arg0) != SSA_NAME) 2147 break; 2148 loc = gimple_location (stmt); 2149 switch (code) 2150 { 2151 case BIT_NOT_EXPR: 2152 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE 2153 /* Ensure the range is either +[-,0], +[0,0], 2154 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or 2155 -[1,1]. If it is e.g. +[-,-] or -[-,-] 2156 or similar expression of unconditional true or 2157 false, it should not be negated. */ 2158 && ((high && integer_zerop (high)) 2159 || (low && integer_onep (low)))) 2160 { 2161 in_p = !in_p; 2162 exp = arg0; 2163 continue; 2164 } 2165 break; 2166 case SSA_NAME: 2167 exp = arg0; 2168 continue; 2169 CASE_CONVERT: 2170 if (is_bool) 2171 goto do_default; 2172 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1) 2173 { 2174 if (TYPE_UNSIGNED (TREE_TYPE (arg0))) 2175 is_bool = true; 2176 else 2177 return; 2178 } 2179 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE) 2180 is_bool = true; 2181 goto do_default; 2182 case EQ_EXPR: 2183 case NE_EXPR: 2184 case LT_EXPR: 2185 case LE_EXPR: 2186 case GE_EXPR: 2187 case GT_EXPR: 2188 is_bool = true; 2189 /* FALLTHRU */ 2190 default: 2191 if (!is_bool) 2192 return; 2193 do_default: 2194 nexp = make_range_step (loc, code, arg0, arg1, exp_type, 2195 &low, &high, &in_p, 2196 &strict_overflow_p); 2197 if (nexp != NULL_TREE) 2198 { 2199 exp = nexp; 2200 gcc_assert (TREE_CODE (exp) == SSA_NAME); 2201 continue; 2202 } 2203 break; 2204 } 2205 break; 2206 } 2207 if (is_bool) 2208 { 2209 r->exp = exp; 2210 r->in_p = in_p; 2211 r->low = low; 2212 r->high = high; 2213 r->strict_overflow_p = strict_overflow_p; 2214 } 2215 } 2216 2217 /* Comparison function for qsort. Sort entries 2218 without SSA_NAME exp first, then with SSA_NAMEs sorted 2219 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs 2220 by increasing ->low and if ->low is the same, by increasing 2221 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE 2222 maximum. */ 2223 2224 static int 2225 range_entry_cmp (const void *a, const void *b) 2226 { 2227 const struct range_entry *p = (const struct range_entry *) a; 2228 const struct range_entry *q = (const struct range_entry *) b; 2229 2230 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME) 2231 { 2232 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) 2233 { 2234 /* Group range_entries for the same SSA_NAME together. 
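   E.g. (made-up SSA names): the entries x_5 +[1, 1] and x_5 +[3, 4]
   sort next to each other, before or after y_7's entries depending on
   SSA_NAME_VERSION, so the merging loop in optimize_range_tests only
   has to inspect adjacent entries.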
*/ 2235 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp)) 2236 return -1; 2237 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp)) 2238 return 1; 2239 /* If ->low is different, NULL low goes first, then by 2240 ascending low. */ 2241 if (p->low != NULL_TREE) 2242 { 2243 if (q->low != NULL_TREE) 2244 { 2245 tree tem = fold_binary (LT_EXPR, boolean_type_node, 2246 p->low, q->low); 2247 if (tem && integer_onep (tem)) 2248 return -1; 2249 tem = fold_binary (GT_EXPR, boolean_type_node, 2250 p->low, q->low); 2251 if (tem && integer_onep (tem)) 2252 return 1; 2253 } 2254 else 2255 return 1; 2256 } 2257 else if (q->low != NULL_TREE) 2258 return -1; 2259 /* If ->high is different, NULL high goes last, before that by 2260 ascending high. */ 2261 if (p->high != NULL_TREE) 2262 { 2263 if (q->high != NULL_TREE) 2264 { 2265 tree tem = fold_binary (LT_EXPR, boolean_type_node, 2266 p->high, q->high); 2267 if (tem && integer_onep (tem)) 2268 return -1; 2269 tem = fold_binary (GT_EXPR, boolean_type_node, 2270 p->high, q->high); 2271 if (tem && integer_onep (tem)) 2272 return 1; 2273 } 2274 else 2275 return -1; 2276 } 2277 else if (q->high != NULL_TREE) 2278 return 1; 2279 /* If both ranges are the same, sort below by ascending idx. */ 2280 } 2281 else 2282 return 1; 2283 } 2284 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) 2285 return -1; 2286 2287 if (p->idx < q->idx) 2288 return -1; 2289 else 2290 { 2291 gcc_checking_assert (p->idx > q->idx); 2292 return 1; 2293 } 2294 } 2295 2296 /* Helper function for update_range_test. Force EXPR into an SSA_NAME, 2297 insert needed statements BEFORE or after GSI. */ 2298 2299 static tree 2300 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before) 2301 { 2302 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING; 2303 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m); 2304 if (TREE_CODE (ret) != SSA_NAME) 2305 { 2306 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret); 2307 if (before) 2308 gsi_insert_before (gsi, g, GSI_SAME_STMT); 2309 else 2310 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING); 2311 ret = gimple_assign_lhs (g); 2312 } 2313 return ret; 2314 } 2315 2316 /* Helper routine of optimize_range_test. 2317 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for 2318 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges, 2319 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE 2320 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to 2321 an array of COUNT pointers to other ranges. Return 2322 true if the range merge has been successful. 2323 If OPCODE is ERROR_MARK, this is called from within 2324 maybe_optimize_range_tests and is performing inter-bb range optimization. 2325 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in 2326 oe->rank. */ 2327 2328 static bool 2329 update_range_test (struct range_entry *range, struct range_entry *otherrange, 2330 struct range_entry **otherrangep, 2331 unsigned int count, enum tree_code opcode, 2332 vec<operand_entry *> *ops, tree exp, gimple_seq seq, 2333 bool in_p, tree low, tree high, bool strict_overflow_p) 2334 { 2335 operand_entry *oe = (*ops)[range->idx]; 2336 tree op = oe->op; 2337 gimple *stmt = op ? SSA_NAME_DEF_STMT (op) 2338 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); 2339 location_t loc = gimple_location (stmt); 2340 tree optype = op ? 
TREE_TYPE (op) : boolean_type_node; 2341 tree tem = build_range_check (loc, optype, unshare_expr (exp), 2342 in_p, low, high); 2343 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; 2344 gimple_stmt_iterator gsi; 2345 unsigned int i, uid; 2346 2347 if (tem == NULL_TREE) 2348 return false; 2349 2350 /* If op is a default def SSA_NAME, there is no place to insert the 2351 new comparison. Give up, unless we can use OP itself as the 2352 range test. */ 2353 if (op && SSA_NAME_IS_DEFAULT_DEF (op)) 2354 { 2355 if (op == range->exp 2356 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype)) 2357 || TREE_CODE (optype) == BOOLEAN_TYPE) 2358 && (op == tem 2359 || (TREE_CODE (tem) == EQ_EXPR 2360 && TREE_OPERAND (tem, 0) == op 2361 && integer_onep (TREE_OPERAND (tem, 1)))) 2362 && opcode != BIT_IOR_EXPR 2363 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR)) 2364 { 2365 stmt = NULL; 2366 tem = op; 2367 } 2368 else 2369 return false; 2370 } 2371 2372 if (strict_overflow_p && issue_strict_overflow_warning (wc)) 2373 warning_at (loc, OPT_Wstrict_overflow, 2374 "assuming signed overflow does not occur " 2375 "when simplifying range test"); 2376 2377 if (dump_file && (dump_flags & TDF_DETAILS)) 2378 { 2379 struct range_entry *r; 2380 fprintf (dump_file, "Optimizing range tests "); 2381 print_generic_expr (dump_file, range->exp); 2382 fprintf (dump_file, " %c[", range->in_p ? '+' : '-'); 2383 print_generic_expr (dump_file, range->low); 2384 fprintf (dump_file, ", "); 2385 print_generic_expr (dump_file, range->high); 2386 fprintf (dump_file, "]"); 2387 for (i = 0; i < count; i++) 2388 { 2389 if (otherrange) 2390 r = otherrange + i; 2391 else 2392 r = otherrangep[i]; 2393 if (r->exp 2394 && r->exp != range->exp 2395 && TREE_CODE (r->exp) == SSA_NAME) 2396 { 2397 fprintf (dump_file, " and "); 2398 print_generic_expr (dump_file, r->exp); 2399 } 2400 else 2401 fprintf (dump_file, " and"); 2402 fprintf (dump_file, " %c[", r->in_p ? '+' : '-'); 2403 print_generic_expr (dump_file, r->low); 2404 fprintf (dump_file, ", "); 2405 print_generic_expr (dump_file, r->high); 2406 fprintf (dump_file, "]"); 2407 } 2408 fprintf (dump_file, "\n into "); 2409 print_generic_expr (dump_file, tem); 2410 fprintf (dump_file, "\n"); 2411 } 2412 2413 if (opcode == BIT_IOR_EXPR 2414 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 2415 tem = invert_truthvalue_loc (loc, tem); 2416 2417 tem = fold_convert_loc (loc, optype, tem); 2418 if (stmt) 2419 { 2420 gsi = gsi_for_stmt (stmt); 2421 uid = gimple_uid (stmt); 2422 } 2423 else 2424 { 2425 gsi = gsi_none (); 2426 uid = 0; 2427 } 2428 if (stmt == NULL) 2429 gcc_checking_assert (tem == op); 2430 /* In rare cases range->exp can be equal to lhs of stmt. 2431 In that case we have to insert after the stmt rather than before 2432 it. If stmt is a PHI, insert it at the start of the basic block.
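   A sketch of the rare case (made-up names): if stmt is _5 = _4 | 1
   and range->exp is _5 itself, the combined range test references _5
   and therefore must be emitted after its definition, not before it;
   that is the gsi_insert_seq_after path below.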
*/ 2433 else if (op != range->exp) 2434 { 2435 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); 2436 tem = force_into_ssa_name (&gsi, tem, true); 2437 gsi_prev (&gsi); 2438 } 2439 else if (gimple_code (stmt) != GIMPLE_PHI) 2440 { 2441 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING); 2442 tem = force_into_ssa_name (&gsi, tem, false); 2443 } 2444 else 2445 { 2446 gsi = gsi_after_labels (gimple_bb (stmt)); 2447 if (!gsi_end_p (gsi)) 2448 uid = gimple_uid (gsi_stmt (gsi)); 2449 else 2450 { 2451 gsi = gsi_start_bb (gimple_bb (stmt)); 2452 uid = 1; 2453 while (!gsi_end_p (gsi)) 2454 { 2455 uid = gimple_uid (gsi_stmt (gsi)); 2456 gsi_next (&gsi); 2457 } 2458 } 2459 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); 2460 tem = force_into_ssa_name (&gsi, tem, true); 2461 if (gsi_end_p (gsi)) 2462 gsi = gsi_last_bb (gimple_bb (stmt)); 2463 else 2464 gsi_prev (&gsi); 2465 } 2466 for (; !gsi_end_p (gsi); gsi_prev (&gsi)) 2467 if (gimple_uid (gsi_stmt (gsi))) 2468 break; 2469 else 2470 gimple_set_uid (gsi_stmt (gsi), uid); 2471 2472 oe->op = tem; 2473 range->exp = exp; 2474 range->low = low; 2475 range->high = high; 2476 range->in_p = in_p; 2477 range->strict_overflow_p = false; 2478 2479 for (i = 0; i < count; i++) 2480 { 2481 if (otherrange) 2482 range = otherrange + i; 2483 else 2484 range = otherrangep[i]; 2485 oe = (*ops)[range->idx]; 2486 /* Now change all the other range test immediate uses, so that 2487 those tests will be optimized away. */ 2488 if (opcode == ERROR_MARK) 2489 { 2490 if (oe->op) 2491 oe->op = build_int_cst (TREE_TYPE (oe->op), 2492 oe->rank == BIT_IOR_EXPR ? 0 : 1); 2493 else 2494 oe->op = (oe->rank == BIT_IOR_EXPR 2495 ? boolean_false_node : boolean_true_node); 2496 } 2497 else 2498 oe->op = error_mark_node; 2499 range->exp = NULL_TREE; 2500 range->low = NULL_TREE; 2501 range->high = NULL_TREE; 2502 } 2503 return true; 2504 } 2505 2506 /* Optimize X == CST1 || X == CST2 2507 if popcount (CST1 ^ CST2) == 1 into 2508 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)). 2509 Similarly for ranges. E.g. 2510 X != 2 && X != 3 && X != 10 && X != 11 2511 will be transformed by the previous optimization into 2512 !((X - 2U) <= 1U || (X - 10U) <= 1U) 2513 and this loop can transform that into 2514 !(((X & ~8) - 2U) <= 1U). */ 2515 2516 static bool 2517 optimize_range_tests_xor (enum tree_code opcode, tree type, 2518 tree lowi, tree lowj, tree highi, tree highj, 2519 vec<operand_entry *> *ops, 2520 struct range_entry *rangei, 2521 struct range_entry *rangej) 2522 { 2523 tree lowxor, highxor, tem, exp; 2524 /* Check lowi ^ lowj == highi ^ highj and 2525 popcount (lowi ^ lowj) == 1. */ 2526 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj); 2527 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST) 2528 return false; 2529 if (!integer_pow2p (lowxor)) 2530 return false; 2531 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj); 2532 if (!tree_int_cst_equal (lowxor, highxor)) 2533 return false; 2534 2535 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor); 2536 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem); 2537 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem); 2538 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem); 2539 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp, 2540 NULL, rangei->in_p, lowj, highj, 2541 rangei->strict_overflow_p 2542 || rangej->strict_overflow_p)) 2543 return true; 2544 return false; 2545 } 2546 2547 /* Optimize X == CST1 || X == CST2 2548 if popcount (CST2 - CST1) == 1 into 2549 ((X - CST1) & ~(CST2 - CST1)) == 0. 
2550 Similarly for ranges. E.g. 2551 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46 2552 || X == 75 || X == 45 2553 will be transformed by the previous optimization into 2554 (X - 43U) <= 3U || (X - 75U) <= 3U 2555 and this loop can transform that into 2556 ((X - 43U) & ~(75U - 43U)) <= 3U. */ 2557 static bool 2558 optimize_range_tests_diff (enum tree_code opcode, tree type, 2559 tree lowi, tree lowj, tree highi, tree highj, 2560 vec<operand_entry *> *ops, 2561 struct range_entry *rangei, 2562 struct range_entry *rangej) 2563 { 2564 tree tem1, tem2, mask; 2565 /* Check highi - lowi == highj - lowj. */ 2566 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi); 2567 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) 2568 return false; 2569 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj); 2570 if (!tree_int_cst_equal (tem1, tem2)) 2571 return false; 2572 /* Check popcount (lowj - lowi) == 1. */ 2573 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi); 2574 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) 2575 return false; 2576 if (!integer_pow2p (tem1)) 2577 return false; 2578 2579 type = unsigned_type_for (type); 2580 tem1 = fold_convert (type, tem1); 2581 tem2 = fold_convert (type, tem2); 2582 lowi = fold_convert (type, lowi); 2583 mask = fold_build1 (BIT_NOT_EXPR, type, tem1); 2584 tem1 = fold_build2 (MINUS_EXPR, type, 2585 fold_convert (type, rangei->exp), lowi); 2586 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask); 2587 lowj = build_int_cst (type, 0); 2588 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1, 2589 NULL, rangei->in_p, lowj, tem2, 2590 rangei->strict_overflow_p 2591 || rangej->strict_overflow_p)) 2592 return true; 2593 return false; 2594 } 2595 2596 /* Perform the checks that are common to optimize_range_tests_xor and 2597 optimize_range_tests_diff. 2598 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor, 2599 else call optimize_range_tests_diff. */ 2600 2601 static bool 2602 optimize_range_tests_1 (enum tree_code opcode, int first, int length, 2603 bool optimize_xor, vec<operand_entry *> *ops, 2604 struct range_entry *ranges) 2605 { 2606 int i, j; 2607 bool any_changes = false; 2608 for (i = first; i < length; i++) 2609 { 2610 tree lowi, highi, lowj, highj, type, tem; 2611 2612 if (ranges[i].exp == NULL_TREE || ranges[i].in_p) 2613 continue; 2614 type = TREE_TYPE (ranges[i].exp); 2615 if (!INTEGRAL_TYPE_P (type)) 2616 continue; 2617 lowi = ranges[i].low; 2618 if (lowi == NULL_TREE) 2619 lowi = TYPE_MIN_VALUE (type); 2620 highi = ranges[i].high; 2621 if (highi == NULL_TREE) 2622 continue; 2623 for (j = i + 1; j < length && j < i + 64; j++) 2624 { 2625 bool changes; 2626 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p) 2627 continue; 2628 lowj = ranges[j].low; 2629 if (lowj == NULL_TREE) 2630 continue; 2631 highj = ranges[j].high; 2632 if (highj == NULL_TREE) 2633 highj = TYPE_MAX_VALUE (type); 2634 /* Check lowj > highi. */ 2635 tem = fold_binary (GT_EXPR, boolean_type_node, 2636 lowj, highi); 2637 if (tem == NULL_TREE || !integer_onep (tem)) 2638 continue; 2639 if (optimize_xor) 2640 changes = optimize_range_tests_xor (opcode, type, lowi, lowj, 2641 highi, highj, ops, 2642 ranges + i, ranges + j); 2643 else 2644 changes = optimize_range_tests_diff (opcode, type, lowi, lowj, 2645 highi, highj, ops, 2646 ranges + i, ranges + j); 2647 if (changes) 2648 { 2649 any_changes = true; 2650 break; 2651 } 2652 } 2653 } 2654 return any_changes; 2655 } 2656 2657 /* Helper function of optimize_range_tests_to_bit_test.
Handle a single 2658 range, EXP, LOW, HIGH, compute bit mask of bits to test and return 2659 EXP on success, NULL otherwise. */ 2660 2661 static tree 2662 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high, 2663 wide_int *mask, tree *totallowp) 2664 { 2665 tree tem = int_const_binop (MINUS_EXPR, high, low); 2666 if (tem == NULL_TREE 2667 || TREE_CODE (tem) != INTEGER_CST 2668 || TREE_OVERFLOW (tem) 2669 || tree_int_cst_sgn (tem) == -1 2670 || compare_tree_int (tem, prec) != -1) 2671 return NULL_TREE; 2672 2673 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1; 2674 *mask = wi::shifted_mask (0, max, false, prec); 2675 if (TREE_CODE (exp) == BIT_AND_EXPR 2676 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST) 2677 { 2678 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1)); 2679 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp))); 2680 if (wi::popcount (msk) == 1 2681 && wi::ltu_p (msk, prec - max)) 2682 { 2683 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec); 2684 max += msk.to_uhwi (); 2685 exp = TREE_OPERAND (exp, 0); 2686 if (integer_zerop (low) 2687 && TREE_CODE (exp) == PLUS_EXPR 2688 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST) 2689 { 2690 tree ret = TREE_OPERAND (exp, 0); 2691 STRIP_NOPS (ret); 2692 widest_int bias 2693 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)), 2694 TYPE_PRECISION (TREE_TYPE (low)))); 2695 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias); 2696 if (totallowp) 2697 { 2698 *totallowp = tbias; 2699 return ret; 2700 } 2701 else if (!tree_int_cst_lt (totallow, tbias)) 2702 return NULL_TREE; 2703 bias = wi::to_widest (tbias); 2704 bias -= wi::to_widest (totallow); 2705 if (bias >= 0 && bias < prec - max) 2706 { 2707 *mask = wi::lshift (*mask, bias); 2708 return ret; 2709 } 2710 } 2711 } 2712 } 2713 if (totallowp) 2714 return exp; 2715 if (!tree_int_cst_lt (totallow, low)) 2716 return exp; 2717 tem = int_const_binop (MINUS_EXPR, low, totallow); 2718 if (tem == NULL_TREE 2719 || TREE_CODE (tem) != INTEGER_CST 2720 || TREE_OVERFLOW (tem) 2721 || compare_tree_int (tem, prec - max) == 1) 2722 return NULL_TREE; 2723 2724 *mask = wi::lshift (*mask, wi::to_widest (tem)); 2725 return exp; 2726 } 2727 2728 /* Attempt to optimize small range tests using bit test. 2729 E.g. 
2730 X != 43 && X != 76 && X != 44 && X != 78 && X != 49 2731 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82 2732 has been optimized by earlier optimizations into: 2733 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82 2734 As the whole 43 through 82 range spans less than 64 numbers, 2735 for 64-bit word targets optimize that into: 2736 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */ 2737 2738 static bool 2739 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length, 2740 vec<operand_entry *> *ops, 2741 struct range_entry *ranges) 2742 { 2743 int i, j; 2744 bool any_changes = false; 2745 int prec = GET_MODE_BITSIZE (word_mode); 2746 auto_vec<struct range_entry *, 64> candidates; 2747 2748 for (i = first; i < length - 2; i++) 2749 { 2750 tree lowi, highi, lowj, highj, type; 2751 2752 if (ranges[i].exp == NULL_TREE || ranges[i].in_p) 2753 continue; 2754 type = TREE_TYPE (ranges[i].exp); 2755 if (!INTEGRAL_TYPE_P (type)) 2756 continue; 2757 lowi = ranges[i].low; 2758 if (lowi == NULL_TREE) 2759 lowi = TYPE_MIN_VALUE (type); 2760 highi = ranges[i].high; 2761 if (highi == NULL_TREE) 2762 continue; 2763 wide_int mask; 2764 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi, 2765 highi, &mask, &lowi); 2766 if (exp == NULL_TREE) 2767 continue; 2768 bool strict_overflow_p = ranges[i].strict_overflow_p; 2769 candidates.truncate (0); 2770 int end = MIN (i + 64, length); 2771 for (j = i + 1; j < end; j++) 2772 { 2773 tree exp2; 2774 if (ranges[j].exp == NULL_TREE || ranges[j].in_p) 2775 continue; 2776 if (ranges[j].exp == exp) 2777 ; 2778 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR) 2779 { 2780 exp2 = TREE_OPERAND (ranges[j].exp, 0); 2781 if (exp2 == exp) 2782 ; 2783 else if (TREE_CODE (exp2) == PLUS_EXPR) 2784 { 2785 exp2 = TREE_OPERAND (exp2, 0); 2786 STRIP_NOPS (exp2); 2787 if (exp2 != exp) 2788 continue; 2789 } 2790 else 2791 continue; 2792 } 2793 else 2794 continue; 2795 lowj = ranges[j].low; 2796 if (lowj == NULL_TREE) 2797 continue; 2798 highj = ranges[j].high; 2799 if (highj == NULL_TREE) 2800 highj = TYPE_MAX_VALUE (type); 2801 wide_int mask2; 2802 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj, 2803 highj, &mask2, NULL); 2804 if (exp2 != exp) 2805 continue; 2806 mask |= mask2; 2807 strict_overflow_p |= ranges[j].strict_overflow_p; 2808 candidates.safe_push (&ranges[j]); 2809 } 2810 2811 /* If we need otherwise 3 or more comparisons, use a bit test. */ 2812 if (candidates.length () >= 2) 2813 { 2814 tree high = wide_int_to_tree (TREE_TYPE (lowi), 2815 wi::to_widest (lowi) 2816 + prec - 1 - wi::clz (mask)); 2817 operand_entry *oe = (*ops)[ranges[i].idx]; 2818 tree op = oe->op; 2819 gimple *stmt = op ? SSA_NAME_DEF_STMT (op) 2820 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); 2821 location_t loc = gimple_location (stmt); 2822 tree optype = op ? TREE_TYPE (op) : boolean_type_node; 2823 2824 /* See if it isn't cheaper to pretend the minimum value of the 2825 range is 0, if the maximum value is small enough. 2826 We can then avoid the subtraction of the minimum value, but the 2827 mask constant could perhaps be more expensive.
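     E.g. (a sketch with small made-up constants, lowi 3, mask 0x13):
     instead of ((1 << (X - 3U)) & 0x13) == 0 test
     ((1 << X) & 0x98) == 0, i.e. the mask shifted left by 3 and the
     subtraction dropped; the rtx cost comparison below decides whether
     the wider constant is actually cheaper.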
*/ 2828 if (compare_tree_int (lowi, 0) > 0 2829 && compare_tree_int (high, prec) < 0) 2830 { 2831 int cost_diff; 2832 HOST_WIDE_INT m = tree_to_uhwi (lowi); 2833 rtx reg = gen_raw_REG (word_mode, 10000); 2834 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt)); 2835 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg, 2836 GEN_INT (-m)), speed_p); 2837 rtx r = immed_wide_int_const (mask, word_mode); 2838 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r), 2839 word_mode, speed_p); 2840 r = immed_wide_int_const (wi::lshift (mask, m), word_mode); 2841 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r), 2842 word_mode, speed_p); 2843 if (cost_diff > 0) 2844 { 2845 mask = wi::lshift (mask, m); 2846 lowi = build_zero_cst (TREE_TYPE (lowi)); 2847 } 2848 } 2849 2850 tree tem = build_range_check (loc, optype, unshare_expr (exp), 2851 false, lowi, high); 2852 if (tem == NULL_TREE || is_gimple_val (tem)) 2853 continue; 2854 tree etype = unsigned_type_for (TREE_TYPE (exp)); 2855 exp = fold_build2_loc (loc, MINUS_EXPR, etype, 2856 fold_convert_loc (loc, etype, exp), 2857 fold_convert_loc (loc, etype, lowi)); 2858 exp = fold_convert_loc (loc, integer_type_node, exp); 2859 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1); 2860 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type, 2861 build_int_cst (word_type, 1), exp); 2862 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp, 2863 wide_int_to_tree (word_type, mask)); 2864 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp, 2865 build_zero_cst (word_type)); 2866 if (is_gimple_val (exp)) 2867 continue; 2868 2869 /* The shift might have undefined behavior if TEM is true, 2870 but reassociate_bb isn't prepared to have basic blocks 2871 split when it is running. So, temporarily emit the code 2872 with BIT_IOR_EXPR instead of &&, and fix it up in 2873 branch_fixup. */ 2874 gimple_seq seq; 2875 tem = force_gimple_operand (tem, &seq, true, NULL_TREE); 2876 gcc_assert (TREE_CODE (tem) == SSA_NAME); 2877 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true); 2878 gimple_seq seq2; 2879 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE); 2880 gimple_seq_add_seq_without_update (&seq, seq2); 2881 gcc_assert (TREE_CODE (exp) == SSA_NAME); 2882 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true); 2883 gimple *g = gimple_build_assign (make_ssa_name (optype), 2884 BIT_IOR_EXPR, tem, exp); 2885 gimple_set_location (g, loc); 2886 gimple_seq_add_stmt_without_update (&seq, g); 2887 exp = gimple_assign_lhs (g); 2888 tree val = build_zero_cst (optype); 2889 if (update_range_test (&ranges[i], NULL, candidates.address (), 2890 candidates.length (), opcode, ops, exp, 2891 seq, false, val, val, strict_overflow_p)) 2892 { 2893 any_changes = true; 2894 reassoc_branch_fixups.safe_push (tem); 2895 } 2896 else 2897 gimple_seq_discard (seq); 2898 } 2899 } 2900 return any_changes; 2901 } 2902 2903 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0 2904 and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
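   Only tests that agree in precision and in the compared constant are
   combined; the bucket index below is TYPE_PRECISION (type) * 2 plus
   one when comparing against -1, so e.g. a 32-bit x != 0 is never
   merged with a 64-bit y != 0 nor with a 32-bit z != -1.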
*/ 2905 2906 static bool 2907 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length, 2908 vec<operand_entry *> *ops, 2909 struct range_entry *ranges) 2910 { 2911 int i; 2912 unsigned int b; 2913 bool any_changes = false; 2914 auto_vec<int, 128> buckets; 2915 auto_vec<int, 32> chains; 2916 auto_vec<struct range_entry *, 32> candidates; 2917 2918 for (i = first; i < length; i++) 2919 { 2920 if (ranges[i].exp == NULL_TREE 2921 || TREE_CODE (ranges[i].exp) != SSA_NAME 2922 || !ranges[i].in_p 2923 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1 2924 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE 2925 || ranges[i].low == NULL_TREE 2926 || ranges[i].low != ranges[i].high) 2927 continue; 2928 2929 bool zero_p = integer_zerop (ranges[i].low); 2930 if (!zero_p && !integer_all_onesp (ranges[i].low)) 2931 continue; 2932 2933 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p; 2934 if (buckets.length () <= b) 2935 buckets.safe_grow_cleared (b + 1); 2936 if (chains.length () <= (unsigned) i) 2937 chains.safe_grow (i + 1); 2938 chains[i] = buckets[b]; 2939 buckets[b] = i + 1; 2940 } 2941 2942 FOR_EACH_VEC_ELT (buckets, b, i) 2943 if (i && chains[i - 1]) 2944 { 2945 int j, k = i; 2946 for (j = chains[i - 1]; j; j = chains[j - 1]) 2947 { 2948 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp); 2949 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp); 2950 if (reassoc_stmt_dominates_stmt_p (gk, gj)) 2951 k = j; 2952 } 2953 tree type1 = TREE_TYPE (ranges[k - 1].exp); 2954 tree type2 = NULL_TREE; 2955 bool strict_overflow_p = false; 2956 candidates.truncate (0); 2957 for (j = i; j; j = chains[j - 1]) 2958 { 2959 tree type = TREE_TYPE (ranges[j - 1].exp); 2960 strict_overflow_p |= ranges[j - 1].strict_overflow_p; 2961 if (j == k 2962 || useless_type_conversion_p (type1, type)) 2963 ; 2964 else if (type2 == NULL_TREE 2965 || useless_type_conversion_p (type2, type)) 2966 { 2967 if (type2 == NULL_TREE) 2968 type2 = type; 2969 candidates.safe_push (&ranges[j - 1]); 2970 } 2971 } 2972 unsigned l = candidates.length (); 2973 for (j = i; j; j = chains[j - 1]) 2974 { 2975 tree type = TREE_TYPE (ranges[j - 1].exp); 2976 if (j == k) 2977 continue; 2978 if (useless_type_conversion_p (type1, type)) 2979 ; 2980 else if (type2 == NULL_TREE 2981 || useless_type_conversion_p (type2, type)) 2982 continue; 2983 candidates.safe_push (&ranges[j - 1]); 2984 } 2985 gimple_seq seq = NULL; 2986 tree op = NULL_TREE; 2987 unsigned int id; 2988 struct range_entry *r; 2989 candidates.safe_push (&ranges[k - 1]); 2990 FOR_EACH_VEC_ELT (candidates, id, r) 2991 { 2992 gimple *g; 2993 if (id == 0) 2994 { 2995 op = r->exp; 2996 continue; 2997 } 2998 if (id == l) 2999 { 3000 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op); 3001 gimple_seq_add_stmt_without_update (&seq, g); 3002 op = gimple_assign_lhs (g); 3003 } 3004 tree type = TREE_TYPE (r->exp); 3005 tree exp = r->exp; 3006 if (id >= l && !useless_type_conversion_p (type1, type)) 3007 { 3008 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp); 3009 gimple_seq_add_stmt_without_update (&seq, g); 3010 exp = gimple_assign_lhs (g); 3011 } 3012 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2), 3013 (b & 1) ? 
BIT_AND_EXPR : BIT_IOR_EXPR, 3014 op, exp); 3015 gimple_seq_add_stmt_without_update (&seq, g); 3016 op = gimple_assign_lhs (g); 3017 } 3018 candidates.pop (); 3019 if (update_range_test (&ranges[k - 1], NULL, candidates.address (), 3020 candidates.length (), opcode, ops, op, 3021 seq, true, ranges[k - 1].low, 3022 ranges[k - 1].low, strict_overflow_p)) 3023 any_changes = true; 3024 else 3025 gimple_seq_discard (seq); 3026 } 3027 3028 return any_changes; 3029 } 3030 3031 /* Attempt to optimize for signed a and b where b is known to be >= 0: 3032 a >= 0 && a < b into (unsigned) a < (unsigned) b 3033 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */ 3034 3035 static bool 3036 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length, 3037 vec<operand_entry *> *ops, 3038 struct range_entry *ranges, 3039 basic_block first_bb) 3040 { 3041 int i; 3042 bool any_changes = false; 3043 hash_map<tree, int> *map = NULL; 3044 3045 for (i = first; i < length; i++) 3046 { 3047 if (ranges[i].exp == NULL_TREE 3048 || TREE_CODE (ranges[i].exp) != SSA_NAME 3049 || !ranges[i].in_p) 3050 continue; 3051 3052 tree type = TREE_TYPE (ranges[i].exp); 3053 if (!INTEGRAL_TYPE_P (type) 3054 || TYPE_UNSIGNED (type) 3055 || ranges[i].low == NULL_TREE 3056 || !integer_zerop (ranges[i].low) 3057 || ranges[i].high != NULL_TREE) 3058 continue; 3059 /* EXP >= 0 here. */ 3060 if (map == NULL) 3061 map = new hash_map <tree, int>; 3062 map->put (ranges[i].exp, i); 3063 } 3064 3065 if (map == NULL) 3066 return false; 3067 3068 for (i = 0; i < length; i++) 3069 { 3070 bool in_p = ranges[i].in_p; 3071 if (ranges[i].low == NULL_TREE 3072 || ranges[i].high == NULL_TREE) 3073 continue; 3074 if (!integer_zerop (ranges[i].low) 3075 || !integer_zerop (ranges[i].high)) 3076 { 3077 if (ranges[i].exp 3078 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1 3079 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp)) 3080 && integer_onep (ranges[i].low) 3081 && integer_onep (ranges[i].high)) 3082 in_p = !in_p; 3083 else 3084 continue; 3085 } 3086 3087 gimple *stmt; 3088 tree_code ccode; 3089 tree rhs1, rhs2; 3090 if (ranges[i].exp) 3091 { 3092 if (TREE_CODE (ranges[i].exp) != SSA_NAME) 3093 continue; 3094 stmt = SSA_NAME_DEF_STMT (ranges[i].exp); 3095 if (!is_gimple_assign (stmt)) 3096 continue; 3097 ccode = gimple_assign_rhs_code (stmt); 3098 rhs1 = gimple_assign_rhs1 (stmt); 3099 rhs2 = gimple_assign_rhs2 (stmt); 3100 } 3101 else 3102 { 3103 operand_entry *oe = (*ops)[ranges[i].idx]; 3104 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); 3105 if (gimple_code (stmt) != GIMPLE_COND) 3106 continue; 3107 ccode = gimple_cond_code (stmt); 3108 rhs1 = gimple_cond_lhs (stmt); 3109 rhs2 = gimple_cond_rhs (stmt); 3110 } 3111 3112 if (TREE_CODE (rhs1) != SSA_NAME 3113 || rhs2 == NULL_TREE 3114 || TREE_CODE (rhs2) != SSA_NAME) 3115 continue; 3116 3117 switch (ccode) 3118 { 3119 case GT_EXPR: 3120 case GE_EXPR: 3121 case LT_EXPR: 3122 case LE_EXPR: 3123 break; 3124 default: 3125 continue; 3126 } 3127 if (in_p) 3128 ccode = invert_tree_comparison (ccode, false); 3129 switch (ccode) 3130 { 3131 case GT_EXPR: 3132 case GE_EXPR: 3133 std::swap (rhs1, rhs2); 3134 ccode = swap_tree_comparison (ccode); 3135 break; 3136 case LT_EXPR: 3137 case LE_EXPR: 3138 break; 3139 default: 3140 gcc_unreachable (); 3141 } 3142 3143 int *idx = map->get (rhs1); 3144 if (idx == NULL) 3145 continue; 3146 3147 /* maybe_optimize_range_tests allows statements without side-effects 3148 in the basic blocks as long as they are consumed in the same bb. 
Make sure rhs2's def stmt is not among them, otherwise we can't 3150 safely use get_nonzero_bits on it. E.g. in: 3151 # RANGE [-83, 1] NONZERO 173 3152 # k_32 = PHI <k_47(13), k_12(9)> 3153 ... 3154 if (k_32 >= 0) 3155 goto <bb 5>; [26.46%] 3156 else 3157 goto <bb 9>; [73.54%] 3158 3159 <bb 5> [local count: 140323371]: 3160 # RANGE [0, 1] NONZERO 1 3161 _5 = (int) k_32; 3162 # RANGE [0, 4] NONZERO 4 3163 _21 = _5 << 2; 3164 # RANGE [0, 4] NONZERO 4 3165 iftmp.0_44 = (char) _21; 3166 if (k_32 < iftmp.0_44) 3167 goto <bb 6>; [84.48%] 3168 else 3169 goto <bb 9>; [15.52%] 3170 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that 3171 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44 3172 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute 3173 those stmts even for negative k_32 and the value ranges would no 3174 longer be guaranteed and so the optimization would be invalid. */ 3175 if (opcode == ERROR_MARK) 3176 { 3177 gimple *g = SSA_NAME_DEF_STMT (rhs2); 3178 basic_block bb2 = gimple_bb (g); 3179 if (bb2 3180 && bb2 != first_bb 3181 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb)) 3182 { 3183 /* As an exception, handle a few common cases. */ 3184 if (gimple_assign_cast_p (g) 3185 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))) 3186 && TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (g))) 3187 && (TYPE_PRECISION (TREE_TYPE (rhs2)) 3188 > TYPE_PRECISION (TREE_TYPE (gimple_assign_rhs1 (g))))) 3189 /* Zero-extension is always ok. */ ; 3190 else if (is_gimple_assign (g) 3191 && gimple_assign_rhs_code (g) == BIT_AND_EXPR 3192 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST 3193 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g)))) 3194 /* Masking with INTEGER_CST with MSB clear is always ok 3195 too. */ ; 3196 else 3197 continue; 3198 } 3199 } 3200 3201 wide_int nz = get_nonzero_bits (rhs2); 3202 if (wi::neg_p (nz)) 3203 continue; 3204 3205 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0 3206 and RHS2 is known to be RHS2 >= 0.
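   A sketch of why the single unsigned comparison is enough: RHS2 >= 0
   means (unsigned) RHS2 stays below the sign bit, while a negative EXP
   would wrap to a value above it, so (unsigned) EXP < (unsigned) RHS2
   is false exactly when EXP < 0 or EXP >= RHS2.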
*/ 3207 tree utype = unsigned_type_for (TREE_TYPE (rhs1)); 3208 3209 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; 3210 if ((ranges[*idx].strict_overflow_p 3211 || ranges[i].strict_overflow_p) 3212 && issue_strict_overflow_warning (wc)) 3213 warning_at (gimple_location (stmt), OPT_Wstrict_overflow, 3214 "assuming signed overflow does not occur " 3215 "when simplifying range test"); 3216 3217 if (dump_file && (dump_flags & TDF_DETAILS)) 3218 { 3219 struct range_entry *r = &ranges[*idx]; 3220 fprintf (dump_file, "Optimizing range test "); 3221 print_generic_expr (dump_file, r->exp); 3222 fprintf (dump_file, " +["); 3223 print_generic_expr (dump_file, r->low); 3224 fprintf (dump_file, ", "); 3225 print_generic_expr (dump_file, r->high); 3226 fprintf (dump_file, "] and comparison "); 3227 print_generic_expr (dump_file, rhs1); 3228 fprintf (dump_file, " %s ", op_symbol_code (ccode)); 3229 print_generic_expr (dump_file, rhs2); 3230 fprintf (dump_file, "\n into ("); 3231 print_generic_expr (dump_file, utype); 3232 fprintf (dump_file, ") "); 3233 print_generic_expr (dump_file, rhs1); 3234 fprintf (dump_file, " %s (", op_symbol_code (ccode)); 3235 print_generic_expr (dump_file, utype); 3236 fprintf (dump_file, ") "); 3237 print_generic_expr (dump_file, rhs2); 3238 fprintf (dump_file, "\n"); 3239 } 3240 3241 operand_entry *oe = (*ops)[ranges[i].idx]; 3242 ranges[i].in_p = 0; 3243 if (opcode == BIT_IOR_EXPR 3244 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 3245 { 3246 ranges[i].in_p = 1; 3247 ccode = invert_tree_comparison (ccode, false); 3248 } 3249 3250 unsigned int uid = gimple_uid (stmt); 3251 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 3252 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1); 3253 gimple_set_uid (g, uid); 3254 rhs1 = gimple_assign_lhs (g); 3255 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3256 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2); 3257 gimple_set_uid (g, uid); 3258 rhs2 = gimple_assign_lhs (g); 3259 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3260 if (tree_swap_operands_p (rhs1, rhs2)) 3261 { 3262 std::swap (rhs1, rhs2); 3263 ccode = swap_tree_comparison (ccode); 3264 } 3265 if (gimple_code (stmt) == GIMPLE_COND) 3266 { 3267 gcond *c = as_a <gcond *> (stmt); 3268 gimple_cond_set_code (c, ccode); 3269 gimple_cond_set_lhs (c, rhs1); 3270 gimple_cond_set_rhs (c, rhs2); 3271 update_stmt (stmt); 3272 } 3273 else 3274 { 3275 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node; 3276 if (!INTEGRAL_TYPE_P (ctype) 3277 || (TREE_CODE (ctype) != BOOLEAN_TYPE 3278 && TYPE_PRECISION (ctype) != 1)) 3279 ctype = boolean_type_node; 3280 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2); 3281 gimple_set_uid (g, uid); 3282 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3283 if (oe->op && ctype != TREE_TYPE (oe->op)) 3284 { 3285 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)), 3286 NOP_EXPR, gimple_assign_lhs (g)); 3287 gimple_set_uid (g, uid); 3288 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3289 } 3290 ranges[i].exp = gimple_assign_lhs (g); 3291 oe->op = ranges[i].exp; 3292 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp)); 3293 ranges[i].high = ranges[i].low; 3294 } 3295 ranges[i].strict_overflow_p = false; 3296 oe = (*ops)[ranges[*idx].idx]; 3297 /* Now change all the other range test immediate uses, so that 3298 those tests will be optimized away. 
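   In the inter-bb case the subsumed test is replaced by the neutral
   element of the participating operation (0 for ||, 1 for &&), so it
   folds away; otherwise error_mark_node just marks the operand entry
   for removal by the caller.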
*/ 3299 if (opcode == ERROR_MARK) 3300 { 3301 if (oe->op) 3302 oe->op = build_int_cst (TREE_TYPE (oe->op), 3303 oe->rank == BIT_IOR_EXPR ? 0 : 1); 3304 else 3305 oe->op = (oe->rank == BIT_IOR_EXPR 3306 ? boolean_false_node : boolean_true_node); 3307 } 3308 else 3309 oe->op = error_mark_node; 3310 ranges[*idx].exp = NULL_TREE; 3311 ranges[*idx].low = NULL_TREE; 3312 ranges[*idx].high = NULL_TREE; 3313 any_changes = true; 3314 } 3315 3316 delete map; 3317 return any_changes; 3318 } 3319 3320 /* Optimize range tests, similarly to how fold_range_test optimizes 3321 them on trees. The tree code for the binary 3322 operation between all the operands is OPCODE. 3323 If OPCODE is ERROR_MARK, optimize_range_tests is called from within 3324 maybe_optimize_range_tests for inter-bb range optimization. 3325 In that case if oe->op is NULL, oe->id is bb->index whose 3326 GIMPLE_COND is && or ||ed into the test, and oe->rank says 3327 the actual opcode. 3328 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */ 3329 3330 static bool 3331 optimize_range_tests (enum tree_code opcode, 3332 vec<operand_entry *> *ops, basic_block first_bb) 3333 { 3334 unsigned int length = ops->length (), i, j, first; 3335 operand_entry *oe; 3336 struct range_entry *ranges; 3337 bool any_changes = false; 3338 3339 if (length == 1) 3340 return false; 3341 3342 ranges = XNEWVEC (struct range_entry, length); 3343 for (i = 0; i < length; i++) 3344 { 3345 oe = (*ops)[i]; 3346 ranges[i].idx = i; 3347 init_range_entry (ranges + i, oe->op, 3348 oe->op 3349 ? NULL 3350 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id))); 3351 /* For | invert it now; we will invert it again before emitting 3352 the optimized expression. */ 3353 if (opcode == BIT_IOR_EXPR 3354 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 3355 ranges[i].in_p = !ranges[i].in_p; 3356 } 3357 3358 qsort (ranges, length, sizeof (*ranges), range_entry_cmp); 3359 for (i = 0; i < length; i++) 3360 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME) 3361 break; 3362 3363 /* Try to merge ranges. */ 3364 for (first = i; i < length; i++) 3365 { 3366 tree low = ranges[i].low; 3367 tree high = ranges[i].high; 3368 int in_p = ranges[i].in_p; 3369 bool strict_overflow_p = ranges[i].strict_overflow_p; 3370 int update_fail_count = 0; 3371 3372 for (j = i + 1; j < length; j++) 3373 { 3374 if (ranges[i].exp != ranges[j].exp) 3375 break; 3376 if (!merge_ranges (&in_p, &low, &high, in_p, low, high, 3377 ranges[j].in_p, ranges[j].low, ranges[j].high)) 3378 break; 3379 strict_overflow_p |= ranges[j].strict_overflow_p; 3380 } 3381 3382 if (j == i + 1) 3383 continue; 3384 3385 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1, 3386 opcode, ops, ranges[i].exp, NULL, in_p, 3387 low, high, strict_overflow_p)) 3388 { 3389 i = j - 1; 3390 any_changes = true; 3391 } 3392 /* Avoid quadratic complexity if all merge_ranges calls would succeed, 3393 while update_range_test would fail.
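   (A sketch of the failure mode: after a long run of mergeable ranges
   whose combined update fails, I advances by just one and the next
   iteration redoes almost the same merge_ranges walk, so N such ranges
   would cost O(N^2); after 64 failures we skip past the run as if the
   update had succeeded.)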
*/ 3394 else if (update_fail_count == 64) 3395 i = j - 1; 3396 else 3397 ++update_fail_count; 3398 } 3399 3400 any_changes |= optimize_range_tests_1 (opcode, first, length, true, 3401 ops, ranges); 3402 3403 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2) 3404 any_changes |= optimize_range_tests_1 (opcode, first, length, false, 3405 ops, ranges); 3406 if (lshift_cheap_p (optimize_function_for_speed_p (cfun))) 3407 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length, 3408 ops, ranges); 3409 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length, 3410 ops, ranges); 3411 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops, 3412 ranges, first_bb); 3413 3414 if (any_changes && opcode != ERROR_MARK) 3415 { 3416 j = 0; 3417 FOR_EACH_VEC_ELT (*ops, i, oe) 3418 { 3419 if (oe->op == error_mark_node) 3420 continue; 3421 else if (i != j) 3422 (*ops)[j] = oe; 3423 j++; 3424 } 3425 ops->truncate (j); 3426 } 3427 3428 XDELETEVEC (ranges); 3429 return any_changes; 3430 } 3431 3432 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize 3433 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure, 3434 otherwise the comparison code. */ 3435 3436 static tree_code 3437 ovce_extract_ops (tree var, gassign **rets, bool *reti) 3438 { 3439 if (TREE_CODE (var) != SSA_NAME) 3440 return ERROR_MARK; 3441 3442 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var)); 3443 if (stmt == NULL) 3444 return ERROR_MARK; 3445 3446 /* ??? If we start creating more COND_EXPR, we could perform 3447 this same optimization with them. For now, simplify. */ 3448 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR) 3449 return ERROR_MARK; 3450 3451 tree cond = gimple_assign_rhs1 (stmt); 3452 tree_code cmp = TREE_CODE (cond); 3453 if (TREE_CODE_CLASS (cmp) != tcc_comparison) 3454 return ERROR_MARK; 3455 3456 /* ??? For now, allow only canonical true and false result vectors. 3457 We could expand this to other constants should the need arise, 3458 but at the moment we don't create them. */ 3459 tree t = gimple_assign_rhs2 (stmt); 3460 tree f = gimple_assign_rhs3 (stmt); 3461 bool inv; 3462 if (integer_all_onesp (t)) 3463 inv = false; 3464 else if (integer_all_onesp (f)) 3465 { 3466 cmp = invert_tree_comparison (cmp, false); 3467 inv = true; 3468 } 3469 else 3470 return ERROR_MARK; 3471 if (!integer_zerop (f)) 3472 return ERROR_MARK; 3473 3474 /* Success! */ 3475 if (rets) 3476 *rets = stmt; 3477 if (reti) 3478 *reti = inv; 3479 return cmp; 3480 } 3481 3482 /* Optimize the condition of VEC_COND_EXPRs which have been combined 3483 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). 
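   E.g. (made-up SSA names):
     _1 = VEC_COND_EXPR <x_4 < y_5, { -1, -1 }, { 0, 0 }>;
     _2 = VEC_COND_EXPR <x_4 <= y_5, { -1, -1 }, { 0, 0 }>;
     _3 = _1 & _2;
   maybe_fold_and_comparisons folds x_4 < y_5 && x_4 <= y_5 into
   x_4 < y_5, the combined condition replaces _1's and _2 is dropped
   from the operand list.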
*/ 3484 3485 static bool 3486 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops) 3487 { 3488 unsigned int length = ops->length (), i, j; 3489 bool any_changes = false; 3490 3491 if (length == 1) 3492 return false; 3493 3494 for (i = 0; i < length; ++i) 3495 { 3496 tree elt0 = (*ops)[i]->op; 3497 3498 gassign *stmt0; 3499 bool invert; 3500 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert); 3501 if (cmp0 == ERROR_MARK) 3502 continue; 3503 3504 for (j = i + 1; j < length; ++j) 3505 { 3506 tree &elt1 = (*ops)[j]->op; 3507 3508 gassign *stmt1; 3509 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL); 3510 if (cmp1 == ERROR_MARK) 3511 continue; 3512 3513 tree cond0 = gimple_assign_rhs1 (stmt0); 3514 tree x0 = TREE_OPERAND (cond0, 0); 3515 tree y0 = TREE_OPERAND (cond0, 1); 3516 3517 tree cond1 = gimple_assign_rhs1 (stmt1); 3518 tree x1 = TREE_OPERAND (cond1, 0); 3519 tree y1 = TREE_OPERAND (cond1, 1); 3520 3521 tree comb; 3522 if (opcode == BIT_AND_EXPR) 3523 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1); 3524 else if (opcode == BIT_IOR_EXPR) 3525 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1); 3526 else 3527 gcc_unreachable (); 3528 if (comb == NULL) 3529 continue; 3530 3531 /* Success! */ 3532 if (dump_file && (dump_flags & TDF_DETAILS)) 3533 { 3534 fprintf (dump_file, "Transforming "); 3535 print_generic_expr (dump_file, cond0); 3536 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|'); 3537 print_generic_expr (dump_file, cond1); 3538 fprintf (dump_file, " into "); 3539 print_generic_expr (dump_file, comb); 3540 fputc ('\n', dump_file); 3541 } 3542 3543 gimple_assign_set_rhs1 (stmt0, comb); 3544 if (invert) 3545 std::swap (*gimple_assign_rhs2_ptr (stmt0), 3546 *gimple_assign_rhs3_ptr (stmt0)); 3547 update_stmt (stmt0); 3548 3549 elt1 = error_mark_node; 3550 any_changes = true; 3551 } 3552 } 3553 3554 if (any_changes) 3555 { 3556 operand_entry *oe; 3557 j = 0; 3558 FOR_EACH_VEC_ELT (*ops, i, oe) 3559 { 3560 if (oe->op == error_mark_node) 3561 continue; 3562 else if (i != j) 3563 (*ops)[j] = oe; 3564 j++; 3565 } 3566 ops->truncate (j); 3567 } 3568 3569 return any_changes; 3570 } 3571 3572 /* Return true if STMT is a cast like: 3573 <bb N>: 3574 ... 3575 _123 = (int) _234; 3576 3577 <bb M>: 3578 # _345 = PHI <_123(N), 1(...), 1(...)> 3579 where _234 has bool type, _123 has a single use and 3580 bb N has a single successor M. This is commonly used in 3581 the last block of a range test. 3582 3583 Also return true if STMT is a tcc_comparison like: 3584 <bb N>: 3585 ... 3586 _234 = a_2(D) == 2; 3587 3588 <bb M>: 3589 # _345 = PHI <_234(N), 1(...), 1(...)> 3590 _346 = (int) _345; 3591 where _234 has bool type, a single use and 3592 bb N has a single successor M. This is commonly used in 3593 the last block of a range test.
*/ 3594 3595 static bool 3596 final_range_test_p (gimple *stmt) 3597 { 3598 basic_block bb, rhs_bb, lhs_bb; 3599 edge e; 3600 tree lhs, rhs; 3601 use_operand_p use_p; 3602 gimple *use_stmt; 3603 3604 if (!gimple_assign_cast_p (stmt) 3605 && (!is_gimple_assign (stmt) 3606 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) 3607 != tcc_comparison))) 3608 return false; 3609 bb = gimple_bb (stmt); 3610 if (!single_succ_p (bb)) 3611 return false; 3612 e = single_succ_edge (bb); 3613 if (e->flags & EDGE_COMPLEX) 3614 return false; 3615 3616 lhs = gimple_assign_lhs (stmt); 3617 rhs = gimple_assign_rhs1 (stmt); 3618 if (gimple_assign_cast_p (stmt) 3619 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 3620 || TREE_CODE (rhs) != SSA_NAME 3621 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)) 3622 return false; 3623 3624 if (!gimple_assign_cast_p (stmt) 3625 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE)) 3626 return false; 3627 3628 /* Test whether lhs is consumed only by a PHI in the only successor bb. */ 3629 if (!single_imm_use (lhs, &use_p, &use_stmt)) 3630 return false; 3631 3632 if (gimple_code (use_stmt) != GIMPLE_PHI 3633 || gimple_bb (use_stmt) != e->dest) 3634 return false; 3635 3636 /* And that the rhs is defined in the same loop. */ 3637 if (gimple_assign_cast_p (stmt)) 3638 { 3639 if (TREE_CODE (rhs) != SSA_NAME 3640 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs))) 3641 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb)) 3642 return false; 3643 } 3644 else 3645 { 3646 if (TREE_CODE (lhs) != SSA_NAME 3647 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs))) 3648 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb)) 3649 return false; 3650 } 3651 3652 return true; 3653 } 3654 3655 /* Return true if BB is a suitable basic block for inter-bb range test 3656 optimization. If BACKWARD is true, BB should be the only predecessor 3657 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine, 3658 or compared with to find a common basic block to which all conditions 3659 branch if true resp. false. If BACKWARD is false, TEST_BB should 3660 be the only predecessor of BB. */ 3661 3662 static bool 3663 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb, 3664 bool backward) 3665 { 3666 edge_iterator ei, ei2; 3667 edge e, e2; 3668 gimple *stmt; 3669 gphi_iterator gsi; 3670 bool other_edge_seen = false; 3671 bool is_cond; 3672 3673 if (test_bb == bb) 3674 return false; 3675 /* Check last stmt first. */ 3676 stmt = last_stmt (bb); 3677 if (stmt == NULL 3678 || (gimple_code (stmt) != GIMPLE_COND 3679 && (backward || !final_range_test_p (stmt))) 3680 || gimple_visited_p (stmt) 3681 || stmt_could_throw_p (stmt) 3682 || *other_bb == bb) 3683 return false; 3684 is_cond = gimple_code (stmt) == GIMPLE_COND; 3685 if (is_cond) 3686 { 3687 /* If last stmt is GIMPLE_COND, verify that one of the succ edges 3688 goes to the next bb (if BACKWARD, it is TEST_BB), and the other 3689 to *OTHER_BB (if not set yet, try to find it out).
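   E.g. (sketch) for the BACKWARD case: BB's true edge leads to
   *OTHER_BB and its false edge falls through to TEST_BB, or vice
   versa; the loop below walks both edges and fails if neither one
   reaches *OTHER_BB.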
*/ 3690 if (EDGE_COUNT (bb->succs) != 2) 3691 return false; 3692 FOR_EACH_EDGE (e, ei, bb->succs) 3693 { 3694 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) 3695 return false; 3696 if (e->dest == test_bb) 3697 { 3698 if (backward) 3699 continue; 3700 else 3701 return false; 3702 } 3703 if (e->dest == bb) 3704 return false; 3705 if (*other_bb == NULL) 3706 { 3707 FOR_EACH_EDGE (e2, ei2, test_bb->succs) 3708 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) 3709 return false; 3710 else if (e->dest == e2->dest) 3711 *other_bb = e->dest; 3712 if (*other_bb == NULL) 3713 return false; 3714 } 3715 if (e->dest == *other_bb) 3716 other_edge_seen = true; 3717 else if (backward) 3718 return false; 3719 } 3720 if (*other_bb == NULL || !other_edge_seen) 3721 return false; 3722 } 3723 else if (single_succ (bb) != *other_bb) 3724 return false; 3725 3726 /* Now check all PHIs of *OTHER_BB. */ 3727 e = find_edge (bb, *other_bb); 3728 e2 = find_edge (test_bb, *other_bb); 3729 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi)) 3730 { 3731 gphi *phi = gsi.phi (); 3732 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments 3733 corresponding to BB and TEST_BB predecessor must be the same. */ 3734 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx), 3735 gimple_phi_arg_def (phi, e2->dest_idx), 0)) 3736 { 3737 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND, 3738 one of the PHIs should have the lhs of the last stmt in 3739 that block as PHI arg and that PHI should have 0 or 1 3740 corresponding to it in all other range test basic blocks 3741 considered. */ 3742 if (!is_cond) 3743 { 3744 if (gimple_phi_arg_def (phi, e->dest_idx) 3745 == gimple_assign_lhs (stmt) 3746 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx)) 3747 || integer_onep (gimple_phi_arg_def (phi, 3748 e2->dest_idx)))) 3749 continue; 3750 } 3751 else 3752 { 3753 gimple *test_last = last_stmt (test_bb); 3754 if (gimple_code (test_last) != GIMPLE_COND 3755 && gimple_phi_arg_def (phi, e2->dest_idx) 3756 == gimple_assign_lhs (test_last) 3757 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx)) 3758 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx)))) 3759 continue; 3760 } 3761 3762 return false; 3763 } 3764 } 3765 return true; 3766 } 3767 3768 /* Return true if BB doesn't have side-effects that would disallow 3769 range test optimization, all SSA_NAMEs set in the bb are consumed 3770 in the bb and there are no PHIs. 
*/ 3771 3772 static bool 3773 no_side_effect_bb (basic_block bb) 3774 { 3775 gimple_stmt_iterator gsi; 3776 gimple *last; 3777 3778 if (!gimple_seq_empty_p (phi_nodes (bb))) 3779 return false; 3780 last = last_stmt (bb); 3781 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 3782 { 3783 gimple *stmt = gsi_stmt (gsi); 3784 tree lhs; 3785 imm_use_iterator imm_iter; 3786 use_operand_p use_p; 3787 3788 if (is_gimple_debug (stmt)) 3789 continue; 3790 if (gimple_has_side_effects (stmt)) 3791 return false; 3792 if (stmt == last) 3793 return true; 3794 if (!is_gimple_assign (stmt)) 3795 return false; 3796 lhs = gimple_assign_lhs (stmt); 3797 if (TREE_CODE (lhs) != SSA_NAME) 3798 return false; 3799 if (gimple_assign_rhs_could_trap_p (stmt)) 3800 return false; 3801 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) 3802 { 3803 gimple *use_stmt = USE_STMT (use_p); 3804 if (is_gimple_debug (use_stmt)) 3805 continue; 3806 if (gimple_bb (use_stmt) != bb) 3807 return false; 3808 } 3809 } 3810 return false; 3811 } 3812 3813 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable, 3814 return true and fill in *OPS recursively. */ 3815 3816 static bool 3817 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops, 3818 struct loop *loop) 3819 { 3820 gimple *stmt = SSA_NAME_DEF_STMT (var); 3821 tree rhs[2]; 3822 int i; 3823 3824 if (!is_reassociable_op (stmt, code, loop)) 3825 return false; 3826 3827 rhs[0] = gimple_assign_rhs1 (stmt); 3828 rhs[1] = gimple_assign_rhs2 (stmt); 3829 gimple_set_visited (stmt, true); 3830 for (i = 0; i < 2; i++) 3831 if (TREE_CODE (rhs[i]) == SSA_NAME 3832 && !get_ops (rhs[i], code, ops, loop) 3833 && has_single_use (rhs[i])) 3834 { 3835 operand_entry *oe = operand_entry_pool.allocate (); 3836 3837 oe->op = rhs[i]; 3838 oe->rank = code; 3839 oe->id = 0; 3840 oe->count = 1; 3841 oe->stmt_to_insert = NULL; 3842 ops->safe_push (oe); 3843 } 3844 return true; 3845 } 3846 3847 /* Find the ops that were added by get_ops starting from VAR, see if 3848 they were changed during update_range_test and if yes, create new 3849 stmts. */ 3850 3851 static tree 3852 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops, 3853 unsigned int *pidx, struct loop *loop) 3854 { 3855 gimple *stmt = SSA_NAME_DEF_STMT (var); 3856 tree rhs[4]; 3857 int i; 3858 3859 if (!is_reassociable_op (stmt, code, loop)) 3860 return NULL; 3861 3862 rhs[0] = gimple_assign_rhs1 (stmt); 3863 rhs[1] = gimple_assign_rhs2 (stmt); 3864 rhs[2] = rhs[0]; 3865 rhs[3] = rhs[1]; 3866 for (i = 0; i < 2; i++) 3867 if (TREE_CODE (rhs[i]) == SSA_NAME) 3868 { 3869 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop); 3870 if (rhs[2 + i] == NULL_TREE) 3871 { 3872 if (has_single_use (rhs[i])) 3873 rhs[2 + i] = ops[(*pidx)++]->op; 3874 else 3875 rhs[2 + i] = rhs[i]; 3876 } 3877 } 3878 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1]) 3879 && (rhs[2] != rhs[1] || rhs[3] != rhs[0])) 3880 { 3881 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 3882 var = make_ssa_name (TREE_TYPE (var)); 3883 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt), 3884 rhs[2], rhs[3]); 3885 gimple_set_uid (g, gimple_uid (stmt)); 3886 gimple_set_visited (g, true); 3887 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3888 } 3889 return var; 3890 } 3891 3892 /* Structure to track the initial value passed to get_ops and 3893 the range in the ops vector for each basic block. 
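   FIRST_IDX and LAST_IDX delimit the half-open range
   [first_idx, last_idx) of OPS entries contributed by the block,
   e.g. (sketch) first_idx 2, last_idx 4 for a block that pushed
   ops[2] and ops[3]; for a block whose test could not be split into
   individual operands, first_idx == last_idx and only OP is set.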
*/ 3894 3895 struct inter_bb_range_test_entry 3896 { 3897 tree op; 3898 unsigned int first_idx, last_idx; 3899 }; 3900 3901 /* Inter-bb range test optimization. 3902 3903 Returns TRUE if a gimple conditional is optimized to a constant 3904 true/false, otherwise returns FALSE. 3905 3906 This indicates to the caller that it should run a CFG cleanup pass 3907 once reassociation is completed. */ 3908 3909 static bool 3910 maybe_optimize_range_tests (gimple *stmt) 3911 { 3912 basic_block first_bb = gimple_bb (stmt); 3913 basic_block last_bb = first_bb; 3914 basic_block other_bb = NULL; 3915 basic_block bb; 3916 edge_iterator ei; 3917 edge e; 3918 auto_vec<operand_entry *> ops; 3919 auto_vec<inter_bb_range_test_entry> bbinfo; 3920 bool any_changes = false; 3921 bool cfg_cleanup_needed = false; 3922 3923 /* Consider only basic blocks that end with GIMPLE_COND or 3924 a cast statement satisfying final_range_test_p. All 3925 but the last bb in the first_bb .. last_bb range 3926 should end with GIMPLE_COND. */ 3927 if (gimple_code (stmt) == GIMPLE_COND) 3928 { 3929 if (EDGE_COUNT (first_bb->succs) != 2) 3930 return cfg_cleanup_needed; 3931 } 3932 else if (final_range_test_p (stmt)) 3933 other_bb = single_succ (first_bb); 3934 else 3935 return cfg_cleanup_needed; 3936 3937 if (stmt_could_throw_p (stmt)) 3938 return cfg_cleanup_needed; 3939 3940 /* As relative ordering of post-dominator sons isn't fixed, 3941 maybe_optimize_range_tests can be called first on any 3942 bb in the range we want to optimize. So, start searching 3943 backwards, if first_bb can be set to a predecessor. */ 3944 while (single_pred_p (first_bb)) 3945 { 3946 basic_block pred_bb = single_pred (first_bb); 3947 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true)) 3948 break; 3949 if (!no_side_effect_bb (first_bb)) 3950 break; 3951 first_bb = pred_bb; 3952 } 3953 /* If first_bb is last_bb, other_bb hasn't been computed yet. 3954 Before starting forward search in last_bb successors, find 3955 out the other_bb. */ 3956 if (first_bb == last_bb) 3957 { 3958 other_bb = NULL; 3959 /* As a non-GIMPLE_COND last stmt always terminates the range, 3960 if the backward search didn't discover anything, just give up. */ 3961 if (gimple_code (stmt) != GIMPLE_COND) 3962 return cfg_cleanup_needed; 3963 /* Look at both successors. Either a successor ends with a GIMPLE_COND 3964 and satisfies suitable_cond_bb, or it ends with a cast and 3965 other_bb is that cast's successor. */ 3966 FOR_EACH_EDGE (e, ei, first_bb->succs) 3967 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)) 3968 || e->dest == first_bb) 3969 return cfg_cleanup_needed; 3970 else if (single_pred_p (e->dest)) 3971 { 3972 stmt = last_stmt (e->dest); 3973 if (stmt 3974 && gimple_code (stmt) == GIMPLE_COND 3975 && EDGE_COUNT (e->dest->succs) == 2) 3976 { 3977 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true)) 3978 break; 3979 else 3980 other_bb = NULL; 3981 } 3982 else if (stmt 3983 && final_range_test_p (stmt) 3984 && find_edge (first_bb, single_succ (e->dest))) 3985 { 3986 other_bb = single_succ (e->dest); 3987 if (other_bb == first_bb) 3988 other_bb = NULL; 3989 } 3990 } 3991 if (other_bb == NULL) 3992 return cfg_cleanup_needed; 3993 } 3994 /* Now do the forward search, moving last_bb to successor bbs 3995 that aren't other_bb.
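Each new last_bb candidate must have a single predecessor, satisfy suitable_cond_bb and have no side-effects; the walk below stops at the first successor that fails any of these checks.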
*/ 3996 while (EDGE_COUNT (last_bb->succs) == 2) 3997 { 3998 FOR_EACH_EDGE (e, ei, last_bb->succs) 3999 if (e->dest != other_bb) 4000 break; 4001 if (e == NULL) 4002 break; 4003 if (!single_pred_p (e->dest)) 4004 break; 4005 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false)) 4006 break; 4007 if (!no_side_effect_bb (e->dest)) 4008 break; 4009 last_bb = e->dest; 4010 } 4011 if (first_bb == last_bb) 4012 return cfg_cleanup_needed; 4013 /* Here basic blocks first_bb through last_bb's predecessor 4014 end with GIMPLE_COND, all of them have one of the edges to 4015 other_bb and another to another block in the range, 4016 all blocks except first_bb don't have side-effects and 4017 last_bb ends with either GIMPLE_COND, or cast satisfying 4018 final_range_test_p. */ 4019 for (bb = last_bb; ; bb = single_pred (bb)) 4020 { 4021 enum tree_code code; 4022 tree lhs, rhs; 4023 inter_bb_range_test_entry bb_ent; 4024 4025 bb_ent.op = NULL_TREE; 4026 bb_ent.first_idx = ops.length (); 4027 bb_ent.last_idx = bb_ent.first_idx; 4028 e = find_edge (bb, other_bb); 4029 stmt = last_stmt (bb); 4030 gimple_set_visited (stmt, true); 4031 if (gimple_code (stmt) != GIMPLE_COND) 4032 { 4033 use_operand_p use_p; 4034 gimple *phi; 4035 edge e2; 4036 unsigned int d; 4037 4038 lhs = gimple_assign_lhs (stmt); 4039 rhs = gimple_assign_rhs1 (stmt); 4040 gcc_assert (bb == last_bb); 4041 4042 /* stmt is 4043 _123 = (int) _234; 4044 OR 4045 _234 = a_2(D) == 2; 4046 4047 followed by: 4048 <bb M>: 4049 # _345 = PHI <_123(N), 1(...), 1(...)> 4050 4051 or 0 instead of 1. If it is 0, the _234 4052 range test is anded together with all the 4053 other range tests, if it is 1, it is ored with 4054 them. */ 4055 single_imm_use (lhs, &use_p, &phi); 4056 gcc_assert (gimple_code (phi) == GIMPLE_PHI); 4057 e2 = find_edge (first_bb, other_bb); 4058 d = e2->dest_idx; 4059 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs); 4060 if (integer_zerop (gimple_phi_arg_def (phi, d))) 4061 code = BIT_AND_EXPR; 4062 else 4063 { 4064 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d))); 4065 code = BIT_IOR_EXPR; 4066 } 4067 4068 /* If _234 SSA_NAME_DEF_STMT is 4069 _234 = _567 | _789; 4070 (or &, corresponding to 1/0 in the phi arguments), 4071 push into ops the individual range test arguments 4072 of the bitwise or resp. and, recursively. */ 4073 if (TREE_CODE (rhs) == SSA_NAME 4074 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) 4075 != tcc_comparison) 4076 && !get_ops (rhs, code, &ops, 4077 loop_containing_stmt (stmt)) 4078 && has_single_use (rhs)) 4079 { 4080 /* Otherwise, push the _234 range test itself. */ 4081 operand_entry *oe = operand_entry_pool.allocate (); 4082 4083 oe->op = rhs; 4084 oe->rank = code; 4085 oe->id = 0; 4086 oe->count = 1; 4087 oe->stmt_to_insert = NULL; 4088 ops.safe_push (oe); 4089 bb_ent.last_idx++; 4090 bb_ent.op = rhs; 4091 } 4092 else if (is_gimple_assign (stmt) 4093 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) 4094 == tcc_comparison) 4095 && !get_ops (lhs, code, &ops, 4096 loop_containing_stmt (stmt)) 4097 && has_single_use (lhs)) 4098 { 4099 operand_entry *oe = operand_entry_pool.allocate (); 4100 oe->op = lhs; 4101 oe->rank = code; 4102 oe->id = 0; 4103 oe->count = 1; 4104 ops.safe_push (oe); 4105 bb_ent.last_idx++; 4106 bb_ent.op = lhs; 4107 } 4108 else 4109 { 4110 bb_ent.last_idx = ops.length (); 4111 bb_ent.op = rhs; 4112 } 4113 bbinfo.safe_push (bb_ent); 4114 continue; 4115 } 4116 /* Otherwise stmt is GIMPLE_COND.
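Either the individual bitwise operands of its lhs are pushed into ops below, or the condition itself is recorded by its basic block index (see oe->id).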
*/ 4117 code = gimple_cond_code (stmt); 4118 lhs = gimple_cond_lhs (stmt); 4119 rhs = gimple_cond_rhs (stmt); 4120 if (TREE_CODE (lhs) == SSA_NAME 4121 && INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 4122 && ((code != EQ_EXPR && code != NE_EXPR) 4123 || rhs != boolean_false_node 4124 /* Either push into ops the individual bitwise 4125 or resp. and operands, depending on which 4126 edge is other_bb. */ 4127 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0) 4128 ^ (code == EQ_EXPR)) 4129 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops, 4130 loop_containing_stmt (stmt)))) 4131 { 4132 /* Or push the GIMPLE_COND stmt itself. */ 4133 operand_entry *oe = operand_entry_pool.allocate (); 4134 4135 oe->op = NULL; 4136 oe->rank = (e->flags & EDGE_TRUE_VALUE) 4137 ? BIT_IOR_EXPR : BIT_AND_EXPR; 4138 /* oe->op = NULL signals that there is no SSA_NAME 4139 for the range test, and oe->id instead is the 4140 basic block number, at whose end the GIMPLE_COND 4141 is. */ 4142 oe->id = bb->index; 4143 oe->count = 1; 4144 oe->stmt_to_insert = NULL; 4145 ops.safe_push (oe); 4146 bb_ent.op = NULL; 4147 bb_ent.last_idx++; 4148 } 4149 else if (ops.length () > bb_ent.first_idx) 4150 { 4151 bb_ent.op = lhs; 4152 bb_ent.last_idx = ops.length (); 4153 } 4154 bbinfo.safe_push (bb_ent); 4155 if (bb == first_bb) 4156 break; 4157 } 4158 if (ops.length () > 1) 4159 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb); 4160 if (any_changes) 4161 { 4162 unsigned int idx, max_idx = 0; 4163 /* update_ops relies on has_single_use predicates returning the 4164 same values as they did during get_ops earlier. Additionally it 4165 never removes statements, only adds new ones and it should walk 4166 from the single imm use and check the predicate already before 4167 making those changes. 4168 On the other hand, the handling of GIMPLE_COND directly can turn 4169 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so 4170 it needs to be done in a separate loop afterwards. */ 4171 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) 4172 { 4173 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx 4174 && bbinfo[idx].op != NULL_TREE) 4175 { 4176 tree new_op; 4177 4178 max_idx = idx; 4179 stmt = last_stmt (bb); 4180 new_op = update_ops (bbinfo[idx].op, 4181 (enum tree_code) 4182 ops[bbinfo[idx].first_idx]->rank, 4183 ops, &bbinfo[idx].first_idx, 4184 loop_containing_stmt (stmt)); 4185 if (new_op == NULL_TREE) 4186 { 4187 gcc_assert (bb == last_bb); 4188 new_op = ops[bbinfo[idx].first_idx++]->op; 4189 } 4190 if (bbinfo[idx].op != new_op) 4191 { 4192 imm_use_iterator iter; 4193 use_operand_p use_p; 4194 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL; 4195 4196 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op) 4197 if (is_gimple_debug (use_stmt)) 4198 continue; 4199 else if (gimple_code (use_stmt) == GIMPLE_COND 4200 || gimple_code (use_stmt) == GIMPLE_PHI) 4201 FOR_EACH_IMM_USE_ON_STMT (use_p, iter) 4202 SET_USE (use_p, new_op); 4203 else if ((is_gimple_assign (use_stmt) 4204 && (TREE_CODE_CLASS 4205 (gimple_assign_rhs_code (use_stmt)) 4206 == tcc_comparison))) 4207 cast_or_tcc_cmp_stmt = use_stmt; 4208 else if (gimple_assign_cast_p (use_stmt)) 4209 cast_or_tcc_cmp_stmt = use_stmt; 4210 else 4211 gcc_unreachable (); 4212 4213 if (cast_or_tcc_cmp_stmt) 4214 { 4215 gcc_assert (bb == last_bb); 4216 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt); 4217 tree new_lhs = make_ssa_name (TREE_TYPE (lhs)); 4218 enum tree_code rhs_code 4219 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt) 4220 ?
gimple_assign_rhs_code (cast_or_tcc_cmp_stmt) 4221 : CONVERT_EXPR; 4222 gassign *g; 4223 if (is_gimple_min_invariant (new_op)) 4224 { 4225 new_op = fold_convert (TREE_TYPE (lhs), new_op); 4226 g = gimple_build_assign (new_lhs, new_op); 4227 } 4228 else 4229 g = gimple_build_assign (new_lhs, rhs_code, new_op); 4230 gimple_stmt_iterator gsi 4231 = gsi_for_stmt (cast_or_tcc_cmp_stmt); 4232 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt)); 4233 gimple_set_visited (g, true); 4234 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4235 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) 4236 if (is_gimple_debug (use_stmt)) 4237 continue; 4238 else if (gimple_code (use_stmt) == GIMPLE_COND 4239 || gimple_code (use_stmt) == GIMPLE_PHI) 4240 FOR_EACH_IMM_USE_ON_STMT (use_p, iter) 4241 SET_USE (use_p, new_lhs); 4242 else 4243 gcc_unreachable (); 4244 } 4245 } 4246 } 4247 if (bb == first_bb) 4248 break; 4249 } 4250 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) 4251 { 4252 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx 4253 && bbinfo[idx].op == NULL_TREE 4254 && ops[bbinfo[idx].first_idx]->op != NULL_TREE) 4255 { 4256 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb)); 4257 4258 if (idx > max_idx) 4259 max_idx = idx; 4260 4261 /* If we collapse the conditional to a true/false 4262 condition, then bubble that knowledge up to our caller. */ 4263 if (integer_zerop (ops[bbinfo[idx].first_idx]->op)) 4264 { 4265 gimple_cond_make_false (cond_stmt); 4266 cfg_cleanup_needed = true; 4267 } 4268 else if (integer_onep (ops[bbinfo[idx].first_idx]->op)) 4269 { 4270 gimple_cond_make_true (cond_stmt); 4271 cfg_cleanup_needed = true; 4272 } 4273 else 4274 { 4275 gimple_cond_set_code (cond_stmt, NE_EXPR); 4276 gimple_cond_set_lhs (cond_stmt, 4277 ops[bbinfo[idx].first_idx]->op); 4278 gimple_cond_set_rhs (cond_stmt, boolean_false_node); 4279 } 4280 update_stmt (cond_stmt); 4281 } 4282 if (bb == first_bb) 4283 break; 4284 } 4285 4286 /* The above changes could cause basic blocks after the first 4287 modified one, up to and including last_bb, to be executed even if 4288 they would not be in the original program. If the value ranges of 4289 assignment lhs' in those bbs were dependent on the conditions 4290 guarding those basic blocks which now can change, the VRs might 4291 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs 4292 are only used within the same bb, it should not be a big deal if 4293 we just reset all the VRs in those bbs. See PR68671. */ 4294 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++) 4295 reset_flow_sensitive_info_in_bb (bb); 4296 } 4297 return cfg_cleanup_needed; 4298 } 4299 4300 /* Return true if OPERAND is defined by a PHI node which uses the LHS 4301 of STMT in its operands. This is also known as a "destructive 4302 update" operation. */ 4303 4304 static bool 4305 is_phi_for_stmt (gimple *stmt, tree operand) 4306 { 4307 gimple *def_stmt; 4308 gphi *def_phi; 4309 tree lhs; 4310 use_operand_p arg_p; 4311 ssa_op_iter i; 4312 4313 if (TREE_CODE (operand) != SSA_NAME) 4314 return false; 4315 4316 lhs = gimple_assign_lhs (stmt); 4317 4318 def_stmt = SSA_NAME_DEF_STMT (operand); 4319 def_phi = dyn_cast <gphi *> (def_stmt); 4320 if (!def_phi) 4321 return false; 4322 4323 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE) 4324 if (lhs == USE_FROM_PTR (arg_p)) 4325 return true; 4326 return false; 4327 } 4328 4329 /* Remove def stmt of VAR if VAR has zero uses and recurse 4330 on rhs1 operand if so.
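E.g. if _5 = _3 + c_4 has become dead and the visited stmt _3 = _1 + b_2 was only used by it, both statements are removed by walking the rhs1 chain.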
*/ 4331 4332 static void 4333 remove_visited_stmt_chain (tree var) 4334 { 4335 gimple *stmt; 4336 gimple_stmt_iterator gsi; 4337 4338 while (1) 4339 { 4340 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var)) 4341 return; 4342 stmt = SSA_NAME_DEF_STMT (var); 4343 if (is_gimple_assign (stmt) && gimple_visited_p (stmt)) 4344 { 4345 var = gimple_assign_rhs1 (stmt); 4346 gsi = gsi_for_stmt (stmt); 4347 reassoc_remove_stmt (&gsi); 4348 release_defs (stmt); 4349 } 4350 else 4351 return; 4352 } 4353 } 4354 4355 /* This function checks three consecutive operands in 4356 the passed operands vector OPS starting from OPINDEX and 4357 swaps two operands if it is profitable for the binary operation 4358 consuming the OPINDEX + 1 and OPINDEX + 2 operands. 4359 4360 We pair ops with the same rank if possible. 4361 4362 The alternative we try is to see if STMT is a destructive 4363 update style statement, which is like: 4364 b = phi (a, ...) 4365 a = c + b; 4366 In that case, we want to use the destructive update form to 4367 expose the possible vectorizer sum reduction opportunity. 4368 In that case, the third operand will be the phi node. This 4369 check is not performed if STMT is null. 4370 4371 We could, of course, try to be better as noted above, and do a 4372 lot of work to try to find these opportunities in >3 operand 4373 cases, but it is unlikely to be worth it. */ 4374 4375 static void 4376 swap_ops_for_binary_stmt (vec<operand_entry *> ops, 4377 unsigned int opindex, gimple *stmt) 4378 { 4379 operand_entry *oe1, *oe2, *oe3; 4380 4381 oe1 = ops[opindex]; 4382 oe2 = ops[opindex + 1]; 4383 oe3 = ops[opindex + 2]; 4384 4385 if ((oe1->rank == oe2->rank 4386 && oe2->rank != oe3->rank) 4387 || (stmt && is_phi_for_stmt (stmt, oe3->op) 4388 && !is_phi_for_stmt (stmt, oe1->op) 4389 && !is_phi_for_stmt (stmt, oe2->op))) 4390 std::swap (*oe1, *oe3); 4391 else if ((oe1->rank == oe3->rank 4392 && oe2->rank != oe3->rank) 4393 || (stmt && is_phi_for_stmt (stmt, oe2->op) 4394 && !is_phi_for_stmt (stmt, oe1->op) 4395 && !is_phi_for_stmt (stmt, oe3->op))) 4396 std::swap (*oe1, *oe2); 4397 } 4398 4399 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those 4400 two definitions, otherwise return STMT. */ 4401 4402 static inline gimple * 4403 find_insert_point (gimple *stmt, tree rhs1, tree rhs2) 4404 { 4405 if (TREE_CODE (rhs1) == SSA_NAME 4406 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1))) 4407 stmt = SSA_NAME_DEF_STMT (rhs1); 4408 if (TREE_CODE (rhs2) == SSA_NAME 4409 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2))) 4410 stmt = SSA_NAME_DEF_STMT (rhs2); 4411 return stmt; 4412 } 4413 4414 /* If the stmt that defines operand has to be inserted, insert it 4415 before the use. */ 4416 static void 4417 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert) 4418 { 4419 gcc_assert (is_gimple_assign (stmt_to_insert)); 4420 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert); 4421 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert); 4422 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2); 4423 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point); 4424 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point)); 4425 4426 /* If the insert point is not stmt, then insert_point would be 4427 the point where operand rhs1 or rhs2 is defined. In this case, 4428 stmt_to_insert has to be inserted afterwards. This would 4429 only happen when the stmt insertion point is flexible.
*/ 4430 if (stmt == insert_point) 4431 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT); 4432 else 4433 insert_stmt_after (stmt_to_insert, insert_point); 4434 } 4435 4436 4437 /* Recursively rewrite our linearized statements so that the operators 4438 match those in OPS[OPINDEX], putting the computation in rank 4439 order. Return new lhs. 4440 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in 4441 the current stmt and during recursive invocations. 4442 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in 4443 recursive invocations. */ 4444 4445 static tree 4446 rewrite_expr_tree (gimple *stmt, unsigned int opindex, 4447 vec<operand_entry *> ops, bool changed, bool next_changed) 4448 { 4449 tree rhs1 = gimple_assign_rhs1 (stmt); 4450 tree rhs2 = gimple_assign_rhs2 (stmt); 4451 tree lhs = gimple_assign_lhs (stmt); 4452 operand_entry *oe; 4453 4454 /* The final recursion case for this function is that you have 4455 exactly two operations left. 4456 If we had exactly one op in the entire list to start with, we 4457 would have never called this function, and the tail recursion 4458 rewrites them one at a time. */ 4459 if (opindex + 2 == ops.length ()) 4460 { 4461 operand_entry *oe1, *oe2; 4462 4463 oe1 = ops[opindex]; 4464 oe2 = ops[opindex + 1]; 4465 4466 if (rhs1 != oe1->op || rhs2 != oe2->op) 4467 { 4468 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 4469 unsigned int uid = gimple_uid (stmt); 4470 4471 if (dump_file && (dump_flags & TDF_DETAILS)) 4472 { 4473 fprintf (dump_file, "Transforming "); 4474 print_gimple_stmt (dump_file, stmt, 0); 4475 } 4476 4477 /* If the stmt that defines operand has to be inserted, insert it 4478 before the use. */ 4479 if (oe1->stmt_to_insert) 4480 insert_stmt_before_use (stmt, oe1->stmt_to_insert); 4481 if (oe2->stmt_to_insert) 4482 insert_stmt_before_use (stmt, oe2->stmt_to_insert); 4483 /* Even when changed is false, reassociation could have e.g. removed 4484 some redundant operations, so unless we are just swapping the 4485 arguments or unless there is no change at all (then we just 4486 return lhs), force creation of a new SSA_NAME. */ 4487 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex)) 4488 { 4489 gimple *insert_point 4490 = find_insert_point (stmt, oe1->op, oe2->op); 4491 lhs = make_ssa_name (TREE_TYPE (lhs)); 4492 stmt 4493 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt), 4494 oe1->op, oe2->op); 4495 gimple_set_uid (stmt, uid); 4496 gimple_set_visited (stmt, true); 4497 if (insert_point == gsi_stmt (gsi)) 4498 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 4499 else 4500 insert_stmt_after (stmt, insert_point); 4501 } 4502 else 4503 { 4504 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op) 4505 == stmt); 4506 gimple_assign_set_rhs1 (stmt, oe1->op); 4507 gimple_assign_set_rhs2 (stmt, oe2->op); 4508 update_stmt (stmt); 4509 } 4510 4511 if (rhs1 != oe1->op && rhs1 != oe2->op) 4512 remove_visited_stmt_chain (rhs1); 4513 4514 if (dump_file && (dump_flags & TDF_DETAILS)) 4515 { 4516 fprintf (dump_file, " into "); 4517 print_gimple_stmt (dump_file, stmt, 0); 4518 } 4519 } 4520 return lhs; 4521 } 4522 4523 /* If we hit here, we should have 3 or more ops left. */ 4524 gcc_assert (opindex + 2 < ops.length ()); 4525 4526 /* Rewrite the next operator. */ 4527 oe = ops[opindex]; 4528 4529 /* If the stmt that defines operand has to be inserted, insert it 4530 before the use. 
*/ 4531 if (oe->stmt_to_insert) 4532 insert_stmt_before_use (stmt, oe->stmt_to_insert); 4533 4534 /* Recurse on the LHS of the binary operator, which is guaranteed to 4535 be the non-leaf side. */ 4536 tree new_rhs1 4537 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, 4538 changed || oe->op != rhs2 || next_changed, 4539 false); 4540 4541 if (oe->op != rhs2 || new_rhs1 != rhs1) 4542 { 4543 if (dump_file && (dump_flags & TDF_DETAILS)) 4544 { 4545 fprintf (dump_file, "Transforming "); 4546 print_gimple_stmt (dump_file, stmt, 0); 4547 } 4548 4549 /* If changed is false, this is either opindex == 0 4550 or all outer rhs2's were equal to corresponding oe->op, 4551 and powi_result is NULL. 4552 That means lhs is equivalent before and after reassociation. 4553 Otherwise ensure the old lhs SSA_NAME is not reused and 4554 create a new stmt as well, so that any debug stmts will be 4555 properly adjusted. */ 4556 if (changed) 4557 { 4558 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 4559 unsigned int uid = gimple_uid (stmt); 4560 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op); 4561 4562 lhs = make_ssa_name (TREE_TYPE (lhs)); 4563 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt), 4564 new_rhs1, oe->op); 4565 gimple_set_uid (stmt, uid); 4566 gimple_set_visited (stmt, true); 4567 if (insert_point == gsi_stmt (gsi)) 4568 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 4569 else 4570 insert_stmt_after (stmt, insert_point); 4571 } 4572 else 4573 { 4574 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op) 4575 == stmt); 4576 gimple_assign_set_rhs1 (stmt, new_rhs1); 4577 gimple_assign_set_rhs2 (stmt, oe->op); 4578 update_stmt (stmt); 4579 } 4580 4581 if (dump_file && (dump_flags & TDF_DETAILS)) 4582 { 4583 fprintf (dump_file, " into "); 4584 print_gimple_stmt (dump_file, stmt, 0); 4585 } 4586 } 4587 return lhs; 4588 } 4589 4590 /* Find out how many cycles we need to compute the statement chain. 4591 OPS_NUM holds the number of statements in a chain. CPU_WIDTH is a 4592 maximum number of independent statements we may execute per cycle. */ 4593 4594 static int 4595 get_required_cycles (int ops_num, int cpu_width) 4596 { 4597 int res; 4598 int elog; 4599 unsigned int rest; 4600 4601 /* While we have more than 2 * cpu_width operands 4602 we may reduce the number of operands by cpu_width 4603 per cycle. */ 4604 res = ops_num / (2 * cpu_width); 4605 4606 /* The remaining operand count may be halved each cycle 4607 until we have only one operand. */ 4608 rest = (unsigned)(ops_num - res * cpu_width); 4609 elog = exact_log2 (rest); 4610 if (elog >= 0) 4611 res += elog; 4612 else 4613 res += floor_log2 (rest) + 1; 4614 4615 return res; 4616 } 4617 4618 /* Returns an optimal number of registers to use for computation of 4619 given statements. */ 4620 4621 static int 4622 get_reassociation_width (int ops_num, enum tree_code opc, 4623 machine_mode mode) 4624 { 4625 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH); 4626 int width; 4627 int width_min; 4628 int cycles_best; 4629 4630 if (param_width > 0) 4631 width = param_width; 4632 else 4633 width = targetm.sched.reassociation_width (opc, mode); 4634 4635 if (width == 1) 4636 return width; 4637 4638 /* Get the minimal time required for sequence computation. */ 4639 cycles_best = get_required_cycles (ops_num, width); 4640 4641 /* Check if we may use less width and still compute sequence for 4642 the same time. It will allow us to reduce registers usage.
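For instance, 12 operands need 4 cycles at width 4 (one cycle reducing 12 to 8 operands, then 3 halving cycles) but 6 cycles at width 2.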
4643 get_required_cycles is monotonically increasing with lower width 4644 so we can perform a binary search for the minimal width that still 4645 results in the optimal cycle count. */ 4646 width_min = 1; 4647 while (width > width_min) 4648 { 4649 int width_mid = (width + width_min) / 2; 4650 4651 if (get_required_cycles (ops_num, width_mid) == cycles_best) 4652 width = width_mid; 4653 else if (width_min < width_mid) 4654 width_min = width_mid; 4655 else 4656 break; 4657 } 4658 4659 return width; 4660 } 4661 4662 /* Recursively rewrite our linearized statements so that the operators 4663 match those in OPS[OPINDEX], putting the computation in rank 4664 order and trying to allow operations to be executed in 4665 parallel. */ 4666 4667 static void 4668 rewrite_expr_tree_parallel (gassign *stmt, int width, 4669 vec<operand_entry *> ops) 4670 { 4671 enum tree_code opcode = gimple_assign_rhs_code (stmt); 4672 int op_num = ops.length (); 4673 gcc_assert (op_num > 0); 4674 int stmt_num = op_num - 1; 4675 gimple **stmts = XALLOCAVEC (gimple *, stmt_num); 4676 int op_index = op_num - 1; 4677 int stmt_index = 0; 4678 int ready_stmts_end = 0; 4679 int i = 0; 4680 gimple *stmt1 = NULL, *stmt2 = NULL; 4681 tree last_rhs1 = gimple_assign_rhs1 (stmt); 4682 4683 /* We start expression rewriting from the top statements. 4684 So, in this loop we create a full list of statements 4685 we will work with. */ 4686 stmts[stmt_num - 1] = stmt; 4687 for (i = stmt_num - 2; i >= 0; i--) 4688 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1])); 4689 4690 for (i = 0; i < stmt_num; i++) 4691 { 4692 tree op1, op2; 4693 4694 /* Determine whether we should use results of 4695 already handled statements or not. */ 4696 if (ready_stmts_end == 0 4697 && (i - stmt_index >= width || op_index < 1)) 4698 ready_stmts_end = i; 4699 4700 /* Now we choose operands for the next statement. Non zero 4701 value in ready_stmts_end means here that we should use 4702 the result of already generated statements as new operand. */ 4703 if (ready_stmts_end > 0) 4704 { 4705 op1 = gimple_assign_lhs (stmts[stmt_index++]); 4706 if (ready_stmts_end > stmt_index) 4707 op2 = gimple_assign_lhs (stmts[stmt_index++]); 4708 else if (op_index >= 0) 4709 { 4710 operand_entry *oe = ops[op_index--]; 4711 stmt2 = oe->stmt_to_insert; 4712 op2 = oe->op; 4713 } 4714 else 4715 { 4716 gcc_assert (stmt_index < i); 4717 op2 = gimple_assign_lhs (stmts[stmt_index++]); 4718 } 4719 4720 if (stmt_index >= ready_stmts_end) 4721 ready_stmts_end = 0; 4722 } 4723 else 4724 { 4725 if (op_index > 1) 4726 swap_ops_for_binary_stmt (ops, op_index - 2, NULL); 4727 operand_entry *oe2 = ops[op_index--]; 4728 operand_entry *oe1 = ops[op_index--]; 4729 op2 = oe2->op; 4730 stmt2 = oe2->stmt_to_insert; 4731 op1 = oe1->op; 4732 stmt1 = oe1->stmt_to_insert; 4733 } 4734 4735 /* If we emit the last statement then we should put 4736 operands into the last statement. It will also 4737 break the loop. */ 4738 if (op_index < 0 && stmt_index == i) 4739 i = stmt_num - 1; 4740 4741 if (dump_file && (dump_flags & TDF_DETAILS)) 4742 { 4743 fprintf (dump_file, "Transforming "); 4744 print_gimple_stmt (dump_file, stmts[i], 0); 4745 } 4746 4747 /* If the stmt that defines operand has to be inserted, insert it 4748 before the use. */ 4749 if (stmt1) 4750 insert_stmt_before_use (stmts[i], stmt1); 4751 if (stmt2) 4752 insert_stmt_before_use (stmts[i], stmt2); 4753 stmt1 = stmt2 = NULL; 4754 4755 /* We keep original statement only for the last one. All 4756 others are recreated. 
*/ 4757 if (i == stmt_num - 1) 4758 { 4759 gimple_assign_set_rhs1 (stmts[i], op1); 4760 gimple_assign_set_rhs2 (stmts[i], op2); 4761 update_stmt (stmts[i]); 4762 } 4763 else 4764 { 4765 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode); 4766 } 4767 if (dump_file && (dump_flags & TDF_DETAILS)) 4768 { 4769 fprintf (dump_file, " into "); 4770 print_gimple_stmt (dump_file, stmts[i], 0); 4771 } 4772 } 4773 4774 remove_visited_stmt_chain (last_rhs1); 4775 } 4776 4777 /* Transform STMT, which is really (A + B) + (C + D), into the left 4778 linear form, ((A+B)+C)+D. 4779 Recurse on D if necessary. */ 4780 4781 static void 4782 linearize_expr (gimple *stmt) 4783 { 4784 gimple_stmt_iterator gsi; 4785 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); 4786 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 4787 gimple *oldbinrhs = binrhs; 4788 enum tree_code rhscode = gimple_assign_rhs_code (stmt); 4789 gimple *newbinrhs = NULL; 4790 struct loop *loop = loop_containing_stmt (stmt); 4791 tree lhs = gimple_assign_lhs (stmt); 4792 4793 gcc_assert (is_reassociable_op (binlhs, rhscode, loop) 4794 && is_reassociable_op (binrhs, rhscode, loop)); 4795 4796 gsi = gsi_for_stmt (stmt); 4797 4798 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs)); 4799 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)), 4800 gimple_assign_rhs_code (binrhs), 4801 gimple_assign_lhs (binlhs), 4802 gimple_assign_rhs2 (binrhs)); 4803 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs)); 4804 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT); 4805 gimple_set_uid (binrhs, gimple_uid (stmt)); 4806 4807 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME) 4808 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 4809 4810 if (dump_file && (dump_flags & TDF_DETAILS)) 4811 { 4812 fprintf (dump_file, "Linearized: "); 4813 print_gimple_stmt (dump_file, stmt, 0); 4814 } 4815 4816 reassociate_stats.linearized++; 4817 update_stmt (stmt); 4818 4819 gsi = gsi_for_stmt (oldbinrhs); 4820 reassoc_remove_stmt (&gsi); 4821 release_defs (oldbinrhs); 4822 4823 gimple_set_visited (stmt, true); 4824 gimple_set_visited (binlhs, true); 4825 gimple_set_visited (binrhs, true); 4826 4827 /* Tail recurse on the new rhs if it still needs reassociation. */ 4828 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop)) 4829 /* ??? This should probably be linearize_expr (newbinrhs) but I don't 4830 want to change the algorithm while converting to tuples. */ 4831 linearize_expr (stmt); 4832 } 4833 4834 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return 4835 it. Otherwise, return NULL. */ 4836 4837 static gimple * 4838 get_single_immediate_use (tree lhs) 4839 { 4840 use_operand_p immuse; 4841 gimple *immusestmt; 4842 4843 if (TREE_CODE (lhs) == SSA_NAME 4844 && single_imm_use (lhs, &immuse, &immusestmt) 4845 && is_gimple_assign (immusestmt)) 4846 return immusestmt; 4847 4848 return NULL; 4849 } 4850 4851 /* Recursively negate the value of TONEGATE, and return the SSA_NAME 4852 representing the negated value. Insertions of any necessary 4853 instructions go before GSI. 4854 This function is recursive in that, if you hand it "a_5" as the 4855 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will 4856 transform b_3 + b_4 into a_5 = -b_3 + -b_4.
*/ 4857 4858 static tree 4859 negate_value (tree tonegate, gimple_stmt_iterator *gsip) 4860 { 4861 gimple *negatedefstmt = NULL; 4862 tree resultofnegate; 4863 gimple_stmt_iterator gsi; 4864 unsigned int uid; 4865 4866 /* If we are trying to negate a name, defined by an add, negate the 4867 add operands instead. */ 4868 if (TREE_CODE (tonegate) == SSA_NAME) 4869 negatedefstmt = SSA_NAME_DEF_STMT (tonegate); 4870 if (TREE_CODE (tonegate) == SSA_NAME 4871 && is_gimple_assign (negatedefstmt) 4872 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME 4873 && has_single_use (gimple_assign_lhs (negatedefstmt)) 4874 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR) 4875 { 4876 tree rhs1 = gimple_assign_rhs1 (negatedefstmt); 4877 tree rhs2 = gimple_assign_rhs2 (negatedefstmt); 4878 tree lhs = gimple_assign_lhs (negatedefstmt); 4879 gimple *g; 4880 4881 gsi = gsi_for_stmt (negatedefstmt); 4882 rhs1 = negate_value (rhs1, &gsi); 4883 4884 gsi = gsi_for_stmt (negatedefstmt); 4885 rhs2 = negate_value (rhs2, &gsi); 4886 4887 gsi = gsi_for_stmt (negatedefstmt); 4888 lhs = make_ssa_name (TREE_TYPE (lhs)); 4889 gimple_set_visited (negatedefstmt, true); 4890 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2); 4891 gimple_set_uid (g, gimple_uid (negatedefstmt)); 4892 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4893 return lhs; 4894 } 4895 4896 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate); 4897 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true, 4898 NULL_TREE, true, GSI_SAME_STMT); 4899 gsi = *gsip; 4900 uid = gimple_uid (gsi_stmt (gsi)); 4901 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi)) 4902 { 4903 gimple *stmt = gsi_stmt (gsi); 4904 if (gimple_uid (stmt) != 0) 4905 break; 4906 gimple_set_uid (stmt, uid); 4907 } 4908 return resultofnegate; 4909 } 4910 4911 /* Return true if we should break up the subtract in STMT into an add 4912 with negate. This is true when the subtract operands are really 4913 adds, or the subtract itself is used in an add expression. In 4914 either case, breaking up the subtract into an add with negate 4915 exposes the adds to reassociation. */ 4916 4917 static bool 4918 should_break_up_subtract (gimple *stmt) 4919 { 4920 tree lhs = gimple_assign_lhs (stmt); 4921 tree binlhs = gimple_assign_rhs1 (stmt); 4922 tree binrhs = gimple_assign_rhs2 (stmt); 4923 gimple *immusestmt; 4924 struct loop *loop = loop_containing_stmt (stmt); 4925 4926 if (TREE_CODE (binlhs) == SSA_NAME 4927 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop)) 4928 return true; 4929 4930 if (TREE_CODE (binrhs) == SSA_NAME 4931 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop)) 4932 return true; 4933 4934 if (TREE_CODE (lhs) == SSA_NAME 4935 && (immusestmt = get_single_immediate_use (lhs)) 4936 && is_gimple_assign (immusestmt) 4937 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR 4938 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR 4939 && gimple_assign_rhs1 (immusestmt) == lhs) 4940 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR)) 4941 return true; 4942 return false; 4943 } 4944 4945 /* Transform STMT from A - B into A + -B.
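E.g. x_1 = a_2 - b_3 becomes _4 = -b_3; x_1 = a_2 + _4, where negate_value may instead fold the negation into b_3's defining addition.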
*/ 4946 4947 static void 4948 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip) 4949 { 4950 tree rhs1 = gimple_assign_rhs1 (stmt); 4951 tree rhs2 = gimple_assign_rhs2 (stmt); 4952 4953 if (dump_file && (dump_flags & TDF_DETAILS)) 4954 { 4955 fprintf (dump_file, "Breaking up subtract "); 4956 print_gimple_stmt (dump_file, stmt, 0); 4957 } 4958 4959 rhs2 = negate_value (rhs2, gsip); 4960 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2); 4961 update_stmt (stmt); 4962 } 4963 4964 /* Determine whether STMT is a builtin call that raises an SSA name 4965 to an integer power and has only one use. If so, and this is early 4966 reassociation and unsafe math optimizations are permitted, place 4967 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE. 4968 If any of these conditions does not hold, return FALSE. */ 4969 4970 static bool 4971 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent) 4972 { 4973 tree arg1; 4974 REAL_VALUE_TYPE c, cint; 4975 4976 switch (gimple_call_combined_fn (stmt)) 4977 { 4978 CASE_CFN_POW: 4979 if (flag_errno_math) 4980 return false; 4981 4982 *base = gimple_call_arg (stmt, 0); 4983 arg1 = gimple_call_arg (stmt, 1); 4984 4985 if (TREE_CODE (arg1) != REAL_CST) 4986 return false; 4987 4988 c = TREE_REAL_CST (arg1); 4989 4990 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT) 4991 return false; 4992 4993 *exponent = real_to_integer (&c); 4994 real_from_integer (&cint, VOIDmode, *exponent, SIGNED); 4995 if (!real_identical (&c, &cint)) 4996 return false; 4997 4998 break; 4999 5000 CASE_CFN_POWI: 5001 *base = gimple_call_arg (stmt, 0); 5002 arg1 = gimple_call_arg (stmt, 1); 5003 5004 if (!tree_fits_shwi_p (arg1)) 5005 return false; 5006 5007 *exponent = tree_to_shwi (arg1); 5008 break; 5009 5010 default: 5011 return false; 5012 } 5013 5014 /* Expanding negative exponents is generally unproductive, so we don't 5015 complicate matters with those. Exponents of zero and one should 5016 have been handled by expression folding. */ 5017 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME) 5018 return false; 5019 5020 return true; 5021 } 5022 5023 /* Try to derive and add operand entry for OP to *OPS. Return false if 5024 unsuccessful. */ 5025 5026 static bool 5027 try_special_add_to_ops (vec<operand_entry *> *ops, 5028 enum tree_code code, 5029 tree op, gimple* def_stmt) 5030 { 5031 tree base = NULL_TREE; 5032 HOST_WIDE_INT exponent = 0; 5033 5034 if (TREE_CODE (op) != SSA_NAME 5035 || ! has_single_use (op)) 5036 return false; 5037 5038 if (code == MULT_EXPR 5039 && reassoc_insert_powi_p 5040 && flag_unsafe_math_optimizations 5041 && is_gimple_call (def_stmt) 5042 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent)) 5043 { 5044 add_repeat_to_ops_vec (ops, base, exponent); 5045 gimple_set_visited (def_stmt, true); 5046 return true; 5047 } 5048 else if (code == MULT_EXPR 5049 && is_gimple_assign (def_stmt) 5050 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR 5051 && !HONOR_SNANS (TREE_TYPE (op)) 5052 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op)) 5053 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))) 5054 { 5055 tree rhs1 = gimple_assign_rhs1 (def_stmt); 5056 tree cst = build_minus_one_cst (TREE_TYPE (op)); 5057 add_to_ops_vec (ops, rhs1); 5058 add_to_ops_vec (ops, cst); 5059 gimple_set_visited (def_stmt, true); 5060 return true; 5061 } 5062 5063 return false; 5064 } 5065 5066 /* Recursively linearize a binary expression that is the RHS of STMT. 5067 Place the operands of the expression tree in the vector named OPS. 
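E.g. for x = (a + b) + (c + d) the tree is first left-linearized and OPS then holds a, b, c and d, modulo entries rewritten by try_special_add_to_ops.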
*/ 5068 5069 static void 5070 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt, 5071 bool is_associative, bool set_visited) 5072 { 5073 tree binlhs = gimple_assign_rhs1 (stmt); 5074 tree binrhs = gimple_assign_rhs2 (stmt); 5075 gimple *binlhsdef = NULL, *binrhsdef = NULL; 5076 bool binlhsisreassoc = false; 5077 bool binrhsisreassoc = false; 5078 enum tree_code rhscode = gimple_assign_rhs_code (stmt); 5079 struct loop *loop = loop_containing_stmt (stmt); 5080 5081 if (set_visited) 5082 gimple_set_visited (stmt, true); 5083 5084 if (TREE_CODE (binlhs) == SSA_NAME) 5085 { 5086 binlhsdef = SSA_NAME_DEF_STMT (binlhs); 5087 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop) 5088 && !stmt_could_throw_p (binlhsdef)); 5089 } 5090 5091 if (TREE_CODE (binrhs) == SSA_NAME) 5092 { 5093 binrhsdef = SSA_NAME_DEF_STMT (binrhs); 5094 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop) 5095 && !stmt_could_throw_p (binrhsdef)); 5096 } 5097 5098 /* If the LHS is not reassociable, but the RHS is, we need to swap 5099 them. If neither is reassociable, there is nothing we can do, so 5100 just put them in the ops vector. If the LHS is reassociable, 5101 linearize it. If both are reassociable, then linearize the RHS 5102 and the LHS. */ 5103 5104 if (!binlhsisreassoc) 5105 { 5106 /* If this operation is not associative (e.g. it is a division), give up. */ 5107 if (!is_associative) 5108 { 5109 add_to_ops_vec (ops, binrhs); 5110 return; 5111 } 5112 5113 if (!binrhsisreassoc) 5114 { 5115 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef)) 5116 add_to_ops_vec (ops, binrhs); 5117 5118 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef)) 5119 add_to_ops_vec (ops, binlhs); 5120 5121 return; 5122 } 5123 5124 if (dump_file && (dump_flags & TDF_DETAILS)) 5125 { 5126 fprintf (dump_file, "swapping operands of "); 5127 print_gimple_stmt (dump_file, stmt, 0); 5128 } 5129 5130 swap_ssa_operands (stmt, 5131 gimple_assign_rhs1_ptr (stmt), 5132 gimple_assign_rhs2_ptr (stmt)); 5133 update_stmt (stmt); 5134 5135 if (dump_file && (dump_flags & TDF_DETAILS)) 5136 { 5137 fprintf (dump_file, " is now "); 5138 print_gimple_stmt (dump_file, stmt, 0); 5139 } 5140 5141 /* We want to make it so the lhs is always the reassociative op, 5142 so swap. */ 5143 std::swap (binlhs, binrhs); 5144 } 5145 else if (binrhsisreassoc) 5146 { 5147 linearize_expr (stmt); 5148 binlhs = gimple_assign_rhs1 (stmt); 5149 binrhs = gimple_assign_rhs2 (stmt); 5150 } 5151 5152 gcc_assert (TREE_CODE (binrhs) != SSA_NAME 5153 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), 5154 rhscode, loop)); 5155 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs), 5156 is_associative, set_visited); 5157 5158 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef)) 5159 add_to_ops_vec (ops, binrhs); 5160 } 5161 5162 /* Repropagate the negates back into subtracts, since no other pass 5163 currently does it. */ 5164 5165 static void 5166 repropagate_negates (void) 5167 { 5168 unsigned int i = 0; 5169 tree negate; 5170 5171 FOR_EACH_VEC_ELT (plus_negates, i, negate) 5172 { 5173 gimple *user = get_single_immediate_use (negate); 5174 5175 if (!user || !is_gimple_assign (user)) 5176 continue; 5177 5178 /* The negate operand can be either operand of a PLUS_EXPR 5179 (it can be the LHS if the RHS is a constant for example). 5180 5181 Force the negate operand to the RHS of the PLUS_EXPR, then 5182 transform the PLUS_EXPR into a MINUS_EXPR.
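I.e. both b_1 + -a_2 and -a_2 + b_1 become b_1 - a_2.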
*/ 5183 if (gimple_assign_rhs_code (user) == PLUS_EXPR) 5184 { 5185 /* If the negated operand appears on the LHS of the 5186 PLUS_EXPR, exchange the operands of the PLUS_EXPR 5187 to force the negated operand to the RHS of the PLUS_EXPR. */ 5188 if (gimple_assign_rhs1 (user) == negate) 5189 { 5190 swap_ssa_operands (user, 5191 gimple_assign_rhs1_ptr (user), 5192 gimple_assign_rhs2_ptr (user)); 5193 } 5194 5195 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace 5196 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */ 5197 if (gimple_assign_rhs2 (user) == negate) 5198 { 5199 tree rhs1 = gimple_assign_rhs1 (user); 5200 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate)); 5201 gimple_stmt_iterator gsi = gsi_for_stmt (user); 5202 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2); 5203 update_stmt (user); 5204 } 5205 } 5206 else if (gimple_assign_rhs_code (user) == MINUS_EXPR) 5207 { 5208 if (gimple_assign_rhs1 (user) == negate) 5209 { 5210 /* We have 5211 x = -a 5212 y = x - b 5213 which we transform into 5214 x = a + b 5215 y = -x . 5216 This pushes down the negate which we possibly can merge 5217 into some other operation, hence insert it into the 5218 plus_negates vector. */ 5219 gimple *feed = SSA_NAME_DEF_STMT (negate); 5220 tree a = gimple_assign_rhs1 (feed); 5221 tree b = gimple_assign_rhs2 (user); 5222 gimple_stmt_iterator gsi = gsi_for_stmt (feed); 5223 gimple_stmt_iterator gsi2 = gsi_for_stmt (user); 5224 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed))); 5225 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b); 5226 gsi_insert_before (&gsi2, g, GSI_SAME_STMT); 5227 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x); 5228 user = gsi_stmt (gsi2); 5229 update_stmt (user); 5230 reassoc_remove_stmt (&gsi); 5231 release_defs (feed); 5232 plus_negates.safe_push (gimple_assign_lhs (user)); 5233 } 5234 else 5235 { 5236 /* Transform "x = -a; y = b - x" into "y = b + a", getting 5237 rid of one operation. */ 5238 gimple *feed = SSA_NAME_DEF_STMT (negate); 5239 tree a = gimple_assign_rhs1 (feed); 5240 tree rhs1 = gimple_assign_rhs1 (user); 5241 gimple_stmt_iterator gsi = gsi_for_stmt (user); 5242 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a); 5243 update_stmt (gsi_stmt (gsi)); 5244 } 5245 } 5246 } 5247 } 5248 5249 /* Returns true if OP is of a type for which we can do reassociation. 5250 That is for integral or non-saturating fixed-point types, and for 5251 floating point type when associative-math is enabled. */ 5252 5253 static bool 5254 can_reassociate_p (tree op) 5255 { 5256 tree type = TREE_TYPE (op); 5257 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op)) 5258 return false; 5259 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)) 5260 || NON_SAT_FIXED_POINT_TYPE_P (type) 5261 || (flag_associative_math && FLOAT_TYPE_P (type))) 5262 return true; 5263 return false; 5264 } 5265 5266 /* Break up subtract operations in block BB. 5267 5268 We do this top down because we don't know whether the subtract is 5269 part of a possible chain of reassociation except at the top. 5270 5271 IE given 5272 d = f + g 5273 c = a + e 5274 b = c - d 5275 q = b - r 5276 k = t - q 5277 5278 we want to break up k = t - q, but we won't until we've transformed q 5279 = b - r, which won't be broken up until we transform b = c - d. 5280 5281 En passant, clear the GIMPLE visited flag on every statement 5282 and set UIDs within each basic block. 
*/ 5283 5284 static void 5285 break_up_subtract_bb (basic_block bb) 5286 { 5287 gimple_stmt_iterator gsi; 5288 basic_block son; 5289 unsigned int uid = 1; 5290 5291 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 5292 { 5293 gimple *stmt = gsi_stmt (gsi); 5294 gimple_set_visited (stmt, false); 5295 gimple_set_uid (stmt, uid++); 5296 5297 if (!is_gimple_assign (stmt) 5298 || !can_reassociate_p (gimple_assign_lhs (stmt))) 5299 continue; 5300 5301 /* Look for simple gimple subtract operations. */ 5302 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR) 5303 { 5304 if (!can_reassociate_p (gimple_assign_rhs1 (stmt)) 5305 || !can_reassociate_p (gimple_assign_rhs2 (stmt))) 5306 continue; 5307 5308 /* Check for a subtract used only in an addition. If this 5309 is the case, transform it into add of a negate for better 5310 reassociation. IE transform C = A-B into C = A + -B if C 5311 is only used in an addition. */ 5312 if (should_break_up_subtract (stmt)) 5313 break_up_subtract (stmt, &gsi); 5314 } 5315 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR 5316 && can_reassociate_p (gimple_assign_rhs1 (stmt))) 5317 plus_negates.safe_push (gimple_assign_lhs (stmt)); 5318 } 5319 for (son = first_dom_son (CDI_DOMINATORS, bb); 5320 son; 5321 son = next_dom_son (CDI_DOMINATORS, son)) 5322 break_up_subtract_bb (son); 5323 } 5324 5325 /* Used for repeated factor analysis. */ 5326 struct repeat_factor 5327 { 5328 /* An SSA name that occurs in a multiply chain. */ 5329 tree factor; 5330 5331 /* Cached rank of the factor. */ 5332 unsigned rank; 5333 5334 /* Number of occurrences of the factor in the chain. */ 5335 HOST_WIDE_INT count; 5336 5337 /* An SSA name representing the product of this factor and 5338 all factors appearing later in the repeated factor vector. */ 5339 tree repr; 5340 }; 5341 5342 5343 static vec<repeat_factor> repeat_factor_vec; 5344 5345 /* Used for sorting the repeat factor vector. Sort primarily by 5346 ascending occurrence count, secondarily by descending rank. */ 5347 5348 static int 5349 compare_repeat_factors (const void *x1, const void *x2) 5350 { 5351 const repeat_factor *rf1 = (const repeat_factor *) x1; 5352 const repeat_factor *rf2 = (const repeat_factor *) x2; 5353 5354 if (rf1->count != rf2->count) 5355 return rf1->count - rf2->count; 5356 5357 return rf2->rank - rf1->rank; 5358 } 5359 5360 /* Look for repeated operands in OPS in the multiply tree rooted at 5361 STMT. Replace them with an optimal sequence of multiplies and powi 5362 builtin calls, and remove the used operands from OPS. Return an 5363 SSA name representing the value of the replacement sequence. */ 5364 5365 static tree 5366 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops) 5367 { 5368 unsigned i, j, vec_len; 5369 int ii; 5370 operand_entry *oe; 5371 repeat_factor *rf1, *rf2; 5372 repeat_factor rfnew; 5373 tree result = NULL_TREE; 5374 tree target_ssa, iter_result; 5375 tree type = TREE_TYPE (gimple_get_lhs (stmt)); 5376 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI); 5377 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 5378 gimple *mul_stmt, *pow_stmt; 5379 5380 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and 5381 target. */ 5382 if (!powi_fndecl) 5383 return NULL_TREE; 5384 5385 /* Allocate the repeated factor vector. */ 5386 repeat_factor_vec.create (10); 5387 5388 /* Scan the OPS vector for all SSA names in the product and build 5389 up a vector of occurrence counts for each factor. 
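E.g. for x * x * y * y * y this records x with count 2 and y with count 3; per-entry counts from OPS are accumulated, so repeated entries of the same factor add up.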
*/ 5390 FOR_EACH_VEC_ELT (*ops, i, oe) 5391 { 5392 if (TREE_CODE (oe->op) == SSA_NAME) 5393 { 5394 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 5395 { 5396 if (rf1->factor == oe->op) 5397 { 5398 rf1->count += oe->count; 5399 break; 5400 } 5401 } 5402 5403 if (j >= repeat_factor_vec.length ()) 5404 { 5405 rfnew.factor = oe->op; 5406 rfnew.rank = oe->rank; 5407 rfnew.count = oe->count; 5408 rfnew.repr = NULL_TREE; 5409 repeat_factor_vec.safe_push (rfnew); 5410 } 5411 } 5412 } 5413 5414 /* Sort the repeated factor vector by (a) increasing occurrence count, 5415 and (b) decreasing rank. */ 5416 repeat_factor_vec.qsort (compare_repeat_factors); 5417 5418 /* It is generally best to combine as many base factors as possible 5419 into a product before applying __builtin_powi to the result. 5420 However, the sort order chosen for the repeated factor vector 5421 allows us to cache partial results for the product of the base 5422 factors for subsequent use. When we already have a cached partial 5423 result from a previous iteration, it is best to make use of it 5424 before looking for another __builtin_pow opportunity. 5425 5426 As an example, consider x * x * y * y * y * z * z * z * z. 5427 We want to first compose the product x * y * z, raise it to the 5428 second power, then multiply this by y * z, and finally multiply 5429 by z. This can be done in 5 multiplies provided we cache y * z 5430 for use in both expressions: 5431 5432 t1 = y * z 5433 t2 = t1 * x 5434 t3 = t2 * t2 5435 t4 = t1 * t3 5436 result = t4 * z 5437 5438 If we instead ignored the cached y * z and first multiplied by 5439 the __builtin_pow opportunity z * z, we would get the inferior: 5440 5441 t1 = y * z 5442 t2 = t1 * x 5443 t3 = t2 * t2 5444 t4 = z * z 5445 t5 = t3 * t4 5446 result = t5 * y */ 5447 5448 vec_len = repeat_factor_vec.length (); 5449 5450 /* Repeatedly look for opportunities to create a builtin_powi call. */ 5451 while (true) 5452 { 5453 HOST_WIDE_INT power; 5454 5455 /* First look for the largest cached product of factors from 5456 preceding iterations. If found, create a builtin_powi for 5457 it if the minimum occurrence count for its factors is at 5458 least 2, or just use this cached product as our next 5459 multiplicand if the minimum occurrence count is 1. 
*/ 5460 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 5461 { 5462 if (rf1->repr && rf1->count > 0) 5463 break; 5464 } 5465 5466 if (j < vec_len) 5467 { 5468 power = rf1->count; 5469 5470 if (power == 1) 5471 { 5472 iter_result = rf1->repr; 5473 5474 if (dump_file && (dump_flags & TDF_DETAILS)) 5475 { 5476 unsigned elt; 5477 repeat_factor *rf; 5478 fputs ("Multiplying by cached product ", dump_file); 5479 for (elt = j; elt < vec_len; elt++) 5480 { 5481 rf = &repeat_factor_vec[elt]; 5482 print_generic_expr (dump_file, rf->factor); 5483 if (elt < vec_len - 1) 5484 fputs (" * ", dump_file); 5485 } 5486 fputs ("\n", dump_file); 5487 } 5488 } 5489 else 5490 { 5491 iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); 5492 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, 5493 build_int_cst (integer_type_node, 5494 power)); 5495 gimple_call_set_lhs (pow_stmt, iter_result); 5496 gimple_set_location (pow_stmt, gimple_location (stmt)); 5497 gimple_set_uid (pow_stmt, gimple_uid (stmt)); 5498 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); 5499 5500 if (dump_file && (dump_flags & TDF_DETAILS)) 5501 { 5502 unsigned elt; 5503 repeat_factor *rf; 5504 fputs ("Building __builtin_pow call for cached product (", 5505 dump_file); 5506 for (elt = j; elt < vec_len; elt++) 5507 { 5508 rf = &repeat_factor_vec[elt]; 5509 print_generic_expr (dump_file, rf->factor); 5510 if (elt < vec_len - 1) 5511 fputs (" * ", dump_file); 5512 } 5513 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", 5514 power); 5515 } 5516 } 5517 } 5518 else 5519 { 5520 /* Otherwise, find the first factor in the repeated factor 5521 vector whose occurrence count is at least 2. If no such 5522 factor exists, there are no builtin_powi opportunities 5523 remaining. */ 5524 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 5525 { 5526 if (rf1->count >= 2) 5527 break; 5528 } 5529 5530 if (j >= vec_len) 5531 break; 5532 5533 power = rf1->count; 5534 5535 if (dump_file && (dump_flags & TDF_DETAILS)) 5536 { 5537 unsigned elt; 5538 repeat_factor *rf; 5539 fputs ("Building __builtin_pow call for (", dump_file); 5540 for (elt = j; elt < vec_len; elt++) 5541 { 5542 rf = &repeat_factor_vec[elt]; 5543 print_generic_expr (dump_file, rf->factor); 5544 if (elt < vec_len - 1) 5545 fputs (" * ", dump_file); 5546 } 5547 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power); 5548 } 5549 5550 reassociate_stats.pows_created++; 5551 5552 /* Visit each element of the vector in reverse order (so that 5553 high-occurrence elements are visited first, and within the 5554 same occurrence count, lower-ranked elements are visited 5555 first). Form a linear product of all elements in this order 5556 whose occurrence count is at least that of element J. 5557 Record the SSA name representing the product of each element 5558 with all subsequent elements in the vector. */ 5559 if (j == vec_len - 1) 5560 rf1->repr = rf1->factor; 5561 else 5562 { 5563 for (ii = vec_len - 2; ii >= (int)j; ii--) 5564 { 5565 tree op1, op2; 5566 5567 rf1 = &repeat_factor_vec[ii]; 5568 rf2 = &repeat_factor_vec[ii + 1]; 5569 5570 /* Init the last factor's representative to be itself.
*/ 5571 if (!rf2->repr) 5572 rf2->repr = rf2->factor; 5573 5574 op1 = rf1->factor; 5575 op2 = rf2->repr; 5576 5577 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow"); 5578 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR, 5579 op1, op2); 5580 gimple_set_location (mul_stmt, gimple_location (stmt)); 5581 gimple_set_uid (mul_stmt, gimple_uid (stmt)); 5582 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); 5583 rf1->repr = target_ssa; 5584 5585 /* Don't reprocess the multiply we just introduced. */ 5586 gimple_set_visited (mul_stmt, true); 5587 } 5588 } 5589 5590 /* Form a call to __builtin_powi for the maximum product 5591 just formed, raised to the power obtained earlier. */ 5592 rf1 = &repeat_factor_vec[j]; 5593 iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); 5594 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, 5595 build_int_cst (integer_type_node, 5596 power)); 5597 gimple_call_set_lhs (pow_stmt, iter_result); 5598 gimple_set_location (pow_stmt, gimple_location (stmt)); 5599 gimple_set_uid (pow_stmt, gimple_uid (stmt)); 5600 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); 5601 } 5602 5603 /* If we previously formed at least one other builtin_powi call, 5604 form the product of this one and those others. */ 5605 if (result) 5606 { 5607 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow"); 5608 mul_stmt = gimple_build_assign (new_result, MULT_EXPR, 5609 result, iter_result); 5610 gimple_set_location (mul_stmt, gimple_location (stmt)); 5611 gimple_set_uid (mul_stmt, gimple_uid (stmt)); 5612 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); 5613 gimple_set_visited (mul_stmt, true); 5614 result = new_result; 5615 } 5616 else 5617 result = iter_result; 5618 5619 /* Decrement the occurrence count of each element in the product 5620 by the count found above, and remove this many copies of each 5621 factor from OPS. */ 5622 for (i = j; i < vec_len; i++) 5623 { 5624 unsigned k = power; 5625 unsigned n; 5626 5627 rf1 = &repeat_factor_vec[i]; 5628 rf1->count -= power; 5629 5630 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe) 5631 { 5632 if (oe->op == rf1->factor) 5633 { 5634 if (oe->count <= k) 5635 { 5636 ops->ordered_remove (n); 5637 k -= oe->count; 5638 5639 if (k == 0) 5640 break; 5641 } 5642 else 5643 { 5644 oe->count -= k; 5645 break; 5646 } 5647 } 5648 } 5649 } 5650 } 5651 5652 /* At this point all elements in the repeated factor vector have a 5653 remaining occurrence count of 0 or 1, and those with a count of 1 5654 don't have cached representatives. Re-sort the ops vector and 5655 clean up. */ 5656 ops->qsort (sort_by_operand_rank); 5657 repeat_factor_vec.release (); 5658 5659 /* Return the final product computed herein. Note that there may 5660 still be some elements with single occurrence count left in OPS; 5661 those will be handled by the normal reassociation logic. */ 5662 return result; 5663 } 5664 5665 /* Attempt to optimize 5666 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or 5667 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. 
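E.g. -2.0 * copysign (4.0, y_1) becomes the negation of copysign (-8.0, y_1); the sign of the folded constant is irrelevant, as copysign only uses its magnitude.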
*/ 5668 5669 static void 5670 attempt_builtin_copysign (vec<operand_entry *> *ops) 5671 { 5672 operand_entry *oe; 5673 unsigned int i; 5674 unsigned int length = ops->length (); 5675 tree cst = ops->last ()->op; 5676 5677 if (length == 1 || TREE_CODE (cst) != REAL_CST) 5678 return; 5679 5680 FOR_EACH_VEC_ELT (*ops, i, oe) 5681 { 5682 if (TREE_CODE (oe->op) == SSA_NAME 5683 && has_single_use (oe->op)) 5684 { 5685 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op); 5686 if (gcall *old_call = dyn_cast <gcall *> (def_stmt)) 5687 { 5688 tree arg0, arg1; 5689 switch (gimple_call_combined_fn (old_call)) 5690 { 5691 CASE_CFN_COPYSIGN: 5692 CASE_CFN_COPYSIGN_FN: 5693 arg0 = gimple_call_arg (old_call, 0); 5694 arg1 = gimple_call_arg (old_call, 1); 5695 /* The first argument of copysign must be a constant, 5696 otherwise there's nothing to do. */ 5697 if (TREE_CODE (arg0) == REAL_CST) 5698 { 5699 tree type = TREE_TYPE (arg0); 5700 tree mul = const_binop (MULT_EXPR, type, cst, arg0); 5701 /* If we couldn't fold to a single constant, skip it. 5702 That happens e.g. for inexact multiplication when 5703 -frounding-math. */ 5704 if (mul == NULL_TREE) 5705 break; 5706 /* Instead of adjusting OLD_CALL, let's build a new 5707 call to not leak the LHS and prevent keeping bogus 5708 debug statements. DCE will clean up the old call. */ 5709 gcall *new_call; 5710 if (gimple_call_internal_p (old_call)) 5711 new_call = gimple_build_call_internal 5712 (IFN_COPYSIGN, 2, mul, arg1); 5713 else 5714 new_call = gimple_build_call 5715 (gimple_call_fndecl (old_call), 2, mul, arg1); 5716 tree lhs = make_ssa_name (type); 5717 gimple_call_set_lhs (new_call, lhs); 5718 gimple_set_location (new_call, 5719 gimple_location (old_call)); 5720 insert_stmt_after (new_call, old_call); 5721 /* We've used the constant, get rid of it. */ 5722 ops->pop (); 5723 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst)); 5724 /* Handle the CST1 < 0 case by negating the result. */ 5725 if (cst1_neg) 5726 { 5727 tree negrhs = make_ssa_name (TREE_TYPE (lhs)); 5728 gimple *negate_stmt 5729 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs); 5730 insert_stmt_after (negate_stmt, new_call); 5731 oe->op = negrhs; 5732 } 5733 else 5734 oe->op = lhs; 5735 if (dump_file && (dump_flags & TDF_DETAILS)) 5736 { 5737 fprintf (dump_file, "Optimizing copysign: "); 5738 print_generic_expr (dump_file, cst); 5739 fprintf (dump_file, " * COPYSIGN ("); 5740 print_generic_expr (dump_file, arg0); 5741 fprintf (dump_file, ", "); 5742 print_generic_expr (dump_file, arg1); 5743 fprintf (dump_file, ") into %sCOPYSIGN (", 5744 cst1_neg ? "-" : ""); 5745 print_generic_expr (dump_file, mul); 5746 fprintf (dump_file, ", "); 5747 print_generic_expr (dump_file, arg1); 5748 fprintf (dump_file, ")\n"); 5749 } 5750 return; 5751 } 5752 break; 5753 default: 5754 break; 5755 } 5756 } 5757 } 5758 } 5759 } 5760 5761 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.

static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
{
  tree rhs1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (stmt);
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}

/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
			    tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}

/* Reassociate expressions in basic block BB and, recursively, in its
   children in the post-dominator tree.

   Bubble up the return status from maybe_optimize_range_tests.  */

static bool
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple *stmt = last_stmt (bb);
  bool cfg_cleanup_needed = false;

  if (stmt && !gimple_visited_p (stmt))
    cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);

  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
	  && !stmt_could_throw_p (stmt))
	{
	  tree lhs, rhs1, rhs2;
	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

	  /* If this is not a gimple binary expression, there is
	     nothing for us to do with it.  */
	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
	    continue;

	  /* If this was part of an already processed statement,
	     we don't need to touch it again.  */
	  if (gimple_visited_p (stmt))
	    {
	      /* This statement might have become dead because of previous
		 reassociations.  */
	      if (has_zero_uses (gimple_get_lhs (stmt)))
		{
		  reassoc_remove_stmt (&gsi);
		  release_defs (stmt);
		  /* Removing the stmt above may have left GSI pointing
		     at the end of the sequence.  Reset it to the last
		     stmt of the block; if the block is now empty, bail
		     out.  */
		  if (gsi_end_p (gsi))
		    {
		      gsi = gsi_last_bb (bb);
		      if (gsi_end_p (gsi))
			break;
		    }
		}
	      continue;
	    }

	  lhs = gimple_assign_lhs (stmt);
	  rhs1 = gimple_assign_rhs1 (stmt);
	  rhs2 = gimple_assign_rhs2 (stmt);

	  /* For non-bit or min/max operations we can't associate
	     all types.  Verify that here.  */
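	  /* An illustrative case for the check below (the precise
	     predicate is can_reassociate_p): for signed integers with
	     undefined overflow, rewriting (a + -1) + 1 as (a + 1) + -1
	     introduces an overflow when a == INT_MAX that the original
	     never produced.  Roughly, wrapping-overflow integer types
	     and, under -fassociative-math, floating-point types pass
	     the check; bit and min/max operations are safe for any
	     type, hence their exemption here.  */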
	  if (rhs_code != BIT_IOR_EXPR
	      && rhs_code != BIT_AND_EXPR
	      && rhs_code != BIT_XOR_EXPR
	      && rhs_code != MIN_EXPR
	      && rhs_code != MAX_EXPR
	      && (!can_reassociate_p (lhs)
		  || !can_reassociate_p (rhs1)
		  || !can_reassociate_p (rhs2)))
	    continue;

	  if (associative_tree_code (rhs_code))
	    {
	      auto_vec<operand_entry *> ops;
	      tree powi_result = NULL_TREE;
	      bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));

	      /* There may be no immediate uses left by the time we
		 get here because we may have eliminated them all.  */
	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
		continue;

	      gimple_set_visited (stmt, true);
	      linearize_expr_tree (&ops, stmt, true, true);
	      ops.qsort (sort_by_operand_rank);
	      int orig_len = ops.length ();
	      optimize_ops_list (rhs_code, &ops);
	      if (undistribute_ops_list (rhs_code, &ops,
					 loop_containing_stmt (stmt)))
		{
		  ops.qsort (sort_by_operand_rank);
		  optimize_ops_list (rhs_code, &ops);
		}

	      if (rhs_code == PLUS_EXPR
		  && transform_add_to_multiply (&ops))
		ops.qsort (sort_by_operand_rank);

	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
		{
		  if (is_vector)
		    optimize_vec_cond_expr (rhs_code, &ops);
		  else
		    optimize_range_tests (rhs_code, &ops, NULL);
		}

	      if (rhs_code == MULT_EXPR && !is_vector)
		{
		  attempt_builtin_copysign (&ops);

		  if (reassoc_insert_powi_p
		      && flag_unsafe_math_optimizations)
		    powi_result = attempt_builtin_powi (stmt, &ops);
		}

	      operand_entry *last;
	      bool negate_result = false;
	      if (ops.length () > 1
		  && rhs_code == MULT_EXPR)
		{
		  last = ops.last ();
		  if ((integer_minus_onep (last->op)
		       || real_minus_onep (last->op))
		      && !HONOR_SNANS (TREE_TYPE (lhs))
		      && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
			  || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
		    {
		      ops.pop ();
		      negate_result = true;
		    }
		}

	      tree new_lhs = lhs;
	      /* If the operand vector is now empty, all operands were
		 consumed by the __builtin_powi optimization.  */
	      if (ops.length () == 0)
		transform_stmt_to_copy (&gsi, stmt, powi_result);
	      else if (ops.length () == 1)
		{
		  tree last_op = ops.last ()->op;

		  /* If a statement defining the operand still has to be
		     inserted, insert it before the use.  */
		  if (ops.last ()->stmt_to_insert)
		    insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
		  if (powi_result)
		    transform_stmt_to_multiply (&gsi, stmt, last_op,
						powi_result);
		  else
		    transform_stmt_to_copy (&gsi, stmt, last_op);
		}
	      else
		{
		  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
		  int ops_num = ops.length ();
		  int width = get_reassociation_width (ops_num, rhs_code,
						       mode);

		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file,
			     "Width = %d was chosen for reassociation\n",
			     width);

		  /* For binary bit operations, if there are at least 3
		     operands and the last operand in OPS is a constant,
		     move it to the front.  This helps ensure that we
		     generate (X & Y) & C rather than (X & C) & Y.  The
		     former will often match a canonical bit test when
		     we get to RTL.  */
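		  /* For instance, with OPS = {x_1, y_2, 4} (the
		     constant sorts last), the swap below makes the
		     rewrite emit
		       _3 = x_1 & y_2;
		       _4 = _3 & 4;
		     so the final AND against the constant mask is the
		     shape RTL bit-test patterns can match.  */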
		  if (ops.length () > 2
		      && (rhs_code == BIT_AND_EXPR
			  || rhs_code == BIT_IOR_EXPR
			  || rhs_code == BIT_XOR_EXPR)
		      && TREE_CODE (ops.last ()->op) == INTEGER_CST)
		    std::swap (*ops[0], *ops[ops_num - 1]);

		  if (width > 1
		      && ops.length () > 3)
		    rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
						width, ops);
		  else
		    {
		      /* When there are three operands left, we want
			 to make sure the ones that get the double
			 binary op are chosen wisely.  */
		      int len = ops.length ();
		      if (len >= 3)
			swap_ops_for_binary_stmt (ops, len - 3, stmt);

		      new_lhs = rewrite_expr_tree (stmt, 0, ops,
						   powi_result != NULL
						   || negate_result,
						   len != orig_len);
		    }

		  /* If we combined some repeated factors into a
		     __builtin_powi call, multiply that result by the
		     reassociated operands.  */
		  if (powi_result)
		    {
		      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
		      tree type = TREE_TYPE (lhs);
		      tree target_ssa = make_temp_ssa_name (type, NULL,
							    "reassocpow");
		      gimple_set_lhs (lhs_stmt, target_ssa);
		      update_stmt (lhs_stmt);
		      if (lhs != new_lhs)
			{
			  target_ssa = new_lhs;
			  new_lhs = lhs;
			}
		      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
						      powi_result, target_ssa);
		      gimple_set_location (mul_stmt, gimple_location (stmt));
		      gimple_set_uid (mul_stmt, gimple_uid (stmt));
		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
		    }
		}

	      if (negate_result)
		{
		  stmt = SSA_NAME_DEF_STMT (lhs);
		  tree tmp = make_ssa_name (TREE_TYPE (lhs));
		  gimple_set_lhs (stmt, tmp);
		  if (lhs != new_lhs)
		    tmp = new_lhs;
		  gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
							   tmp);
		  gimple_set_uid (neg_stmt, gimple_uid (stmt));
		  gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
		  update_stmt (stmt);
		}
	    }
	}
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    cfg_cleanup_needed |= reassociate_bb (son);

  return cfg_cleanup_needed;
}
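
/* To sketch what the width-based choice in reassociate_bb does
   (hypothetical operands): with OPS = {a, b, c, d} and width 2,
   rewrite_expr_tree_parallel emits the two independent chains
     _1 = a + b;  _2 = c + d;  _3 = _1 + _2;
   exposing instruction-level parallelism, whereas the sequential
   rewrite_expr_tree form would produce the dependent chain
   ((a + b) + c) + d.  */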

/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...;  // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...;  // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI<1(l1), OTHERVAR(l2)>;
   where <l1> denotes the block that now ends in the VAR != 0 test.  */

static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (var);
      gimple *use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      gcc_assert (ok
		  && is_gimple_assign (use_stmt)
		  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
		  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple *g = gimple_build_cond (NE_EXPR, var,
				     build_zero_cst (TREE_TYPE (var)),
				     NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = profile_probability::even ();
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      then_bb->count -= etrue->count ();

      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
	othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
	othervar = gimple_assign_rhs1 (use_stmt);
      else
	gcc_unreachable ();
      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}

void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
void debug_ops_vector (vec<operand_entry *> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry *> ops)
{
  operand_entry *oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op);
      fprintf (file, "\n");
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry *> ops)
{
  dump_ops_vector (stderr, ops);
}

/* Bubble up return status from reassociate_bb.  */

static bool
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}

/* Initialize the reassociation pass.  */
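
/* To make the rank numbering below concrete (values illustrative
   only): rank starts at 2, so default definitions such as parameters
   receive the small distinct ranks 3, 4, 5, ..., while each basic
   block in reverse post-order gets a base rank of a later counter
   value shifted left by 16; anything computed in a later block
   therefore outranks all earlier definitions.  */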

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  next_operand_entry_id = 0;

  /* The reverse post-order (RPO) computed here visits deeper loops
     later, so the per-BB ranks assigned below increase with loop
     depth.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, long>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
	insert_operand_rank (name, ++rank);
    }

  /* Set up the rank for each BB.  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}

/* Clean up after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
			    reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
			    reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
			    reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
			    reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
			    reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
			    reassociate_stats.pows_created);

  delete operand_rank;
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}

/* Gate and execute functions for Reassociation.  If INSERT_POWI_P,
   enable insertion of __builtin_powi calls.

   Returns TODO_cleanup_cfg if a CFG cleanup pass is desired due to
   optimization of a gimple conditional.  Otherwise returns zero.  */

static unsigned int
execute_reassoc (bool insert_powi_p)
{
  reassoc_insert_powi_p = insert_powi_p;

  init_reassoc ();

  bool cfg_cleanup_needed;
  cfg_cleanup_needed = do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
}
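
/* How INSERT_POWI_P is expected to arrive (a sketch, not the
   authoritative pipeline definition): passes.def schedules this pass
   twice with different bool arguments, roughly
     NEXT_PASS (pass_reassoc, true);
     ...
     NEXT_PASS (pass_reassoc, false);
   and the pass manager forwards the argument through
   pass_reassoc::set_pass_param below before execute runs, so powi
   insertion happens in only one of the two runs.  */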

namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      insert_powi_p = param;
    }
  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
  virtual unsigned int execute (function *)
    { return execute_reassoc (insert_powi_p); }

 private:
  /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
     point 3a in the pass header comment.  */
  bool insert_powi_p;
}; // class pass_reassoc

} // anon namespace

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}