1 /* Reassociation for trees. 2 Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011 3 Free Software Foundation, Inc. 4 Contributed by Daniel Berlin <dan@dberlin.org> 5 6 This file is part of GCC. 7 8 GCC is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 3, or (at your option) 11 any later version. 12 13 GCC is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with GCC; see the file COPYING3. If not see 20 <http://www.gnu.org/licenses/>. */ 21 22 #include "config.h" 23 #include "system.h" 24 #include "coretypes.h" 25 #include "tm.h" 26 #include "tree.h" 27 #include "basic-block.h" 28 #include "tree-pretty-print.h" 29 #include "gimple-pretty-print.h" 30 #include "tree-inline.h" 31 #include "tree-flow.h" 32 #include "gimple.h" 33 #include "tree-dump.h" 34 #include "timevar.h" 35 #include "tree-iterator.h" 36 #include "tree-pass.h" 37 #include "alloc-pool.h" 38 #include "vec.h" 39 #include "langhooks.h" 40 #include "pointer-set.h" 41 #include "cfgloop.h" 42 #include "flags.h" 43 #include "target.h" 44 #include "params.h" 45 #include "diagnostic-core.h" 46 47 /* This is a simple global reassociation pass. It is, in part, based 48 on the LLVM pass of the same name (They do some things more/less 49 than we do, in different orders, etc). 50 51 It consists of five steps: 52 53 1. Breaking up subtract operations into addition + negate, where 54 it would promote the reassociation of adds. 55 56 2. Left linearization of the expression trees, so that (A+B)+(C+D) 57 becomes (((A+B)+C)+D), which is easier for us to rewrite later. 58 During linearization, we place the operands of the binary 59 expressions into a vector of operand_entry_t 60 61 3. Optimization of the operand lists, eliminating things like a + 62 -a, a & a, etc. 63 64 4. Rewrite the expression trees we linearized and optimized so 65 they are in proper rank order. 66 67 5. Repropagate negates, as nothing else will clean it up ATM. 68 69 A bit of theory on #4, since nobody seems to write anything down 70 about why it makes sense to do it the way they do it: 71 72 We could do this much nicer theoretically, but don't (for reasons 73 explained after how to do it theoretically nice :P). 74 75 In order to promote the most redundancy elimination, you want 76 binary expressions whose operands are the same rank (or 77 preferably, the same value) exposed to the redundancy eliminator, 78 for possible elimination. 79 80 So the way to do this if we really cared, is to build the new op 81 tree from the leaves to the roots, merging as you go, and putting the 82 new op on the end of the worklist, until you are left with one 83 thing on the worklist. 84 85 IE if you have to rewrite the following set of operands (listed with 86 rank in parentheses), with opcode PLUS_EXPR: 87 88 a (1), b (1), c (1), d (2), e (2) 89 90 91 We start with our merge worklist empty, and the ops list with all of 92 those on it. 93 94 You want to first merge all leaves of the same rank, as much as 95 possible. 96 97 So first build a binary op of 98 99 mergetmp = a + b, and put "mergetmp" on the merge worklist. 100 101 Because there is no three operand form of PLUS_EXPR, c is not going to 102 be exposed to redundancy elimination as a rank 1 operand. 103 104 So you might as well throw it on the merge worklist (you could also 105 consider it to now be a rank two operand, and merge it with d and e, 106 but in this case, you then have evicted e from a binary op. So at 107 least in this situation, you can't win.) 108 109 Then build a binary op of d + e 110 mergetmp2 = d + e 111 112 and put mergetmp2 on the merge worklist. 113 114 so merge worklist = {mergetmp, c, mergetmp2} 115 116 Continue building binary ops of these operations until you have only 117 one operation left on the worklist. 118 119 So we have 120 121 build binary op 122 mergetmp3 = mergetmp + c 123 124 worklist = {mergetmp2, mergetmp3} 125 126 mergetmp4 = mergetmp2 + mergetmp3 127 128 worklist = {mergetmp4} 129 130 because we have one operation left, we can now just set the original 131 statement equal to the result of that operation. 132 133 This will at least expose a + b and d + e to redundancy elimination 134 as binary operations. 135 136 For extra points, you can reuse the old statements to build the 137 mergetmps, since you shouldn't run out. 138 139 So why don't we do this? 140 141 Because it's expensive, and rarely will help. Most trees we are 142 reassociating have 3 or less ops. If they have 2 ops, they already 143 will be written into a nice single binary op. If you have 3 ops, a 144 single simple check suffices to tell you whether the first two are of the 145 same rank. If so, you know to order it 146 147 mergetmp = op1 + op2 148 newstmt = mergetmp + op3 149 150 instead of 151 mergetmp = op2 + op3 152 newstmt = mergetmp + op1 153 154 If all three are of the same rank, you can't expose them all in a 155 single binary operator anyway, so the above is *still* the best you 156 can do. 157 158 Thus, this is what we do. When we have three ops left, we check to see 159 what order to put them in, and call it a day. As a nod to vector sum 160 reduction, we check if any of the ops are really a phi node that is a 161 destructive update for the associating op, and keep the destructive 162 update together for vector sum reduction recognition. */ 163 164 165 /* Statistics */ 166 static struct 167 { 168 int linearized; 169 int constants_eliminated; 170 int ops_eliminated; 171 int rewritten; 172 } reassociate_stats; 173 174 /* Operator, rank pair. */ 175 typedef struct operand_entry 176 { 177 unsigned int rank; 178 int id; 179 tree op; 180 } *operand_entry_t; 181 182 static alloc_pool operand_entry_pool; 183 184 /* This is used to assign a unique ID to each struct operand_entry 185 so that qsort results are identical on different hosts. */ 186 static int next_operand_entry_id; 187 188 /* Starting rank number for a given basic block, so that we can rank 189 operations using unmovable instructions in that BB based on the bb 190 depth. */ 191 static long *bb_rank; 192 193 /* Operand->rank hashtable. */ 194 static struct pointer_map_t *operand_rank; 195 196 /* Forward decls. */ 197 static long get_rank (tree); 198 199 200 /* Bias amount for loop-carried phis. We want this to be larger than 201 the depth of any reassociation tree we can see, but not larger than 202 the rank difference between two blocks. */ 203 #define PHI_LOOP_BIAS (1 << 15) 204 205 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of 206 an innermost loop, and the phi has only a single use which is inside 207 the loop, then the rank is the block rank of the loop latch plus an 208 extra bias for the loop-carried dependence. This causes expressions 209 calculated into an accumulator variable to be independent for each 210 iteration of the loop. If STMT is some other phi, the rank is the 211 block rank of its containing block. */ 212 static long 213 phi_rank (gimple stmt) 214 { 215 basic_block bb = gimple_bb (stmt); 216 struct loop *father = bb->loop_father; 217 tree res; 218 unsigned i; 219 use_operand_p use; 220 gimple use_stmt; 221 222 /* We only care about real loops (those with a latch). */ 223 if (!father->latch) 224 return bb_rank[bb->index]; 225 226 /* Interesting phis must be in headers of innermost loops. */ 227 if (bb != father->header 228 || father->inner) 229 return bb_rank[bb->index]; 230 231 /* Ignore virtual SSA_NAMEs. */ 232 res = gimple_phi_result (stmt); 233 if (!is_gimple_reg (SSA_NAME_VAR (res))) 234 return bb_rank[bb->index]; 235 236 /* The phi definition must have a single use, and that use must be 237 within the loop. Otherwise this isn't an accumulator pattern. */ 238 if (!single_imm_use (res, &use, &use_stmt) 239 || gimple_bb (use_stmt)->loop_father != father) 240 return bb_rank[bb->index]; 241 242 /* Look for phi arguments from within the loop. If found, bias this phi. */ 243 for (i = 0; i < gimple_phi_num_args (stmt); i++) 244 { 245 tree arg = gimple_phi_arg_def (stmt, i); 246 if (TREE_CODE (arg) == SSA_NAME 247 && !SSA_NAME_IS_DEFAULT_DEF (arg)) 248 { 249 gimple def_stmt = SSA_NAME_DEF_STMT (arg); 250 if (gimple_bb (def_stmt)->loop_father == father) 251 return bb_rank[father->latch->index] + PHI_LOOP_BIAS; 252 } 253 } 254 255 /* Must be an uninteresting phi. */ 256 return bb_rank[bb->index]; 257 } 258 259 /* If EXP is an SSA_NAME defined by a PHI statement that represents a 260 loop-carried dependence of an innermost loop, return TRUE; else 261 return FALSE. */ 262 static bool 263 loop_carried_phi (tree exp) 264 { 265 gimple phi_stmt; 266 long block_rank; 267 268 if (TREE_CODE (exp) != SSA_NAME 269 || SSA_NAME_IS_DEFAULT_DEF (exp)) 270 return false; 271 272 phi_stmt = SSA_NAME_DEF_STMT (exp); 273 274 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI) 275 return false; 276 277 /* Non-loop-carried phis have block rank. Loop-carried phis have 278 an additional bias added in. If this phi doesn't have block rank, 279 it's biased and should not be propagated. */ 280 block_rank = bb_rank[gimple_bb (phi_stmt)->index]; 281 282 if (phi_rank (phi_stmt) != block_rank) 283 return true; 284 285 return false; 286 } 287 288 /* Return the maximum of RANK and the rank that should be propagated 289 from expression OP. For most operands, this is just the rank of OP. 290 For loop-carried phis, the value is zero to avoid undoing the bias 291 in favor of the phi. */ 292 static long 293 propagate_rank (long rank, tree op) 294 { 295 long op_rank; 296 297 if (loop_carried_phi (op)) 298 return rank; 299 300 op_rank = get_rank (op); 301 302 return MAX (rank, op_rank); 303 } 304 305 /* Look up the operand rank structure for expression E. */ 306 307 static inline long 308 find_operand_rank (tree e) 309 { 310 void **slot = pointer_map_contains (operand_rank, e); 311 return slot ? (long) (intptr_t) *slot : -1; 312 } 313 314 /* Insert {E,RANK} into the operand rank hashtable. */ 315 316 static inline void 317 insert_operand_rank (tree e, long rank) 318 { 319 void **slot; 320 gcc_assert (rank > 0); 321 slot = pointer_map_insert (operand_rank, e); 322 gcc_assert (!*slot); 323 *slot = (void *) (intptr_t) rank; 324 } 325 326 /* Given an expression E, return the rank of the expression. */ 327 328 static long 329 get_rank (tree e) 330 { 331 /* Constants have rank 0. */ 332 if (is_gimple_min_invariant (e)) 333 return 0; 334 335 /* SSA_NAME's have the rank of the expression they are the result 336 of. 337 For globals and uninitialized values, the rank is 0. 338 For function arguments, use the pre-setup rank. 339 For PHI nodes, stores, asm statements, etc, we use the rank of 340 the BB. 341 For simple operations, the rank is the maximum rank of any of 342 its operands, or the bb_rank, whichever is less. 343 I make no claims that this is optimal, however, it gives good 344 results. */ 345 346 /* We make an exception to the normal ranking system to break 347 dependences of accumulator variables in loops. Suppose we 348 have a simple one-block loop containing: 349 350 x_1 = phi(x_0, x_2) 351 b = a + x_1 352 c = b + d 353 x_2 = c + e 354 355 As shown, each iteration of the calculation into x is fully 356 dependent upon the iteration before it. We would prefer to 357 see this in the form: 358 359 x_1 = phi(x_0, x_2) 360 b = a + d 361 c = b + e 362 x_2 = c + x_1 363 364 If the loop is unrolled, the calculations of b and c from 365 different iterations can be interleaved. 366 367 To obtain this result during reassociation, we bias the rank 368 of the phi definition x_1 upward, when it is recognized as an 369 accumulator pattern. The artificial rank causes it to be 370 added last, providing the desired independence. */ 371 372 if (TREE_CODE (e) == SSA_NAME) 373 { 374 gimple stmt; 375 long rank; 376 int i, n; 377 tree op; 378 379 if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL 380 && SSA_NAME_IS_DEFAULT_DEF (e)) 381 return find_operand_rank (e); 382 383 stmt = SSA_NAME_DEF_STMT (e); 384 if (gimple_bb (stmt) == NULL) 385 return 0; 386 387 if (gimple_code (stmt) == GIMPLE_PHI) 388 return phi_rank (stmt); 389 390 if (!is_gimple_assign (stmt) 391 || gimple_vdef (stmt)) 392 return bb_rank[gimple_bb (stmt)->index]; 393 394 /* If we already have a rank for this expression, use that. */ 395 rank = find_operand_rank (e); 396 if (rank != -1) 397 return rank; 398 399 /* Otherwise, find the maximum rank for the operands. As an 400 exception, remove the bias from loop-carried phis when propagating 401 the rank so that dependent operations are not also biased. */ 402 rank = 0; 403 if (gimple_assign_single_p (stmt)) 404 { 405 tree rhs = gimple_assign_rhs1 (stmt); 406 n = TREE_OPERAND_LENGTH (rhs); 407 if (n == 0) 408 rank = propagate_rank (rank, rhs); 409 else 410 { 411 for (i = 0; i < n; i++) 412 { 413 op = TREE_OPERAND (rhs, i); 414 415 if (op != NULL_TREE) 416 rank = propagate_rank (rank, op); 417 } 418 } 419 } 420 else 421 { 422 n = gimple_num_ops (stmt); 423 for (i = 1; i < n; i++) 424 { 425 op = gimple_op (stmt, i); 426 gcc_assert (op); 427 rank = propagate_rank (rank, op); 428 } 429 } 430 431 if (dump_file && (dump_flags & TDF_DETAILS)) 432 { 433 fprintf (dump_file, "Rank for "); 434 print_generic_expr (dump_file, e, 0); 435 fprintf (dump_file, " is %ld\n", (rank + 1)); 436 } 437 438 /* Note the rank in the hashtable so we don't recompute it. */ 439 insert_operand_rank (e, (rank + 1)); 440 return (rank + 1); 441 } 442 443 /* Globals, etc, are rank 0 */ 444 return 0; 445 } 446 447 DEF_VEC_P(operand_entry_t); 448 DEF_VEC_ALLOC_P(operand_entry_t, heap); 449 450 /* We want integer ones to end up last no matter what, since they are 451 the ones we can do the most with. */ 452 #define INTEGER_CONST_TYPE 1 << 3 453 #define FLOAT_CONST_TYPE 1 << 2 454 #define OTHER_CONST_TYPE 1 << 1 455 456 /* Classify an invariant tree into integer, float, or other, so that 457 we can sort them to be near other constants of the same type. */ 458 static inline int 459 constant_type (tree t) 460 { 461 if (INTEGRAL_TYPE_P (TREE_TYPE (t))) 462 return INTEGER_CONST_TYPE; 463 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t))) 464 return FLOAT_CONST_TYPE; 465 else 466 return OTHER_CONST_TYPE; 467 } 468 469 /* qsort comparison function to sort operand entries PA and PB by rank 470 so that the sorted array is ordered by rank in decreasing order. */ 471 static int 472 sort_by_operand_rank (const void *pa, const void *pb) 473 { 474 const operand_entry_t oea = *(const operand_entry_t *)pa; 475 const operand_entry_t oeb = *(const operand_entry_t *)pb; 476 477 /* It's nicer for optimize_expression if constants that are likely 478 to fold when added/multiplied//whatever are put next to each 479 other. Since all constants have rank 0, order them by type. */ 480 if (oeb->rank == 0 && oea->rank == 0) 481 { 482 if (constant_type (oeb->op) != constant_type (oea->op)) 483 return constant_type (oeb->op) - constant_type (oea->op); 484 else 485 /* To make sorting result stable, we use unique IDs to determine 486 order. */ 487 return oeb->id - oea->id; 488 } 489 490 /* Lastly, make sure the versions that are the same go next to each 491 other. We use SSA_NAME_VERSION because it's stable. */ 492 if ((oeb->rank - oea->rank == 0) 493 && TREE_CODE (oea->op) == SSA_NAME 494 && TREE_CODE (oeb->op) == SSA_NAME) 495 { 496 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op)) 497 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op); 498 else 499 return oeb->id - oea->id; 500 } 501 502 if (oeb->rank != oea->rank) 503 return oeb->rank - oea->rank; 504 else 505 return oeb->id - oea->id; 506 } 507 508 /* Add an operand entry to *OPS for the tree operand OP. */ 509 510 static void 511 add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op) 512 { 513 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool); 514 515 oe->op = op; 516 oe->rank = get_rank (op); 517 oe->id = next_operand_entry_id++; 518 VEC_safe_push (operand_entry_t, heap, *ops, oe); 519 } 520 521 /* Return true if STMT is reassociable operation containing a binary 522 operation with tree code CODE, and is inside LOOP. */ 523 524 static bool 525 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop) 526 { 527 basic_block bb = gimple_bb (stmt); 528 529 if (gimple_bb (stmt) == NULL) 530 return false; 531 532 if (!flow_bb_inside_loop_p (loop, bb)) 533 return false; 534 535 if (is_gimple_assign (stmt) 536 && gimple_assign_rhs_code (stmt) == code 537 && has_single_use (gimple_assign_lhs (stmt))) 538 return true; 539 540 return false; 541 } 542 543 544 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the 545 operand of the negate operation. Otherwise, return NULL. */ 546 547 static tree 548 get_unary_op (tree name, enum tree_code opcode) 549 { 550 gimple stmt = SSA_NAME_DEF_STMT (name); 551 552 if (!is_gimple_assign (stmt)) 553 return NULL_TREE; 554 555 if (gimple_assign_rhs_code (stmt) == opcode) 556 return gimple_assign_rhs1 (stmt); 557 return NULL_TREE; 558 } 559 560 /* If CURR and LAST are a pair of ops that OPCODE allows us to 561 eliminate through equivalences, do so, remove them from OPS, and 562 return true. Otherwise, return false. */ 563 564 static bool 565 eliminate_duplicate_pair (enum tree_code opcode, 566 VEC (operand_entry_t, heap) **ops, 567 bool *all_done, 568 unsigned int i, 569 operand_entry_t curr, 570 operand_entry_t last) 571 { 572 573 /* If we have two of the same op, and the opcode is & |, min, or max, 574 we can eliminate one of them. 575 If we have two of the same op, and the opcode is ^, we can 576 eliminate both of them. */ 577 578 if (last && last->op == curr->op) 579 { 580 switch (opcode) 581 { 582 case MAX_EXPR: 583 case MIN_EXPR: 584 case BIT_IOR_EXPR: 585 case BIT_AND_EXPR: 586 if (dump_file && (dump_flags & TDF_DETAILS)) 587 { 588 fprintf (dump_file, "Equivalence: "); 589 print_generic_expr (dump_file, curr->op, 0); 590 fprintf (dump_file, " [&|minmax] "); 591 print_generic_expr (dump_file, last->op, 0); 592 fprintf (dump_file, " -> "); 593 print_generic_stmt (dump_file, last->op, 0); 594 } 595 596 VEC_ordered_remove (operand_entry_t, *ops, i); 597 reassociate_stats.ops_eliminated ++; 598 599 return true; 600 601 case BIT_XOR_EXPR: 602 if (dump_file && (dump_flags & TDF_DETAILS)) 603 { 604 fprintf (dump_file, "Equivalence: "); 605 print_generic_expr (dump_file, curr->op, 0); 606 fprintf (dump_file, " ^ "); 607 print_generic_expr (dump_file, last->op, 0); 608 fprintf (dump_file, " -> nothing\n"); 609 } 610 611 reassociate_stats.ops_eliminated += 2; 612 613 if (VEC_length (operand_entry_t, *ops) == 2) 614 { 615 VEC_free (operand_entry_t, heap, *ops); 616 *ops = NULL; 617 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op))); 618 *all_done = true; 619 } 620 else 621 { 622 VEC_ordered_remove (operand_entry_t, *ops, i-1); 623 VEC_ordered_remove (operand_entry_t, *ops, i-1); 624 } 625 626 return true; 627 628 default: 629 break; 630 } 631 } 632 return false; 633 } 634 635 static VEC(tree, heap) *plus_negates; 636 637 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not 638 expression, look in OPS for a corresponding positive operation to cancel 639 it out. If we find one, remove the other from OPS, replace 640 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise, 641 return false. */ 642 643 static bool 644 eliminate_plus_minus_pair (enum tree_code opcode, 645 VEC (operand_entry_t, heap) **ops, 646 unsigned int currindex, 647 operand_entry_t curr) 648 { 649 tree negateop; 650 tree notop; 651 unsigned int i; 652 operand_entry_t oe; 653 654 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME) 655 return false; 656 657 negateop = get_unary_op (curr->op, NEGATE_EXPR); 658 notop = get_unary_op (curr->op, BIT_NOT_EXPR); 659 if (negateop == NULL_TREE && notop == NULL_TREE) 660 return false; 661 662 /* Any non-negated version will have a rank that is one less than 663 the current rank. So once we hit those ranks, if we don't find 664 one, we can stop. */ 665 666 for (i = currindex + 1; 667 VEC_iterate (operand_entry_t, *ops, i, oe) 668 && oe->rank >= curr->rank - 1 ; 669 i++) 670 { 671 if (oe->op == negateop) 672 { 673 674 if (dump_file && (dump_flags & TDF_DETAILS)) 675 { 676 fprintf (dump_file, "Equivalence: "); 677 print_generic_expr (dump_file, negateop, 0); 678 fprintf (dump_file, " + -"); 679 print_generic_expr (dump_file, oe->op, 0); 680 fprintf (dump_file, " -> 0\n"); 681 } 682 683 VEC_ordered_remove (operand_entry_t, *ops, i); 684 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op))); 685 VEC_ordered_remove (operand_entry_t, *ops, currindex); 686 reassociate_stats.ops_eliminated ++; 687 688 return true; 689 } 690 else if (oe->op == notop) 691 { 692 tree op_type = TREE_TYPE (oe->op); 693 694 if (dump_file && (dump_flags & TDF_DETAILS)) 695 { 696 fprintf (dump_file, "Equivalence: "); 697 print_generic_expr (dump_file, notop, 0); 698 fprintf (dump_file, " + ~"); 699 print_generic_expr (dump_file, oe->op, 0); 700 fprintf (dump_file, " -> -1\n"); 701 } 702 703 VEC_ordered_remove (operand_entry_t, *ops, i); 704 add_to_ops_vec (ops, build_int_cst_type (op_type, -1)); 705 VEC_ordered_remove (operand_entry_t, *ops, currindex); 706 reassociate_stats.ops_eliminated ++; 707 708 return true; 709 } 710 } 711 712 /* CURR->OP is a negate expr in a plus expr: save it for later 713 inspection in repropagate_negates(). */ 714 if (negateop != NULL_TREE) 715 VEC_safe_push (tree, heap, plus_negates, curr->op); 716 717 return false; 718 } 719 720 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a 721 bitwise not expression, look in OPS for a corresponding operand to 722 cancel it out. If we find one, remove the other from OPS, replace 723 OPS[CURRINDEX] with 0, and return true. Otherwise, return 724 false. */ 725 726 static bool 727 eliminate_not_pairs (enum tree_code opcode, 728 VEC (operand_entry_t, heap) **ops, 729 unsigned int currindex, 730 operand_entry_t curr) 731 { 732 tree notop; 733 unsigned int i; 734 operand_entry_t oe; 735 736 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) 737 || TREE_CODE (curr->op) != SSA_NAME) 738 return false; 739 740 notop = get_unary_op (curr->op, BIT_NOT_EXPR); 741 if (notop == NULL_TREE) 742 return false; 743 744 /* Any non-not version will have a rank that is one less than 745 the current rank. So once we hit those ranks, if we don't find 746 one, we can stop. */ 747 748 for (i = currindex + 1; 749 VEC_iterate (operand_entry_t, *ops, i, oe) 750 && oe->rank >= curr->rank - 1; 751 i++) 752 { 753 if (oe->op == notop) 754 { 755 if (dump_file && (dump_flags & TDF_DETAILS)) 756 { 757 fprintf (dump_file, "Equivalence: "); 758 print_generic_expr (dump_file, notop, 0); 759 if (opcode == BIT_AND_EXPR) 760 fprintf (dump_file, " & ~"); 761 else if (opcode == BIT_IOR_EXPR) 762 fprintf (dump_file, " | ~"); 763 print_generic_expr (dump_file, oe->op, 0); 764 if (opcode == BIT_AND_EXPR) 765 fprintf (dump_file, " -> 0\n"); 766 else if (opcode == BIT_IOR_EXPR) 767 fprintf (dump_file, " -> -1\n"); 768 } 769 770 if (opcode == BIT_AND_EXPR) 771 oe->op = build_zero_cst (TREE_TYPE (oe->op)); 772 else if (opcode == BIT_IOR_EXPR) 773 oe->op = build_low_bits_mask (TREE_TYPE (oe->op), 774 TYPE_PRECISION (TREE_TYPE (oe->op))); 775 776 reassociate_stats.ops_eliminated 777 += VEC_length (operand_entry_t, *ops) - 1; 778 VEC_free (operand_entry_t, heap, *ops); 779 *ops = NULL; 780 VEC_safe_push (operand_entry_t, heap, *ops, oe); 781 return true; 782 } 783 } 784 785 return false; 786 } 787 788 /* Use constant value that may be present in OPS to try to eliminate 789 operands. Note that this function is only really used when we've 790 eliminated ops for other reasons, or merged constants. Across 791 single statements, fold already does all of this, plus more. There 792 is little point in duplicating logic, so I've only included the 793 identities that I could ever construct testcases to trigger. */ 794 795 static void 796 eliminate_using_constants (enum tree_code opcode, 797 VEC(operand_entry_t, heap) **ops) 798 { 799 operand_entry_t oelast = VEC_last (operand_entry_t, *ops); 800 tree type = TREE_TYPE (oelast->op); 801 802 if (oelast->rank == 0 803 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type))) 804 { 805 switch (opcode) 806 { 807 case BIT_AND_EXPR: 808 if (integer_zerop (oelast->op)) 809 { 810 if (VEC_length (operand_entry_t, *ops) != 1) 811 { 812 if (dump_file && (dump_flags & TDF_DETAILS)) 813 fprintf (dump_file, "Found & 0, removing all other ops\n"); 814 815 reassociate_stats.ops_eliminated 816 += VEC_length (operand_entry_t, *ops) - 1; 817 818 VEC_free (operand_entry_t, heap, *ops); 819 *ops = NULL; 820 VEC_safe_push (operand_entry_t, heap, *ops, oelast); 821 return; 822 } 823 } 824 else if (integer_all_onesp (oelast->op)) 825 { 826 if (VEC_length (operand_entry_t, *ops) != 1) 827 { 828 if (dump_file && (dump_flags & TDF_DETAILS)) 829 fprintf (dump_file, "Found & -1, removing\n"); 830 VEC_pop (operand_entry_t, *ops); 831 reassociate_stats.ops_eliminated++; 832 } 833 } 834 break; 835 case BIT_IOR_EXPR: 836 if (integer_all_onesp (oelast->op)) 837 { 838 if (VEC_length (operand_entry_t, *ops) != 1) 839 { 840 if (dump_file && (dump_flags & TDF_DETAILS)) 841 fprintf (dump_file, "Found | -1, removing all other ops\n"); 842 843 reassociate_stats.ops_eliminated 844 += VEC_length (operand_entry_t, *ops) - 1; 845 846 VEC_free (operand_entry_t, heap, *ops); 847 *ops = NULL; 848 VEC_safe_push (operand_entry_t, heap, *ops, oelast); 849 return; 850 } 851 } 852 else if (integer_zerop (oelast->op)) 853 { 854 if (VEC_length (operand_entry_t, *ops) != 1) 855 { 856 if (dump_file && (dump_flags & TDF_DETAILS)) 857 fprintf (dump_file, "Found | 0, removing\n"); 858 VEC_pop (operand_entry_t, *ops); 859 reassociate_stats.ops_eliminated++; 860 } 861 } 862 break; 863 case MULT_EXPR: 864 if (integer_zerop (oelast->op) 865 || (FLOAT_TYPE_P (type) 866 && !HONOR_NANS (TYPE_MODE (type)) 867 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type)) 868 && real_zerop (oelast->op))) 869 { 870 if (VEC_length (operand_entry_t, *ops) != 1) 871 { 872 if (dump_file && (dump_flags & TDF_DETAILS)) 873 fprintf (dump_file, "Found * 0, removing all other ops\n"); 874 875 reassociate_stats.ops_eliminated 876 += VEC_length (operand_entry_t, *ops) - 1; 877 VEC_free (operand_entry_t, heap, *ops); 878 *ops = NULL; 879 VEC_safe_push (operand_entry_t, heap, *ops, oelast); 880 return; 881 } 882 } 883 else if (integer_onep (oelast->op) 884 || (FLOAT_TYPE_P (type) 885 && !HONOR_SNANS (TYPE_MODE (type)) 886 && real_onep (oelast->op))) 887 { 888 if (VEC_length (operand_entry_t, *ops) != 1) 889 { 890 if (dump_file && (dump_flags & TDF_DETAILS)) 891 fprintf (dump_file, "Found * 1, removing\n"); 892 VEC_pop (operand_entry_t, *ops); 893 reassociate_stats.ops_eliminated++; 894 return; 895 } 896 } 897 break; 898 case BIT_XOR_EXPR: 899 case PLUS_EXPR: 900 case MINUS_EXPR: 901 if (integer_zerop (oelast->op) 902 || (FLOAT_TYPE_P (type) 903 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR) 904 && fold_real_zero_addition_p (type, oelast->op, 905 opcode == MINUS_EXPR))) 906 { 907 if (VEC_length (operand_entry_t, *ops) != 1) 908 { 909 if (dump_file && (dump_flags & TDF_DETAILS)) 910 fprintf (dump_file, "Found [|^+] 0, removing\n"); 911 VEC_pop (operand_entry_t, *ops); 912 reassociate_stats.ops_eliminated++; 913 return; 914 } 915 } 916 break; 917 default: 918 break; 919 } 920 } 921 } 922 923 924 static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple, 925 bool, bool); 926 927 /* Structure for tracking and counting operands. */ 928 typedef struct oecount_s { 929 int cnt; 930 int id; 931 enum tree_code oecode; 932 tree op; 933 } oecount; 934 935 DEF_VEC_O(oecount); 936 DEF_VEC_ALLOC_O(oecount,heap); 937 938 /* The heap for the oecount hashtable and the sorted list of operands. */ 939 static VEC (oecount, heap) *cvec; 940 941 /* Hash function for oecount. */ 942 943 static hashval_t 944 oecount_hash (const void *p) 945 { 946 const oecount *c = VEC_index (oecount, cvec, (size_t)p - 42); 947 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode; 948 } 949 950 /* Comparison function for oecount. */ 951 952 static int 953 oecount_eq (const void *p1, const void *p2) 954 { 955 const oecount *c1 = VEC_index (oecount, cvec, (size_t)p1 - 42); 956 const oecount *c2 = VEC_index (oecount, cvec, (size_t)p2 - 42); 957 return (c1->oecode == c2->oecode 958 && c1->op == c2->op); 959 } 960 961 /* Comparison function for qsort sorting oecount elements by count. */ 962 963 static int 964 oecount_cmp (const void *p1, const void *p2) 965 { 966 const oecount *c1 = (const oecount *)p1; 967 const oecount *c2 = (const oecount *)p2; 968 if (c1->cnt != c2->cnt) 969 return c1->cnt - c2->cnt; 970 else 971 /* If counts are identical, use unique IDs to stabilize qsort. */ 972 return c1->id - c2->id; 973 } 974 975 /* Walks the linear chain with result *DEF searching for an operation 976 with operand OP and code OPCODE removing that from the chain. *DEF 977 is updated if there is only one operand but no operation left. */ 978 979 static void 980 zero_one_operation (tree *def, enum tree_code opcode, tree op) 981 { 982 gimple stmt = SSA_NAME_DEF_STMT (*def); 983 984 do 985 { 986 tree name = gimple_assign_rhs1 (stmt); 987 988 /* If this is the operation we look for and one of the operands 989 is ours simply propagate the other operand into the stmts 990 single use. */ 991 if (gimple_assign_rhs_code (stmt) == opcode 992 && (name == op 993 || gimple_assign_rhs2 (stmt) == op)) 994 { 995 gimple use_stmt; 996 use_operand_p use; 997 gimple_stmt_iterator gsi; 998 if (name == op) 999 name = gimple_assign_rhs2 (stmt); 1000 gcc_assert (has_single_use (gimple_assign_lhs (stmt))); 1001 single_imm_use (gimple_assign_lhs (stmt), &use, &use_stmt); 1002 if (gimple_assign_lhs (stmt) == *def) 1003 *def = name; 1004 SET_USE (use, name); 1005 if (TREE_CODE (name) != SSA_NAME) 1006 update_stmt (use_stmt); 1007 gsi = gsi_for_stmt (stmt); 1008 gsi_remove (&gsi, true); 1009 release_defs (stmt); 1010 return; 1011 } 1012 1013 /* Continue walking the chain. */ 1014 gcc_assert (name != op 1015 && TREE_CODE (name) == SSA_NAME); 1016 stmt = SSA_NAME_DEF_STMT (name); 1017 } 1018 while (1); 1019 } 1020 1021 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for 1022 the result. Places the statement after the definition of either 1023 OP1 or OP2. Returns the new statement. */ 1024 1025 static gimple 1026 build_and_add_sum (tree tmpvar, tree op1, tree op2, enum tree_code opcode) 1027 { 1028 gimple op1def = NULL, op2def = NULL; 1029 gimple_stmt_iterator gsi; 1030 tree op; 1031 gimple sum; 1032 1033 /* Create the addition statement. */ 1034 sum = gimple_build_assign_with_ops (opcode, tmpvar, op1, op2); 1035 op = make_ssa_name (tmpvar, sum); 1036 gimple_assign_set_lhs (sum, op); 1037 1038 /* Find an insertion place and insert. */ 1039 if (TREE_CODE (op1) == SSA_NAME) 1040 op1def = SSA_NAME_DEF_STMT (op1); 1041 if (TREE_CODE (op2) == SSA_NAME) 1042 op2def = SSA_NAME_DEF_STMT (op2); 1043 if ((!op1def || gimple_nop_p (op1def)) 1044 && (!op2def || gimple_nop_p (op2def))) 1045 { 1046 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR)); 1047 gsi_insert_before (&gsi, sum, GSI_NEW_STMT); 1048 } 1049 else if ((!op1def || gimple_nop_p (op1def)) 1050 || (op2def && !gimple_nop_p (op2def) 1051 && stmt_dominates_stmt_p (op1def, op2def))) 1052 { 1053 if (gimple_code (op2def) == GIMPLE_PHI) 1054 { 1055 gsi = gsi_after_labels (gimple_bb (op2def)); 1056 gsi_insert_before (&gsi, sum, GSI_NEW_STMT); 1057 } 1058 else 1059 { 1060 if (!stmt_ends_bb_p (op2def)) 1061 { 1062 gsi = gsi_for_stmt (op2def); 1063 gsi_insert_after (&gsi, sum, GSI_NEW_STMT); 1064 } 1065 else 1066 { 1067 edge e; 1068 edge_iterator ei; 1069 1070 FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs) 1071 if (e->flags & EDGE_FALLTHRU) 1072 gsi_insert_on_edge_immediate (e, sum); 1073 } 1074 } 1075 } 1076 else 1077 { 1078 if (gimple_code (op1def) == GIMPLE_PHI) 1079 { 1080 gsi = gsi_after_labels (gimple_bb (op1def)); 1081 gsi_insert_before (&gsi, sum, GSI_NEW_STMT); 1082 } 1083 else 1084 { 1085 if (!stmt_ends_bb_p (op1def)) 1086 { 1087 gsi = gsi_for_stmt (op1def); 1088 gsi_insert_after (&gsi, sum, GSI_NEW_STMT); 1089 } 1090 else 1091 { 1092 edge e; 1093 edge_iterator ei; 1094 1095 FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs) 1096 if (e->flags & EDGE_FALLTHRU) 1097 gsi_insert_on_edge_immediate (e, sum); 1098 } 1099 } 1100 } 1101 update_stmt (sum); 1102 1103 return sum; 1104 } 1105 1106 /* Perform un-distribution of divisions and multiplications. 1107 A * X + B * X is transformed into (A + B) * X and A / X + B / X 1108 to (A + B) / X for real X. 1109 1110 The algorithm is organized as follows. 1111 1112 - First we walk the addition chain *OPS looking for summands that 1113 are defined by a multiplication or a real division. This results 1114 in the candidates bitmap with relevant indices into *OPS. 1115 1116 - Second we build the chains of multiplications or divisions for 1117 these candidates, counting the number of occurences of (operand, code) 1118 pairs in all of the candidates chains. 1119 1120 - Third we sort the (operand, code) pairs by number of occurence and 1121 process them starting with the pair with the most uses. 1122 1123 * For each such pair we walk the candidates again to build a 1124 second candidate bitmap noting all multiplication/division chains 1125 that have at least one occurence of (operand, code). 1126 1127 * We build an alternate addition chain only covering these 1128 candidates with one (operand, code) operation removed from their 1129 multiplication/division chain. 1130 1131 * The first candidate gets replaced by the alternate addition chain 1132 multiplied/divided by the operand. 1133 1134 * All candidate chains get disabled for further processing and 1135 processing of (operand, code) pairs continues. 1136 1137 The alternate addition chains built are re-processed by the main 1138 reassociation algorithm which allows optimizing a * x * y + b * y * x 1139 to (a + b ) * x * y in one invocation of the reassociation pass. */ 1140 1141 static bool 1142 undistribute_ops_list (enum tree_code opcode, 1143 VEC (operand_entry_t, heap) **ops, struct loop *loop) 1144 { 1145 unsigned int length = VEC_length (operand_entry_t, *ops); 1146 operand_entry_t oe1; 1147 unsigned i, j; 1148 sbitmap candidates, candidates2; 1149 unsigned nr_candidates, nr_candidates2; 1150 sbitmap_iterator sbi0; 1151 VEC (operand_entry_t, heap) **subops; 1152 htab_t ctable; 1153 bool changed = false; 1154 int next_oecount_id = 0; 1155 1156 if (length <= 1 1157 || opcode != PLUS_EXPR) 1158 return false; 1159 1160 /* Build a list of candidates to process. */ 1161 candidates = sbitmap_alloc (length); 1162 sbitmap_zero (candidates); 1163 nr_candidates = 0; 1164 FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe1) 1165 { 1166 enum tree_code dcode; 1167 gimple oe1def; 1168 1169 if (TREE_CODE (oe1->op) != SSA_NAME) 1170 continue; 1171 oe1def = SSA_NAME_DEF_STMT (oe1->op); 1172 if (!is_gimple_assign (oe1def)) 1173 continue; 1174 dcode = gimple_assign_rhs_code (oe1def); 1175 if ((dcode != MULT_EXPR 1176 && dcode != RDIV_EXPR) 1177 || !is_reassociable_op (oe1def, dcode, loop)) 1178 continue; 1179 1180 SET_BIT (candidates, i); 1181 nr_candidates++; 1182 } 1183 1184 if (nr_candidates < 2) 1185 { 1186 sbitmap_free (candidates); 1187 return false; 1188 } 1189 1190 if (dump_file && (dump_flags & TDF_DETAILS)) 1191 { 1192 fprintf (dump_file, "searching for un-distribute opportunities "); 1193 print_generic_expr (dump_file, 1194 VEC_index (operand_entry_t, *ops, 1195 sbitmap_first_set_bit (candidates))->op, 0); 1196 fprintf (dump_file, " %d\n", nr_candidates); 1197 } 1198 1199 /* Build linearized sub-operand lists and the counting table. */ 1200 cvec = NULL; 1201 ctable = htab_create (15, oecount_hash, oecount_eq, NULL); 1202 subops = XCNEWVEC (VEC (operand_entry_t, heap) *, 1203 VEC_length (operand_entry_t, *ops)); 1204 EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0) 1205 { 1206 gimple oedef; 1207 enum tree_code oecode; 1208 unsigned j; 1209 1210 oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op); 1211 oecode = gimple_assign_rhs_code (oedef); 1212 linearize_expr_tree (&subops[i], oedef, 1213 associative_tree_code (oecode), false); 1214 1215 FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1) 1216 { 1217 oecount c; 1218 void **slot; 1219 size_t idx; 1220 c.oecode = oecode; 1221 c.cnt = 1; 1222 c.id = next_oecount_id++; 1223 c.op = oe1->op; 1224 VEC_safe_push (oecount, heap, cvec, &c); 1225 idx = VEC_length (oecount, cvec) + 41; 1226 slot = htab_find_slot (ctable, (void *)idx, INSERT); 1227 if (!*slot) 1228 { 1229 *slot = (void *)idx; 1230 } 1231 else 1232 { 1233 VEC_pop (oecount, cvec); 1234 VEC_index (oecount, cvec, (size_t)*slot - 42)->cnt++; 1235 } 1236 } 1237 } 1238 htab_delete (ctable); 1239 1240 /* Sort the counting table. */ 1241 VEC_qsort (oecount, cvec, oecount_cmp); 1242 1243 if (dump_file && (dump_flags & TDF_DETAILS)) 1244 { 1245 oecount *c; 1246 fprintf (dump_file, "Candidates:\n"); 1247 FOR_EACH_VEC_ELT (oecount, cvec, j, c) 1248 { 1249 fprintf (dump_file, " %u %s: ", c->cnt, 1250 c->oecode == MULT_EXPR 1251 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?"); 1252 print_generic_expr (dump_file, c->op, 0); 1253 fprintf (dump_file, "\n"); 1254 } 1255 } 1256 1257 /* Process the (operand, code) pairs in order of most occurence. */ 1258 candidates2 = sbitmap_alloc (length); 1259 while (!VEC_empty (oecount, cvec)) 1260 { 1261 oecount *c = VEC_last (oecount, cvec); 1262 if (c->cnt < 2) 1263 break; 1264 1265 /* Now collect the operands in the outer chain that contain 1266 the common operand in their inner chain. */ 1267 sbitmap_zero (candidates2); 1268 nr_candidates2 = 0; 1269 EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0) 1270 { 1271 gimple oedef; 1272 enum tree_code oecode; 1273 unsigned j; 1274 tree op = VEC_index (operand_entry_t, *ops, i)->op; 1275 1276 /* If we undistributed in this chain already this may be 1277 a constant. */ 1278 if (TREE_CODE (op) != SSA_NAME) 1279 continue; 1280 1281 oedef = SSA_NAME_DEF_STMT (op); 1282 oecode = gimple_assign_rhs_code (oedef); 1283 if (oecode != c->oecode) 1284 continue; 1285 1286 FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1) 1287 { 1288 if (oe1->op == c->op) 1289 { 1290 SET_BIT (candidates2, i); 1291 ++nr_candidates2; 1292 break; 1293 } 1294 } 1295 } 1296 1297 if (nr_candidates2 >= 2) 1298 { 1299 operand_entry_t oe1, oe2; 1300 tree tmpvar; 1301 gimple prod; 1302 int first = sbitmap_first_set_bit (candidates2); 1303 1304 /* Build the new addition chain. */ 1305 oe1 = VEC_index (operand_entry_t, *ops, first); 1306 if (dump_file && (dump_flags & TDF_DETAILS)) 1307 { 1308 fprintf (dump_file, "Building ("); 1309 print_generic_expr (dump_file, oe1->op, 0); 1310 } 1311 tmpvar = create_tmp_reg (TREE_TYPE (oe1->op), NULL); 1312 add_referenced_var (tmpvar); 1313 zero_one_operation (&oe1->op, c->oecode, c->op); 1314 EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0) 1315 { 1316 gimple sum; 1317 oe2 = VEC_index (operand_entry_t, *ops, i); 1318 if (dump_file && (dump_flags & TDF_DETAILS)) 1319 { 1320 fprintf (dump_file, " + "); 1321 print_generic_expr (dump_file, oe2->op, 0); 1322 } 1323 zero_one_operation (&oe2->op, c->oecode, c->op); 1324 sum = build_and_add_sum (tmpvar, oe1->op, oe2->op, opcode); 1325 oe2->op = build_zero_cst (TREE_TYPE (oe2->op)); 1326 oe2->rank = 0; 1327 oe1->op = gimple_get_lhs (sum); 1328 } 1329 1330 /* Apply the multiplication/division. */ 1331 prod = build_and_add_sum (tmpvar, oe1->op, c->op, c->oecode); 1332 if (dump_file && (dump_flags & TDF_DETAILS)) 1333 { 1334 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/"); 1335 print_generic_expr (dump_file, c->op, 0); 1336 fprintf (dump_file, "\n"); 1337 } 1338 1339 /* Record it in the addition chain and disable further 1340 undistribution with this op. */ 1341 oe1->op = gimple_assign_lhs (prod); 1342 oe1->rank = get_rank (oe1->op); 1343 VEC_free (operand_entry_t, heap, subops[first]); 1344 1345 changed = true; 1346 } 1347 1348 VEC_pop (oecount, cvec); 1349 } 1350 1351 for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i) 1352 VEC_free (operand_entry_t, heap, subops[i]); 1353 free (subops); 1354 VEC_free (oecount, heap, cvec); 1355 sbitmap_free (candidates); 1356 sbitmap_free (candidates2); 1357 1358 return changed; 1359 } 1360 1361 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison 1362 expression, examine the other OPS to see if any of them are comparisons 1363 of the same values, which we may be able to combine or eliminate. 1364 For example, we can rewrite (a < b) | (a == b) as (a <= b). */ 1365 1366 static bool 1367 eliminate_redundant_comparison (enum tree_code opcode, 1368 VEC (operand_entry_t, heap) **ops, 1369 unsigned int currindex, 1370 operand_entry_t curr) 1371 { 1372 tree op1, op2; 1373 enum tree_code lcode, rcode; 1374 gimple def1, def2; 1375 int i; 1376 operand_entry_t oe; 1377 1378 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) 1379 return false; 1380 1381 /* Check that CURR is a comparison. */ 1382 if (TREE_CODE (curr->op) != SSA_NAME) 1383 return false; 1384 def1 = SSA_NAME_DEF_STMT (curr->op); 1385 if (!is_gimple_assign (def1)) 1386 return false; 1387 lcode = gimple_assign_rhs_code (def1); 1388 if (TREE_CODE_CLASS (lcode) != tcc_comparison) 1389 return false; 1390 op1 = gimple_assign_rhs1 (def1); 1391 op2 = gimple_assign_rhs2 (def1); 1392 1393 /* Now look for a similar comparison in the remaining OPS. */ 1394 for (i = currindex + 1; 1395 VEC_iterate (operand_entry_t, *ops, i, oe); 1396 i++) 1397 { 1398 tree t; 1399 1400 if (TREE_CODE (oe->op) != SSA_NAME) 1401 continue; 1402 def2 = SSA_NAME_DEF_STMT (oe->op); 1403 if (!is_gimple_assign (def2)) 1404 continue; 1405 rcode = gimple_assign_rhs_code (def2); 1406 if (TREE_CODE_CLASS (rcode) != tcc_comparison) 1407 continue; 1408 1409 /* If we got here, we have a match. See if we can combine the 1410 two comparisons. */ 1411 if (opcode == BIT_IOR_EXPR) 1412 t = maybe_fold_or_comparisons (lcode, op1, op2, 1413 rcode, gimple_assign_rhs1 (def2), 1414 gimple_assign_rhs2 (def2)); 1415 else 1416 t = maybe_fold_and_comparisons (lcode, op1, op2, 1417 rcode, gimple_assign_rhs1 (def2), 1418 gimple_assign_rhs2 (def2)); 1419 if (!t) 1420 continue; 1421 1422 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons 1423 always give us a boolean_type_node value back. If the original 1424 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type, 1425 we need to convert. */ 1426 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t))) 1427 t = fold_convert (TREE_TYPE (curr->op), t); 1428 1429 if (TREE_CODE (t) != INTEGER_CST 1430 && !operand_equal_p (t, curr->op, 0)) 1431 { 1432 enum tree_code subcode; 1433 tree newop1, newop2; 1434 if (!COMPARISON_CLASS_P (t)) 1435 continue; 1436 extract_ops_from_tree (t, &subcode, &newop1, &newop2); 1437 STRIP_USELESS_TYPE_CONVERSION (newop1); 1438 STRIP_USELESS_TYPE_CONVERSION (newop2); 1439 if (!is_gimple_val (newop1) || !is_gimple_val (newop2)) 1440 continue; 1441 } 1442 1443 if (dump_file && (dump_flags & TDF_DETAILS)) 1444 { 1445 fprintf (dump_file, "Equivalence: "); 1446 print_generic_expr (dump_file, curr->op, 0); 1447 fprintf (dump_file, " %s ", op_symbol_code (opcode)); 1448 print_generic_expr (dump_file, oe->op, 0); 1449 fprintf (dump_file, " -> "); 1450 print_generic_expr (dump_file, t, 0); 1451 fprintf (dump_file, "\n"); 1452 } 1453 1454 /* Now we can delete oe, as it has been subsumed by the new combined 1455 expression t. */ 1456 VEC_ordered_remove (operand_entry_t, *ops, i); 1457 reassociate_stats.ops_eliminated ++; 1458 1459 /* If t is the same as curr->op, we're done. Otherwise we must 1460 replace curr->op with t. Special case is if we got a constant 1461 back, in which case we add it to the end instead of in place of 1462 the current entry. */ 1463 if (TREE_CODE (t) == INTEGER_CST) 1464 { 1465 VEC_ordered_remove (operand_entry_t, *ops, currindex); 1466 add_to_ops_vec (ops, t); 1467 } 1468 else if (!operand_equal_p (t, curr->op, 0)) 1469 { 1470 tree tmpvar; 1471 gimple sum; 1472 enum tree_code subcode; 1473 tree newop1; 1474 tree newop2; 1475 gcc_assert (COMPARISON_CLASS_P (t)); 1476 tmpvar = create_tmp_var (TREE_TYPE (t), NULL); 1477 add_referenced_var (tmpvar); 1478 extract_ops_from_tree (t, &subcode, &newop1, &newop2); 1479 STRIP_USELESS_TYPE_CONVERSION (newop1); 1480 STRIP_USELESS_TYPE_CONVERSION (newop2); 1481 gcc_checking_assert (is_gimple_val (newop1) 1482 && is_gimple_val (newop2)); 1483 sum = build_and_add_sum (tmpvar, newop1, newop2, subcode); 1484 curr->op = gimple_get_lhs (sum); 1485 } 1486 return true; 1487 } 1488 1489 return false; 1490 } 1491 1492 /* Perform various identities and other optimizations on the list of 1493 operand entries, stored in OPS. The tree code for the binary 1494 operation between all the operands is OPCODE. */ 1495 1496 static void 1497 optimize_ops_list (enum tree_code opcode, 1498 VEC (operand_entry_t, heap) **ops) 1499 { 1500 unsigned int length = VEC_length (operand_entry_t, *ops); 1501 unsigned int i; 1502 operand_entry_t oe; 1503 operand_entry_t oelast = NULL; 1504 bool iterate = false; 1505 1506 if (length == 1) 1507 return; 1508 1509 oelast = VEC_last (operand_entry_t, *ops); 1510 1511 /* If the last two are constants, pop the constants off, merge them 1512 and try the next two. */ 1513 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op)) 1514 { 1515 operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2); 1516 1517 if (oelm1->rank == 0 1518 && is_gimple_min_invariant (oelm1->op) 1519 && useless_type_conversion_p (TREE_TYPE (oelm1->op), 1520 TREE_TYPE (oelast->op))) 1521 { 1522 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op), 1523 oelm1->op, oelast->op); 1524 1525 if (folded && is_gimple_min_invariant (folded)) 1526 { 1527 if (dump_file && (dump_flags & TDF_DETAILS)) 1528 fprintf (dump_file, "Merging constants\n"); 1529 1530 VEC_pop (operand_entry_t, *ops); 1531 VEC_pop (operand_entry_t, *ops); 1532 1533 add_to_ops_vec (ops, folded); 1534 reassociate_stats.constants_eliminated++; 1535 1536 optimize_ops_list (opcode, ops); 1537 return; 1538 } 1539 } 1540 } 1541 1542 eliminate_using_constants (opcode, ops); 1543 oelast = NULL; 1544 1545 for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);) 1546 { 1547 bool done = false; 1548 1549 if (eliminate_not_pairs (opcode, ops, i, oe)) 1550 return; 1551 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast) 1552 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe)) 1553 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe))) 1554 { 1555 if (done) 1556 return; 1557 iterate = true; 1558 oelast = NULL; 1559 continue; 1560 } 1561 oelast = oe; 1562 i++; 1563 } 1564 1565 length = VEC_length (operand_entry_t, *ops); 1566 oelast = VEC_last (operand_entry_t, *ops); 1567 1568 if (iterate) 1569 optimize_ops_list (opcode, ops); 1570 } 1571 1572 /* The following functions are subroutines to optimize_range_tests and allow 1573 it to try to change a logical combination of comparisons into a range 1574 test. 1575 1576 For example, both 1577 X == 2 || X == 5 || X == 3 || X == 4 1578 and 1579 X >= 2 && X <= 5 1580 are converted to 1581 (unsigned) (X - 2) <= 3 1582 1583 For more information see comments above fold_test_range in fold-const.c, 1584 this implementation is for GIMPLE. */ 1585 1586 struct range_entry 1587 { 1588 tree exp; 1589 tree low; 1590 tree high; 1591 bool in_p; 1592 bool strict_overflow_p; 1593 unsigned int idx, next; 1594 }; 1595 1596 /* This is similar to make_range in fold-const.c, but on top of 1597 GIMPLE instead of trees. */ 1598 1599 static void 1600 init_range_entry (struct range_entry *r, tree exp) 1601 { 1602 int in_p; 1603 tree low, high; 1604 bool is_bool, strict_overflow_p; 1605 1606 r->exp = NULL_TREE; 1607 r->in_p = false; 1608 r->strict_overflow_p = false; 1609 r->low = NULL_TREE; 1610 r->high = NULL_TREE; 1611 if (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))) 1612 return; 1613 1614 /* Start with simply saying "EXP != 0" and then look at the code of EXP 1615 and see if we can refine the range. Some of the cases below may not 1616 happen, but it doesn't seem worth worrying about this. We "continue" 1617 the outer loop when we've changed something; otherwise we "break" 1618 the switch, which will "break" the while. */ 1619 low = build_int_cst (TREE_TYPE (exp), 0); 1620 high = low; 1621 in_p = 0; 1622 strict_overflow_p = false; 1623 is_bool = false; 1624 if (TYPE_PRECISION (TREE_TYPE (exp)) == 1) 1625 { 1626 if (TYPE_UNSIGNED (TREE_TYPE (exp))) 1627 is_bool = true; 1628 else 1629 return; 1630 } 1631 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE) 1632 is_bool = true; 1633 1634 while (1) 1635 { 1636 gimple stmt; 1637 enum tree_code code; 1638 tree arg0, arg1, exp_type; 1639 tree nexp; 1640 location_t loc; 1641 1642 if (TREE_CODE (exp) != SSA_NAME) 1643 break; 1644 1645 stmt = SSA_NAME_DEF_STMT (exp); 1646 if (!is_gimple_assign (stmt)) 1647 break; 1648 1649 code = gimple_assign_rhs_code (stmt); 1650 arg0 = gimple_assign_rhs1 (stmt); 1651 if (TREE_CODE (arg0) != SSA_NAME) 1652 break; 1653 arg1 = gimple_assign_rhs2 (stmt); 1654 exp_type = TREE_TYPE (exp); 1655 loc = gimple_location (stmt); 1656 switch (code) 1657 { 1658 case BIT_NOT_EXPR: 1659 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE) 1660 { 1661 in_p = !in_p; 1662 exp = arg0; 1663 continue; 1664 } 1665 break; 1666 case SSA_NAME: 1667 exp = arg0; 1668 continue; 1669 CASE_CONVERT: 1670 if (is_bool) 1671 goto do_default; 1672 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1) 1673 { 1674 if (TYPE_UNSIGNED (TREE_TYPE (arg0))) 1675 is_bool = true; 1676 else 1677 return; 1678 } 1679 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE) 1680 is_bool = true; 1681 goto do_default; 1682 case EQ_EXPR: 1683 case NE_EXPR: 1684 case LT_EXPR: 1685 case LE_EXPR: 1686 case GE_EXPR: 1687 case GT_EXPR: 1688 is_bool = true; 1689 /* FALLTHRU */ 1690 default: 1691 if (!is_bool) 1692 return; 1693 do_default: 1694 nexp = make_range_step (loc, code, arg0, arg1, exp_type, 1695 &low, &high, &in_p, 1696 &strict_overflow_p); 1697 if (nexp != NULL_TREE) 1698 { 1699 exp = nexp; 1700 gcc_assert (TREE_CODE (exp) == SSA_NAME); 1701 continue; 1702 } 1703 break; 1704 } 1705 break; 1706 } 1707 if (is_bool) 1708 { 1709 r->exp = exp; 1710 r->in_p = in_p; 1711 r->low = low; 1712 r->high = high; 1713 r->strict_overflow_p = strict_overflow_p; 1714 } 1715 } 1716 1717 /* Comparison function for qsort. Sort entries 1718 without SSA_NAME exp first, then with SSA_NAMEs sorted 1719 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs 1720 by increasing ->low and if ->low is the same, by increasing 1721 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE 1722 maximum. */ 1723 1724 static int 1725 range_entry_cmp (const void *a, const void *b) 1726 { 1727 const struct range_entry *p = (const struct range_entry *) a; 1728 const struct range_entry *q = (const struct range_entry *) b; 1729 1730 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME) 1731 { 1732 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) 1733 { 1734 /* Group range_entries for the same SSA_NAME together. */ 1735 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp)) 1736 return -1; 1737 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp)) 1738 return 1; 1739 /* If ->low is different, NULL low goes first, then by 1740 ascending low. */ 1741 if (p->low != NULL_TREE) 1742 { 1743 if (q->low != NULL_TREE) 1744 { 1745 tree tem = fold_binary (LT_EXPR, boolean_type_node, 1746 p->low, q->low); 1747 if (tem && integer_onep (tem)) 1748 return -1; 1749 tem = fold_binary (GT_EXPR, boolean_type_node, 1750 p->low, q->low); 1751 if (tem && integer_onep (tem)) 1752 return 1; 1753 } 1754 else 1755 return 1; 1756 } 1757 else if (q->low != NULL_TREE) 1758 return -1; 1759 /* If ->high is different, NULL high goes last, before that by 1760 ascending high. */ 1761 if (p->high != NULL_TREE) 1762 { 1763 if (q->high != NULL_TREE) 1764 { 1765 tree tem = fold_binary (LT_EXPR, boolean_type_node, 1766 p->high, q->high); 1767 if (tem && integer_onep (tem)) 1768 return -1; 1769 tem = fold_binary (GT_EXPR, boolean_type_node, 1770 p->high, q->high); 1771 if (tem && integer_onep (tem)) 1772 return 1; 1773 } 1774 else 1775 return -1; 1776 } 1777 else if (p->high != NULL_TREE) 1778 return 1; 1779 /* If both ranges are the same, sort below by ascending idx. */ 1780 } 1781 else 1782 return 1; 1783 } 1784 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) 1785 return -1; 1786 1787 if (p->idx < q->idx) 1788 return -1; 1789 else 1790 { 1791 gcc_checking_assert (p->idx > q->idx); 1792 return 1; 1793 } 1794 } 1795 1796 /* Helper routine of optimize_range_test. 1797 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for 1798 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges, 1799 OPCODE and OPS are arguments of optimize_range_tests. Return 1800 true if the range merge has been successful. */ 1801 1802 static bool 1803 update_range_test (struct range_entry *range, struct range_entry *otherrange, 1804 unsigned int count, enum tree_code opcode, 1805 VEC (operand_entry_t, heap) **ops, tree exp, bool in_p, 1806 tree low, tree high, bool strict_overflow_p) 1807 { 1808 tree op = VEC_index (operand_entry_t, *ops, range->idx)->op; 1809 location_t loc = gimple_location (SSA_NAME_DEF_STMT (op)); 1810 tree tem = build_range_check (loc, TREE_TYPE (op), exp, in_p, low, high); 1811 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; 1812 gimple_stmt_iterator gsi; 1813 1814 if (tem == NULL_TREE) 1815 return false; 1816 1817 if (strict_overflow_p && issue_strict_overflow_warning (wc)) 1818 warning_at (loc, OPT_Wstrict_overflow, 1819 "assuming signed overflow does not occur " 1820 "when simplifying range test"); 1821 1822 if (dump_file && (dump_flags & TDF_DETAILS)) 1823 { 1824 struct range_entry *r; 1825 fprintf (dump_file, "Optimizing range tests "); 1826 print_generic_expr (dump_file, range->exp, 0); 1827 fprintf (dump_file, " %c[", range->in_p ? '+' : '-'); 1828 print_generic_expr (dump_file, range->low, 0); 1829 fprintf (dump_file, ", "); 1830 print_generic_expr (dump_file, range->high, 0); 1831 fprintf (dump_file, "]"); 1832 for (r = otherrange; r < otherrange + count; r++) 1833 { 1834 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-'); 1835 print_generic_expr (dump_file, r->low, 0); 1836 fprintf (dump_file, ", "); 1837 print_generic_expr (dump_file, r->high, 0); 1838 fprintf (dump_file, "]"); 1839 } 1840 fprintf (dump_file, "\n into "); 1841 print_generic_expr (dump_file, tem, 0); 1842 fprintf (dump_file, "\n"); 1843 } 1844 1845 if (opcode == BIT_IOR_EXPR) 1846 tem = invert_truthvalue_loc (loc, tem); 1847 1848 tem = fold_convert_loc (loc, TREE_TYPE (op), tem); 1849 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (op)); 1850 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true, 1851 GSI_SAME_STMT); 1852 1853 VEC_index (operand_entry_t, *ops, range->idx)->op = tem; 1854 range->exp = exp; 1855 range->low = low; 1856 range->high = high; 1857 range->in_p = in_p; 1858 range->strict_overflow_p = false; 1859 1860 for (range = otherrange; range < otherrange + count; range++) 1861 { 1862 VEC_index (operand_entry_t, *ops, range->idx)->op = error_mark_node; 1863 range->exp = NULL_TREE; 1864 } 1865 return true; 1866 } 1867 1868 /* Optimize range tests, similarly how fold_range_test optimizes 1869 it on trees. The tree code for the binary 1870 operation between all the operands is OPCODE. */ 1871 1872 static void 1873 optimize_range_tests (enum tree_code opcode, 1874 VEC (operand_entry_t, heap) **ops) 1875 { 1876 unsigned int length = VEC_length (operand_entry_t, *ops), i, j, first; 1877 operand_entry_t oe; 1878 struct range_entry *ranges; 1879 bool any_changes = false; 1880 1881 if (length == 1) 1882 return; 1883 1884 ranges = XNEWVEC (struct range_entry, length); 1885 for (i = 0; i < length; i++) 1886 { 1887 ranges[i].idx = i; 1888 init_range_entry (ranges + i, VEC_index (operand_entry_t, *ops, i)->op); 1889 /* For | invert it now, we will invert it again before emitting 1890 the optimized expression. */ 1891 if (opcode == BIT_IOR_EXPR) 1892 ranges[i].in_p = !ranges[i].in_p; 1893 } 1894 1895 qsort (ranges, length, sizeof (*ranges), range_entry_cmp); 1896 for (i = 0; i < length; i++) 1897 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME) 1898 break; 1899 1900 /* Try to merge ranges. */ 1901 for (first = i; i < length; i++) 1902 { 1903 tree low = ranges[i].low; 1904 tree high = ranges[i].high; 1905 int in_p = ranges[i].in_p; 1906 bool strict_overflow_p = ranges[i].strict_overflow_p; 1907 int update_fail_count = 0; 1908 1909 for (j = i + 1; j < length; j++) 1910 { 1911 if (ranges[i].exp != ranges[j].exp) 1912 break; 1913 if (!merge_ranges (&in_p, &low, &high, in_p, low, high, 1914 ranges[j].in_p, ranges[j].low, ranges[j].high)) 1915 break; 1916 strict_overflow_p |= ranges[j].strict_overflow_p; 1917 } 1918 1919 if (j == i + 1) 1920 continue; 1921 1922 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode, 1923 ops, ranges[i].exp, in_p, low, high, 1924 strict_overflow_p)) 1925 { 1926 i = j - 1; 1927 any_changes = true; 1928 } 1929 /* Avoid quadratic complexity if all merge_ranges calls would succeed, 1930 while update_range_test would fail. */ 1931 else if (update_fail_count == 64) 1932 i = j - 1; 1933 else 1934 ++update_fail_count; 1935 } 1936 1937 /* Optimize X == CST1 || X == CST2 1938 if popcount (CST1 ^ CST2) == 1 into 1939 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)). 1940 Similarly for ranges. E.g. 1941 X != 2 && X != 3 && X != 10 && X != 11 1942 will be transformed by the above loop into 1943 (X - 2U) <= 1U && (X - 10U) <= 1U 1944 and this loop can transform that into 1945 ((X & ~8) - 2U) <= 1U. */ 1946 for (i = first; i < length; i++) 1947 { 1948 tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp; 1949 1950 if (ranges[i].exp == NULL_TREE || ranges[i].in_p) 1951 continue; 1952 type = TREE_TYPE (ranges[i].exp); 1953 if (!INTEGRAL_TYPE_P (type)) 1954 continue; 1955 lowi = ranges[i].low; 1956 if (lowi == NULL_TREE) 1957 lowi = TYPE_MIN_VALUE (type); 1958 highi = ranges[i].high; 1959 if (highi == NULL_TREE) 1960 continue; 1961 for (j = i + 1; j < length && j < i + 64; j++) 1962 { 1963 if (ranges[j].exp == NULL_TREE) 1964 continue; 1965 if (ranges[i].exp != ranges[j].exp) 1966 break; 1967 if (ranges[j].in_p) 1968 continue; 1969 lowj = ranges[j].low; 1970 if (lowj == NULL_TREE) 1971 continue; 1972 highj = ranges[j].high; 1973 if (highj == NULL_TREE) 1974 highj = TYPE_MAX_VALUE (type); 1975 tem = fold_binary (GT_EXPR, boolean_type_node, 1976 lowj, highi); 1977 if (tem == NULL_TREE || !integer_onep (tem)) 1978 continue; 1979 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj); 1980 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST) 1981 continue; 1982 gcc_checking_assert (!integer_zerop (lowxor)); 1983 tem = fold_binary (MINUS_EXPR, type, lowxor, 1984 build_int_cst (type, 1)); 1985 if (tem == NULL_TREE) 1986 continue; 1987 tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem); 1988 if (tem == NULL_TREE || !integer_zerop (tem)) 1989 continue; 1990 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj); 1991 if (!tree_int_cst_equal (lowxor, highxor)) 1992 continue; 1993 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor); 1994 exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem); 1995 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem); 1996 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem); 1997 if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp, 1998 ranges[i].in_p, lowj, highj, 1999 ranges[i].strict_overflow_p 2000 || ranges[j].strict_overflow_p)) 2001 { 2002 any_changes = true; 2003 break; 2004 } 2005 } 2006 } 2007 2008 if (any_changes) 2009 { 2010 j = 0; 2011 FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe) 2012 { 2013 if (oe->op == error_mark_node) 2014 continue; 2015 else if (i != j) 2016 VEC_replace (operand_entry_t, *ops, j, oe); 2017 j++; 2018 } 2019 VEC_truncate (operand_entry_t, *ops, j); 2020 } 2021 2022 XDELETEVEC (ranges); 2023 } 2024 2025 /* Return true if OPERAND is defined by a PHI node which uses the LHS 2026 of STMT in it's operands. This is also known as a "destructive 2027 update" operation. */ 2028 2029 static bool 2030 is_phi_for_stmt (gimple stmt, tree operand) 2031 { 2032 gimple def_stmt; 2033 tree lhs; 2034 use_operand_p arg_p; 2035 ssa_op_iter i; 2036 2037 if (TREE_CODE (operand) != SSA_NAME) 2038 return false; 2039 2040 lhs = gimple_assign_lhs (stmt); 2041 2042 def_stmt = SSA_NAME_DEF_STMT (operand); 2043 if (gimple_code (def_stmt) != GIMPLE_PHI) 2044 return false; 2045 2046 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE) 2047 if (lhs == USE_FROM_PTR (arg_p)) 2048 return true; 2049 return false; 2050 } 2051 2052 /* Remove def stmt of VAR if VAR has zero uses and recurse 2053 on rhs1 operand if so. */ 2054 2055 static void 2056 remove_visited_stmt_chain (tree var) 2057 { 2058 gimple stmt; 2059 gimple_stmt_iterator gsi; 2060 2061 while (1) 2062 { 2063 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var)) 2064 return; 2065 stmt = SSA_NAME_DEF_STMT (var); 2066 if (!is_gimple_assign (stmt) 2067 || !gimple_visited_p (stmt)) 2068 return; 2069 var = gimple_assign_rhs1 (stmt); 2070 gsi = gsi_for_stmt (stmt); 2071 gsi_remove (&gsi, true); 2072 release_defs (stmt); 2073 } 2074 } 2075 2076 /* This function checks three consequtive operands in 2077 passed operands vector OPS starting from OPINDEX and 2078 swaps two operands if it is profitable for binary operation 2079 consuming OPINDEX + 1 abnd OPINDEX + 2 operands. 2080 2081 We pair ops with the same rank if possible. 2082 2083 The alternative we try is to see if STMT is a destructive 2084 update style statement, which is like: 2085 b = phi (a, ...) 2086 a = c + b; 2087 In that case, we want to use the destructive update form to 2088 expose the possible vectorizer sum reduction opportunity. 2089 In that case, the third operand will be the phi node. This 2090 check is not performed if STMT is null. 2091 2092 We could, of course, try to be better as noted above, and do a 2093 lot of work to try to find these opportunities in >3 operand 2094 cases, but it is unlikely to be worth it. */ 2095 2096 static void 2097 swap_ops_for_binary_stmt (VEC(operand_entry_t, heap) * ops, 2098 unsigned int opindex, gimple stmt) 2099 { 2100 operand_entry_t oe1, oe2, oe3; 2101 2102 oe1 = VEC_index (operand_entry_t, ops, opindex); 2103 oe2 = VEC_index (operand_entry_t, ops, opindex + 1); 2104 oe3 = VEC_index (operand_entry_t, ops, opindex + 2); 2105 2106 if ((oe1->rank == oe2->rank 2107 && oe2->rank != oe3->rank) 2108 || (stmt && is_phi_for_stmt (stmt, oe3->op) 2109 && !is_phi_for_stmt (stmt, oe1->op) 2110 && !is_phi_for_stmt (stmt, oe2->op))) 2111 { 2112 struct operand_entry temp = *oe3; 2113 oe3->op = oe1->op; 2114 oe3->rank = oe1->rank; 2115 oe1->op = temp.op; 2116 oe1->rank= temp.rank; 2117 } 2118 else if ((oe1->rank == oe3->rank 2119 && oe2->rank != oe3->rank) 2120 || (stmt && is_phi_for_stmt (stmt, oe2->op) 2121 && !is_phi_for_stmt (stmt, oe1->op) 2122 && !is_phi_for_stmt (stmt, oe3->op))) 2123 { 2124 struct operand_entry temp = *oe2; 2125 oe2->op = oe1->op; 2126 oe2->rank = oe1->rank; 2127 oe1->op = temp.op; 2128 oe1->rank= temp.rank; 2129 } 2130 } 2131 2132 /* Recursively rewrite our linearized statements so that the operators 2133 match those in OPS[OPINDEX], putting the computation in rank 2134 order. */ 2135 2136 static void 2137 rewrite_expr_tree (gimple stmt, unsigned int opindex, 2138 VEC(operand_entry_t, heap) * ops, bool moved) 2139 { 2140 tree rhs1 = gimple_assign_rhs1 (stmt); 2141 tree rhs2 = gimple_assign_rhs2 (stmt); 2142 operand_entry_t oe; 2143 2144 /* If we have three operands left, then we want to make sure the ones 2145 that get the double binary op are chosen wisely. */ 2146 if (opindex + 3 == VEC_length (operand_entry_t, ops)) 2147 swap_ops_for_binary_stmt (ops, opindex, stmt); 2148 2149 /* The final recursion case for this function is that you have 2150 exactly two operations left. 2151 If we had one exactly one op in the entire list to start with, we 2152 would have never called this function, and the tail recursion 2153 rewrites them one at a time. */ 2154 if (opindex + 2 == VEC_length (operand_entry_t, ops)) 2155 { 2156 operand_entry_t oe1, oe2; 2157 2158 oe1 = VEC_index (operand_entry_t, ops, opindex); 2159 oe2 = VEC_index (operand_entry_t, ops, opindex + 1); 2160 2161 if (rhs1 != oe1->op || rhs2 != oe2->op) 2162 { 2163 if (dump_file && (dump_flags & TDF_DETAILS)) 2164 { 2165 fprintf (dump_file, "Transforming "); 2166 print_gimple_stmt (dump_file, stmt, 0, 0); 2167 } 2168 2169 gimple_assign_set_rhs1 (stmt, oe1->op); 2170 gimple_assign_set_rhs2 (stmt, oe2->op); 2171 update_stmt (stmt); 2172 if (rhs1 != oe1->op && rhs1 != oe2->op) 2173 remove_visited_stmt_chain (rhs1); 2174 2175 if (dump_file && (dump_flags & TDF_DETAILS)) 2176 { 2177 fprintf (dump_file, " into "); 2178 print_gimple_stmt (dump_file, stmt, 0, 0); 2179 } 2180 2181 } 2182 return; 2183 } 2184 2185 /* If we hit here, we should have 3 or more ops left. */ 2186 gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops)); 2187 2188 /* Rewrite the next operator. */ 2189 oe = VEC_index (operand_entry_t, ops, opindex); 2190 2191 if (oe->op != rhs2) 2192 { 2193 if (!moved) 2194 { 2195 gimple_stmt_iterator gsinow, gsirhs1; 2196 gimple stmt1 = stmt, stmt2; 2197 unsigned int count; 2198 2199 gsinow = gsi_for_stmt (stmt); 2200 count = VEC_length (operand_entry_t, ops) - opindex - 2; 2201 while (count-- != 0) 2202 { 2203 stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1)); 2204 gsirhs1 = gsi_for_stmt (stmt2); 2205 gsi_move_before (&gsirhs1, &gsinow); 2206 gsi_prev (&gsinow); 2207 stmt1 = stmt2; 2208 } 2209 moved = true; 2210 } 2211 2212 if (dump_file && (dump_flags & TDF_DETAILS)) 2213 { 2214 fprintf (dump_file, "Transforming "); 2215 print_gimple_stmt (dump_file, stmt, 0, 0); 2216 } 2217 2218 gimple_assign_set_rhs2 (stmt, oe->op); 2219 update_stmt (stmt); 2220 2221 if (dump_file && (dump_flags & TDF_DETAILS)) 2222 { 2223 fprintf (dump_file, " into "); 2224 print_gimple_stmt (dump_file, stmt, 0, 0); 2225 } 2226 } 2227 /* Recurse on the LHS of the binary operator, which is guaranteed to 2228 be the non-leaf side. */ 2229 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved); 2230 } 2231 2232 /* Find out how many cycles we need to compute statements chain. 2233 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a 2234 maximum number of independent statements we may execute per cycle. */ 2235 2236 static int 2237 get_required_cycles (int ops_num, int cpu_width) 2238 { 2239 int res; 2240 int elog; 2241 unsigned int rest; 2242 2243 /* While we have more than 2 * cpu_width operands 2244 we may reduce number of operands by cpu_width 2245 per cycle. */ 2246 res = ops_num / (2 * cpu_width); 2247 2248 /* Remained operands count may be reduced twice per cycle 2249 until we have only one operand. */ 2250 rest = (unsigned)(ops_num - res * cpu_width); 2251 elog = exact_log2 (rest); 2252 if (elog >= 0) 2253 res += elog; 2254 else 2255 res += floor_log2 (rest) + 1; 2256 2257 return res; 2258 } 2259 2260 /* Returns an optimal number of registers to use for computation of 2261 given statements. */ 2262 2263 static int 2264 get_reassociation_width (int ops_num, enum tree_code opc, 2265 enum machine_mode mode) 2266 { 2267 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH); 2268 int width; 2269 int width_min; 2270 int cycles_best; 2271 2272 if (param_width > 0) 2273 width = param_width; 2274 else 2275 width = targetm.sched.reassociation_width (opc, mode); 2276 2277 if (width == 1) 2278 return width; 2279 2280 /* Get the minimal time required for sequence computation. */ 2281 cycles_best = get_required_cycles (ops_num, width); 2282 2283 /* Check if we may use less width and still compute sequence for 2284 the same time. It will allow us to reduce registers usage. 2285 get_required_cycles is monotonically increasing with lower width 2286 so we can perform a binary search for the minimal width that still 2287 results in the optimal cycle count. */ 2288 width_min = 1; 2289 while (width > width_min) 2290 { 2291 int width_mid = (width + width_min) / 2; 2292 2293 if (get_required_cycles (ops_num, width_mid) == cycles_best) 2294 width = width_mid; 2295 else if (width_min < width_mid) 2296 width_min = width_mid; 2297 else 2298 break; 2299 } 2300 2301 return width; 2302 } 2303 2304 /* Recursively rewrite our linearized statements so that the operators 2305 match those in OPS[OPINDEX], putting the computation in rank 2306 order and trying to allow operations to be executed in 2307 parallel. */ 2308 2309 static void 2310 rewrite_expr_tree_parallel (gimple stmt, int width, 2311 VEC(operand_entry_t, heap) * ops) 2312 { 2313 enum tree_code opcode = gimple_assign_rhs_code (stmt); 2314 int op_num = VEC_length (operand_entry_t, ops); 2315 gcc_assert (op_num > 0); 2316 int stmt_num = op_num - 1; 2317 gimple *stmts = XALLOCAVEC (gimple, stmt_num); 2318 int op_index = op_num - 1; 2319 int stmt_index = 0; 2320 int ready_stmts_end = 0; 2321 int i = 0; 2322 tree last_rhs1 = gimple_assign_rhs1 (stmt); 2323 tree lhs_var; 2324 2325 /* We start expression rewriting from the top statements. 2326 So, in this loop we create a full list of statements 2327 we will work with. */ 2328 stmts[stmt_num - 1] = stmt; 2329 for (i = stmt_num - 2; i >= 0; i--) 2330 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1])); 2331 2332 lhs_var = create_tmp_reg (TREE_TYPE (last_rhs1), NULL); 2333 add_referenced_var (lhs_var); 2334 2335 for (i = 0; i < stmt_num; i++) 2336 { 2337 tree op1, op2; 2338 2339 /* Determine whether we should use results of 2340 already handled statements or not. */ 2341 if (ready_stmts_end == 0 2342 && (i - stmt_index >= width || op_index < 1)) 2343 ready_stmts_end = i; 2344 2345 /* Now we choose operands for the next statement. Non zero 2346 value in ready_stmts_end means here that we should use 2347 the result of already generated statements as new operand. */ 2348 if (ready_stmts_end > 0) 2349 { 2350 op1 = gimple_assign_lhs (stmts[stmt_index++]); 2351 if (ready_stmts_end > stmt_index) 2352 op2 = gimple_assign_lhs (stmts[stmt_index++]); 2353 else if (op_index >= 0) 2354 op2 = VEC_index (operand_entry_t, ops, op_index--)->op; 2355 else 2356 { 2357 gcc_assert (stmt_index < i); 2358 op2 = gimple_assign_lhs (stmts[stmt_index++]); 2359 } 2360 2361 if (stmt_index >= ready_stmts_end) 2362 ready_stmts_end = 0; 2363 } 2364 else 2365 { 2366 if (op_index > 1) 2367 swap_ops_for_binary_stmt (ops, op_index - 2, NULL); 2368 op2 = VEC_index (operand_entry_t, ops, op_index--)->op; 2369 op1 = VEC_index (operand_entry_t, ops, op_index--)->op; 2370 } 2371 2372 /* If we emit the last statement then we should put 2373 operands into the last statement. It will also 2374 break the loop. */ 2375 if (op_index < 0 && stmt_index == i) 2376 i = stmt_num - 1; 2377 2378 if (dump_file && (dump_flags & TDF_DETAILS)) 2379 { 2380 fprintf (dump_file, "Transforming "); 2381 print_gimple_stmt (dump_file, stmts[i], 0, 0); 2382 } 2383 2384 /* We keep original statement only for the last one. All 2385 others are recreated. */ 2386 if (i == stmt_num - 1) 2387 { 2388 gimple_assign_set_rhs1 (stmts[i], op1); 2389 gimple_assign_set_rhs2 (stmts[i], op2); 2390 update_stmt (stmts[i]); 2391 } 2392 else 2393 stmts[i] = build_and_add_sum (lhs_var, op1, op2, opcode); 2394 2395 if (dump_file && (dump_flags & TDF_DETAILS)) 2396 { 2397 fprintf (dump_file, " into "); 2398 print_gimple_stmt (dump_file, stmts[i], 0, 0); 2399 } 2400 } 2401 2402 remove_visited_stmt_chain (last_rhs1); 2403 } 2404 2405 /* Transform STMT, which is really (A +B) + (C + D) into the left 2406 linear form, ((A+B)+C)+D. 2407 Recurse on D if necessary. */ 2408 2409 static void 2410 linearize_expr (gimple stmt) 2411 { 2412 gimple_stmt_iterator gsinow, gsirhs; 2413 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); 2414 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 2415 enum tree_code rhscode = gimple_assign_rhs_code (stmt); 2416 gimple newbinrhs = NULL; 2417 struct loop *loop = loop_containing_stmt (stmt); 2418 2419 gcc_assert (is_reassociable_op (binlhs, rhscode, loop) 2420 && is_reassociable_op (binrhs, rhscode, loop)); 2421 2422 gsinow = gsi_for_stmt (stmt); 2423 gsirhs = gsi_for_stmt (binrhs); 2424 gsi_move_before (&gsirhs, &gsinow); 2425 2426 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs)); 2427 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs)); 2428 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs)); 2429 2430 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME) 2431 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 2432 2433 if (dump_file && (dump_flags & TDF_DETAILS)) 2434 { 2435 fprintf (dump_file, "Linearized: "); 2436 print_gimple_stmt (dump_file, stmt, 0, 0); 2437 } 2438 2439 reassociate_stats.linearized++; 2440 update_stmt (binrhs); 2441 update_stmt (binlhs); 2442 update_stmt (stmt); 2443 2444 gimple_set_visited (stmt, true); 2445 gimple_set_visited (binlhs, true); 2446 gimple_set_visited (binrhs, true); 2447 2448 /* Tail recurse on the new rhs if it still needs reassociation. */ 2449 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop)) 2450 /* ??? This should probably be linearize_expr (newbinrhs) but I don't 2451 want to change the algorithm while converting to tuples. */ 2452 linearize_expr (stmt); 2453 } 2454 2455 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return 2456 it. Otherwise, return NULL. */ 2457 2458 static gimple 2459 get_single_immediate_use (tree lhs) 2460 { 2461 use_operand_p immuse; 2462 gimple immusestmt; 2463 2464 if (TREE_CODE (lhs) == SSA_NAME 2465 && single_imm_use (lhs, &immuse, &immusestmt) 2466 && is_gimple_assign (immusestmt)) 2467 return immusestmt; 2468 2469 return NULL; 2470 } 2471 2472 /* Recursively negate the value of TONEGATE, and return the SSA_NAME 2473 representing the negated value. Insertions of any necessary 2474 instructions go before GSI. 2475 This function is recursive in that, if you hand it "a_5" as the 2476 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will 2477 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */ 2478 2479 static tree 2480 negate_value (tree tonegate, gimple_stmt_iterator *gsi) 2481 { 2482 gimple negatedefstmt= NULL; 2483 tree resultofnegate; 2484 2485 /* If we are trying to negate a name, defined by an add, negate the 2486 add operands instead. */ 2487 if (TREE_CODE (tonegate) == SSA_NAME) 2488 negatedefstmt = SSA_NAME_DEF_STMT (tonegate); 2489 if (TREE_CODE (tonegate) == SSA_NAME 2490 && is_gimple_assign (negatedefstmt) 2491 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME 2492 && has_single_use (gimple_assign_lhs (negatedefstmt)) 2493 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR) 2494 { 2495 gimple_stmt_iterator gsi; 2496 tree rhs1 = gimple_assign_rhs1 (negatedefstmt); 2497 tree rhs2 = gimple_assign_rhs2 (negatedefstmt); 2498 2499 gsi = gsi_for_stmt (negatedefstmt); 2500 rhs1 = negate_value (rhs1, &gsi); 2501 gimple_assign_set_rhs1 (negatedefstmt, rhs1); 2502 2503 gsi = gsi_for_stmt (negatedefstmt); 2504 rhs2 = negate_value (rhs2, &gsi); 2505 gimple_assign_set_rhs2 (negatedefstmt, rhs2); 2506 2507 update_stmt (negatedefstmt); 2508 return gimple_assign_lhs (negatedefstmt); 2509 } 2510 2511 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate); 2512 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true, 2513 NULL_TREE, true, GSI_SAME_STMT); 2514 return resultofnegate; 2515 } 2516 2517 /* Return true if we should break up the subtract in STMT into an add 2518 with negate. This is true when we the subtract operands are really 2519 adds, or the subtract itself is used in an add expression. In 2520 either case, breaking up the subtract into an add with negate 2521 exposes the adds to reassociation. */ 2522 2523 static bool 2524 should_break_up_subtract (gimple stmt) 2525 { 2526 tree lhs = gimple_assign_lhs (stmt); 2527 tree binlhs = gimple_assign_rhs1 (stmt); 2528 tree binrhs = gimple_assign_rhs2 (stmt); 2529 gimple immusestmt; 2530 struct loop *loop = loop_containing_stmt (stmt); 2531 2532 if (TREE_CODE (binlhs) == SSA_NAME 2533 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop)) 2534 return true; 2535 2536 if (TREE_CODE (binrhs) == SSA_NAME 2537 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop)) 2538 return true; 2539 2540 if (TREE_CODE (lhs) == SSA_NAME 2541 && (immusestmt = get_single_immediate_use (lhs)) 2542 && is_gimple_assign (immusestmt) 2543 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR 2544 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR)) 2545 return true; 2546 return false; 2547 } 2548 2549 /* Transform STMT from A - B into A + -B. */ 2550 2551 static void 2552 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip) 2553 { 2554 tree rhs1 = gimple_assign_rhs1 (stmt); 2555 tree rhs2 = gimple_assign_rhs2 (stmt); 2556 2557 if (dump_file && (dump_flags & TDF_DETAILS)) 2558 { 2559 fprintf (dump_file, "Breaking up subtract "); 2560 print_gimple_stmt (dump_file, stmt, 0, 0); 2561 } 2562 2563 rhs2 = negate_value (rhs2, gsip); 2564 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2); 2565 update_stmt (stmt); 2566 } 2567 2568 /* Recursively linearize a binary expression that is the RHS of STMT. 2569 Place the operands of the expression tree in the vector named OPS. */ 2570 2571 static void 2572 linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt, 2573 bool is_associative, bool set_visited) 2574 { 2575 tree binlhs = gimple_assign_rhs1 (stmt); 2576 tree binrhs = gimple_assign_rhs2 (stmt); 2577 gimple binlhsdef, binrhsdef; 2578 bool binlhsisreassoc = false; 2579 bool binrhsisreassoc = false; 2580 enum tree_code rhscode = gimple_assign_rhs_code (stmt); 2581 struct loop *loop = loop_containing_stmt (stmt); 2582 2583 if (set_visited) 2584 gimple_set_visited (stmt, true); 2585 2586 if (TREE_CODE (binlhs) == SSA_NAME) 2587 { 2588 binlhsdef = SSA_NAME_DEF_STMT (binlhs); 2589 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop) 2590 && !stmt_could_throw_p (binlhsdef)); 2591 } 2592 2593 if (TREE_CODE (binrhs) == SSA_NAME) 2594 { 2595 binrhsdef = SSA_NAME_DEF_STMT (binrhs); 2596 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop) 2597 && !stmt_could_throw_p (binrhsdef)); 2598 } 2599 2600 /* If the LHS is not reassociable, but the RHS is, we need to swap 2601 them. If neither is reassociable, there is nothing we can do, so 2602 just put them in the ops vector. If the LHS is reassociable, 2603 linearize it. If both are reassociable, then linearize the RHS 2604 and the LHS. */ 2605 2606 if (!binlhsisreassoc) 2607 { 2608 tree temp; 2609 2610 /* If this is not a associative operation like division, give up. */ 2611 if (!is_associative) 2612 { 2613 add_to_ops_vec (ops, binrhs); 2614 return; 2615 } 2616 2617 if (!binrhsisreassoc) 2618 { 2619 add_to_ops_vec (ops, binrhs); 2620 add_to_ops_vec (ops, binlhs); 2621 return; 2622 } 2623 2624 if (dump_file && (dump_flags & TDF_DETAILS)) 2625 { 2626 fprintf (dump_file, "swapping operands of "); 2627 print_gimple_stmt (dump_file, stmt, 0, 0); 2628 } 2629 2630 swap_tree_operands (stmt, 2631 gimple_assign_rhs1_ptr (stmt), 2632 gimple_assign_rhs2_ptr (stmt)); 2633 update_stmt (stmt); 2634 2635 if (dump_file && (dump_flags & TDF_DETAILS)) 2636 { 2637 fprintf (dump_file, " is now "); 2638 print_gimple_stmt (dump_file, stmt, 0, 0); 2639 } 2640 2641 /* We want to make it so the lhs is always the reassociative op, 2642 so swap. */ 2643 temp = binlhs; 2644 binlhs = binrhs; 2645 binrhs = temp; 2646 } 2647 else if (binrhsisreassoc) 2648 { 2649 linearize_expr (stmt); 2650 binlhs = gimple_assign_rhs1 (stmt); 2651 binrhs = gimple_assign_rhs2 (stmt); 2652 } 2653 2654 gcc_assert (TREE_CODE (binrhs) != SSA_NAME 2655 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), 2656 rhscode, loop)); 2657 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs), 2658 is_associative, set_visited); 2659 add_to_ops_vec (ops, binrhs); 2660 } 2661 2662 /* Repropagate the negates back into subtracts, since no other pass 2663 currently does it. */ 2664 2665 static void 2666 repropagate_negates (void) 2667 { 2668 unsigned int i = 0; 2669 tree negate; 2670 2671 FOR_EACH_VEC_ELT (tree, plus_negates, i, negate) 2672 { 2673 gimple user = get_single_immediate_use (negate); 2674 2675 if (!user || !is_gimple_assign (user)) 2676 continue; 2677 2678 /* The negate operand can be either operand of a PLUS_EXPR 2679 (it can be the LHS if the RHS is a constant for example). 2680 2681 Force the negate operand to the RHS of the PLUS_EXPR, then 2682 transform the PLUS_EXPR into a MINUS_EXPR. */ 2683 if (gimple_assign_rhs_code (user) == PLUS_EXPR) 2684 { 2685 /* If the negated operand appears on the LHS of the 2686 PLUS_EXPR, exchange the operands of the PLUS_EXPR 2687 to force the negated operand to the RHS of the PLUS_EXPR. */ 2688 if (gimple_assign_rhs1 (user) == negate) 2689 { 2690 swap_tree_operands (user, 2691 gimple_assign_rhs1_ptr (user), 2692 gimple_assign_rhs2_ptr (user)); 2693 } 2694 2695 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace 2696 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */ 2697 if (gimple_assign_rhs2 (user) == negate) 2698 { 2699 tree rhs1 = gimple_assign_rhs1 (user); 2700 tree rhs2 = get_unary_op (negate, NEGATE_EXPR); 2701 gimple_stmt_iterator gsi = gsi_for_stmt (user); 2702 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2); 2703 update_stmt (user); 2704 } 2705 } 2706 else if (gimple_assign_rhs_code (user) == MINUS_EXPR) 2707 { 2708 if (gimple_assign_rhs1 (user) == negate) 2709 { 2710 /* We have 2711 x = -a 2712 y = x - b 2713 which we transform into 2714 x = a + b 2715 y = -x . 2716 This pushes down the negate which we possibly can merge 2717 into some other operation, hence insert it into the 2718 plus_negates vector. */ 2719 gimple feed = SSA_NAME_DEF_STMT (negate); 2720 tree a = gimple_assign_rhs1 (feed); 2721 tree rhs2 = gimple_assign_rhs2 (user); 2722 gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2; 2723 gimple_replace_lhs (feed, negate); 2724 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2); 2725 update_stmt (gsi_stmt (gsi)); 2726 gsi2 = gsi_for_stmt (user); 2727 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL); 2728 update_stmt (gsi_stmt (gsi2)); 2729 gsi_move_before (&gsi, &gsi2); 2730 VEC_safe_push (tree, heap, plus_negates, 2731 gimple_assign_lhs (gsi_stmt (gsi2))); 2732 } 2733 else 2734 { 2735 /* Transform "x = -a; y = b - x" into "y = b + a", getting 2736 rid of one operation. */ 2737 gimple feed = SSA_NAME_DEF_STMT (negate); 2738 tree a = gimple_assign_rhs1 (feed); 2739 tree rhs1 = gimple_assign_rhs1 (user); 2740 gimple_stmt_iterator gsi = gsi_for_stmt (user); 2741 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a); 2742 update_stmt (gsi_stmt (gsi)); 2743 } 2744 } 2745 } 2746 } 2747 2748 /* Returns true if OP is of a type for which we can do reassociation. 2749 That is for integral or non-saturating fixed-point types, and for 2750 floating point type when associative-math is enabled. */ 2751 2752 static bool 2753 can_reassociate_p (tree op) 2754 { 2755 tree type = TREE_TYPE (op); 2756 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)) 2757 || NON_SAT_FIXED_POINT_TYPE_P (type) 2758 || (flag_associative_math && FLOAT_TYPE_P (type))) 2759 return true; 2760 return false; 2761 } 2762 2763 /* Break up subtract operations in block BB. 2764 2765 We do this top down because we don't know whether the subtract is 2766 part of a possible chain of reassociation except at the top. 2767 2768 IE given 2769 d = f + g 2770 c = a + e 2771 b = c - d 2772 q = b - r 2773 k = t - q 2774 2775 we want to break up k = t - q, but we won't until we've transformed q 2776 = b - r, which won't be broken up until we transform b = c - d. 2777 2778 En passant, clear the GIMPLE visited flag on every statement. */ 2779 2780 static void 2781 break_up_subtract_bb (basic_block bb) 2782 { 2783 gimple_stmt_iterator gsi; 2784 basic_block son; 2785 2786 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 2787 { 2788 gimple stmt = gsi_stmt (gsi); 2789 gimple_set_visited (stmt, false); 2790 2791 if (!is_gimple_assign (stmt) 2792 || !can_reassociate_p (gimple_assign_lhs (stmt))) 2793 continue; 2794 2795 /* Look for simple gimple subtract operations. */ 2796 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR) 2797 { 2798 if (!can_reassociate_p (gimple_assign_rhs1 (stmt)) 2799 || !can_reassociate_p (gimple_assign_rhs2 (stmt))) 2800 continue; 2801 2802 /* Check for a subtract used only in an addition. If this 2803 is the case, transform it into add of a negate for better 2804 reassociation. IE transform C = A-B into C = A + -B if C 2805 is only used in an addition. */ 2806 if (should_break_up_subtract (stmt)) 2807 break_up_subtract (stmt, &gsi); 2808 } 2809 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR 2810 && can_reassociate_p (gimple_assign_rhs1 (stmt))) 2811 VEC_safe_push (tree, heap, plus_negates, gimple_assign_lhs (stmt)); 2812 } 2813 for (son = first_dom_son (CDI_DOMINATORS, bb); 2814 son; 2815 son = next_dom_son (CDI_DOMINATORS, son)) 2816 break_up_subtract_bb (son); 2817 } 2818 2819 /* Reassociate expressions in basic block BB and its post-dominator as 2820 children. */ 2821 2822 static void 2823 reassociate_bb (basic_block bb) 2824 { 2825 gimple_stmt_iterator gsi; 2826 basic_block son; 2827 2828 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi)) 2829 { 2830 gimple stmt = gsi_stmt (gsi); 2831 2832 if (is_gimple_assign (stmt) 2833 && !stmt_could_throw_p (stmt)) 2834 { 2835 tree lhs, rhs1, rhs2; 2836 enum tree_code rhs_code = gimple_assign_rhs_code (stmt); 2837 2838 /* If this is not a gimple binary expression, there is 2839 nothing for us to do with it. */ 2840 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS) 2841 continue; 2842 2843 /* If this was part of an already processed statement, 2844 we don't need to touch it again. */ 2845 if (gimple_visited_p (stmt)) 2846 { 2847 /* This statement might have become dead because of previous 2848 reassociations. */ 2849 if (has_zero_uses (gimple_get_lhs (stmt))) 2850 { 2851 gsi_remove (&gsi, true); 2852 release_defs (stmt); 2853 /* We might end up removing the last stmt above which 2854 places the iterator to the end of the sequence. 2855 Reset it to the last stmt in this case which might 2856 be the end of the sequence as well if we removed 2857 the last statement of the sequence. In which case 2858 we need to bail out. */ 2859 if (gsi_end_p (gsi)) 2860 { 2861 gsi = gsi_last_bb (bb); 2862 if (gsi_end_p (gsi)) 2863 break; 2864 } 2865 } 2866 continue; 2867 } 2868 2869 lhs = gimple_assign_lhs (stmt); 2870 rhs1 = gimple_assign_rhs1 (stmt); 2871 rhs2 = gimple_assign_rhs2 (stmt); 2872 2873 /* For non-bit or min/max operations we can't associate 2874 all types. Verify that here. */ 2875 if (rhs_code != BIT_IOR_EXPR 2876 && rhs_code != BIT_AND_EXPR 2877 && rhs_code != BIT_XOR_EXPR 2878 && rhs_code != MIN_EXPR 2879 && rhs_code != MAX_EXPR 2880 && (!can_reassociate_p (lhs) 2881 || !can_reassociate_p (rhs1) 2882 || !can_reassociate_p (rhs2))) 2883 continue; 2884 2885 if (associative_tree_code (rhs_code)) 2886 { 2887 VEC(operand_entry_t, heap) *ops = NULL; 2888 2889 /* There may be no immediate uses left by the time we 2890 get here because we may have eliminated them all. */ 2891 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs)) 2892 continue; 2893 2894 gimple_set_visited (stmt, true); 2895 linearize_expr_tree (&ops, stmt, true, true); 2896 VEC_qsort (operand_entry_t, ops, sort_by_operand_rank); 2897 optimize_ops_list (rhs_code, &ops); 2898 if (undistribute_ops_list (rhs_code, &ops, 2899 loop_containing_stmt (stmt))) 2900 { 2901 VEC_qsort (operand_entry_t, ops, sort_by_operand_rank); 2902 optimize_ops_list (rhs_code, &ops); 2903 } 2904 2905 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR) 2906 optimize_range_tests (rhs_code, &ops); 2907 2908 if (VEC_length (operand_entry_t, ops) == 1) 2909 { 2910 if (dump_file && (dump_flags & TDF_DETAILS)) 2911 { 2912 fprintf (dump_file, "Transforming "); 2913 print_gimple_stmt (dump_file, stmt, 0, 0); 2914 } 2915 2916 rhs1 = gimple_assign_rhs1 (stmt); 2917 gimple_assign_set_rhs_from_tree (&gsi, 2918 VEC_last (operand_entry_t, 2919 ops)->op); 2920 update_stmt (stmt); 2921 remove_visited_stmt_chain (rhs1); 2922 2923 if (dump_file && (dump_flags & TDF_DETAILS)) 2924 { 2925 fprintf (dump_file, " into "); 2926 print_gimple_stmt (dump_file, stmt, 0, 0); 2927 } 2928 } 2929 else 2930 { 2931 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); 2932 int ops_num = VEC_length (operand_entry_t, ops); 2933 int width = get_reassociation_width (ops_num, rhs_code, mode); 2934 2935 if (dump_file && (dump_flags & TDF_DETAILS)) 2936 fprintf (dump_file, 2937 "Width = %d was chosen for reassociation\n", width); 2938 2939 if (width > 1 2940 && VEC_length (operand_entry_t, ops) > 3) 2941 rewrite_expr_tree_parallel (stmt, width, ops); 2942 else 2943 rewrite_expr_tree (stmt, 0, ops, false); 2944 } 2945 2946 VEC_free (operand_entry_t, heap, ops); 2947 } 2948 } 2949 } 2950 for (son = first_dom_son (CDI_POST_DOMINATORS, bb); 2951 son; 2952 son = next_dom_son (CDI_POST_DOMINATORS, son)) 2953 reassociate_bb (son); 2954 } 2955 2956 void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops); 2957 void debug_ops_vector (VEC (operand_entry_t, heap) *ops); 2958 2959 /* Dump the operand entry vector OPS to FILE. */ 2960 2961 void 2962 dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops) 2963 { 2964 operand_entry_t oe; 2965 unsigned int i; 2966 2967 FOR_EACH_VEC_ELT (operand_entry_t, ops, i, oe) 2968 { 2969 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank); 2970 print_generic_expr (file, oe->op, 0); 2971 } 2972 } 2973 2974 /* Dump the operand entry vector OPS to STDERR. */ 2975 2976 DEBUG_FUNCTION void 2977 debug_ops_vector (VEC (operand_entry_t, heap) *ops) 2978 { 2979 dump_ops_vector (stderr, ops); 2980 } 2981 2982 static void 2983 do_reassoc (void) 2984 { 2985 break_up_subtract_bb (ENTRY_BLOCK_PTR); 2986 reassociate_bb (EXIT_BLOCK_PTR); 2987 } 2988 2989 /* Initialize the reassociation pass. */ 2990 2991 static void 2992 init_reassoc (void) 2993 { 2994 int i; 2995 long rank = 2; 2996 tree param; 2997 int *bbs = XNEWVEC (int, last_basic_block + 1); 2998 2999 /* Find the loops, so that we can prevent moving calculations in 3000 them. */ 3001 loop_optimizer_init (AVOID_CFG_MODIFICATIONS); 3002 3003 memset (&reassociate_stats, 0, sizeof (reassociate_stats)); 3004 3005 operand_entry_pool = create_alloc_pool ("operand entry pool", 3006 sizeof (struct operand_entry), 30); 3007 next_operand_entry_id = 0; 3008 3009 /* Reverse RPO (Reverse Post Order) will give us something where 3010 deeper loops come later. */ 3011 pre_and_rev_post_order_compute (NULL, bbs, false); 3012 bb_rank = XCNEWVEC (long, last_basic_block + 1); 3013 operand_rank = pointer_map_create (); 3014 3015 /* Give each argument a distinct rank. */ 3016 for (param = DECL_ARGUMENTS (current_function_decl); 3017 param; 3018 param = DECL_CHAIN (param)) 3019 { 3020 if (gimple_default_def (cfun, param) != NULL) 3021 { 3022 tree def = gimple_default_def (cfun, param); 3023 insert_operand_rank (def, ++rank); 3024 } 3025 } 3026 3027 /* Give the chain decl a distinct rank. */ 3028 if (cfun->static_chain_decl != NULL) 3029 { 3030 tree def = gimple_default_def (cfun, cfun->static_chain_decl); 3031 if (def != NULL) 3032 insert_operand_rank (def, ++rank); 3033 } 3034 3035 /* Set up rank for each BB */ 3036 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++) 3037 bb_rank[bbs[i]] = ++rank << 16; 3038 3039 free (bbs); 3040 calculate_dominance_info (CDI_POST_DOMINATORS); 3041 plus_negates = NULL; 3042 } 3043 3044 /* Cleanup after the reassociation pass, and print stats if 3045 requested. */ 3046 3047 static void 3048 fini_reassoc (void) 3049 { 3050 statistics_counter_event (cfun, "Linearized", 3051 reassociate_stats.linearized); 3052 statistics_counter_event (cfun, "Constants eliminated", 3053 reassociate_stats.constants_eliminated); 3054 statistics_counter_event (cfun, "Ops eliminated", 3055 reassociate_stats.ops_eliminated); 3056 statistics_counter_event (cfun, "Statements rewritten", 3057 reassociate_stats.rewritten); 3058 3059 pointer_map_destroy (operand_rank); 3060 free_alloc_pool (operand_entry_pool); 3061 free (bb_rank); 3062 VEC_free (tree, heap, plus_negates); 3063 free_dominance_info (CDI_POST_DOMINATORS); 3064 loop_optimizer_finalize (); 3065 } 3066 3067 /* Gate and execute functions for Reassociation. */ 3068 3069 static unsigned int 3070 execute_reassoc (void) 3071 { 3072 init_reassoc (); 3073 3074 do_reassoc (); 3075 repropagate_negates (); 3076 3077 fini_reassoc (); 3078 return 0; 3079 } 3080 3081 static bool 3082 gate_tree_ssa_reassoc (void) 3083 { 3084 return flag_tree_reassoc != 0; 3085 } 3086 3087 struct gimple_opt_pass pass_reassoc = 3088 { 3089 { 3090 GIMPLE_PASS, 3091 "reassoc", /* name */ 3092 gate_tree_ssa_reassoc, /* gate */ 3093 execute_reassoc, /* execute */ 3094 NULL, /* sub */ 3095 NULL, /* next */ 3096 0, /* static_pass_number */ 3097 TV_TREE_REASSOC, /* tv_id */ 3098 PROP_cfg | PROP_ssa, /* properties_required */ 3099 0, /* properties_provided */ 3100 0, /* properties_destroyed */ 3101 0, /* todo_flags_start */ 3102 TODO_verify_ssa 3103 | TODO_verify_flow 3104 | TODO_ggc_collect /* todo_flags_finish */ 3105 } 3106 }; 3107