1 /* Partial redundancy elimination / Hoisting for RTL. 2 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 3 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify it under 8 the terms of the GNU General Public License as published by the Free 9 Software Foundation; either version 3, or (at your option) any later 10 version. 11 12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13 WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 /* TODO 22 - reordering of memory allocation and freeing to be more space efficient 23 - do rough calc of how many regs are needed in each block, and a rough 24 calc of how many regs are available in each class and use that to 25 throttle back the code in cases where RTX_COST is minimal. 26 */ 27 28 /* References searched while implementing this. 29 30 Compilers Principles, Techniques and Tools 31 Aho, Sethi, Ullman 32 Addison-Wesley, 1988 33 34 Global Optimization by Suppression of Partial Redundancies 35 E. Morel, C. Renvoise 36 communications of the acm, Vol. 22, Num. 2, Feb. 1979 37 38 A Portable Machine-Independent Global Optimizer - Design and Measurements 39 Frederick Chow 40 Stanford Ph.D. thesis, Dec. 1983 41 42 A Fast Algorithm for Code Movement Optimization 43 D.M. Dhamdhere 44 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988 45 46 A Solution to a Problem with Morel and Renvoise's 47 Global Optimization by Suppression of Partial Redundancies 48 K-H Drechsler, M.P. Stadel 49 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988 50 51 Practical Adaptation of the Global Optimization 52 Algorithm of Morel and Renvoise 53 D.M. Dhamdhere 54 ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991 55 56 Efficiently Computing Static Single Assignment Form and the Control 57 Dependence Graph 58 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck 59 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991 60 61 Lazy Code Motion 62 J. Knoop, O. Ruthing, B. Steffen 63 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI 64 65 What's In a Region? Or Computing Control Dependence Regions in Near-Linear 66 Time for Reducible Flow Control 67 Thomas Ball 68 ACM Letters on Programming Languages and Systems, 69 Vol. 2, Num. 1-4, Mar-Dec 1993 70 71 An Efficient Representation for Sparse Sets 72 Preston Briggs, Linda Torczon 73 ACM Letters on Programming Languages and Systems, 74 Vol. 2, Num. 1-4, Mar-Dec 1993 75 76 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion 77 K-H Drechsler, M.P. Stadel 78 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993 79 80 Partial Dead Code Elimination 81 J. Knoop, O. Ruthing, B. Steffen 82 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994 83 84 Effective Partial Redundancy Elimination 85 P. Briggs, K.D. Cooper 86 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994 87 88 The Program Structure Tree: Computing Control Regions in Linear Time 89 R. Johnson, D. Pearson, K. Pingali 90 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994 91 92 Optimal Code Motion: Theory and Practice 93 J. Knoop, O. Ruthing, B. Steffen 94 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994 95 96 The power of assignment motion 97 J. Knoop, O. Ruthing, B. Steffen 98 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI 99 100 Global code motion / global value numbering 101 C. Click 102 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI 103 104 Value Driven Redundancy Elimination 105 L.T. Simpson 106 Rice University Ph.D. thesis, Apr. 1996 107 108 Value Numbering 109 L.T. Simpson 110 Massively Scalar Compiler Project, Rice University, Sep. 1996 111 112 High Performance Compilers for Parallel Computing 113 Michael Wolfe 114 Addison-Wesley, 1996 115 116 Advanced Compiler Design and Implementation 117 Steven Muchnick 118 Morgan Kaufmann, 1997 119 120 Building an Optimizing Compiler 121 Robert Morgan 122 Digital Press, 1998 123 124 People wishing to speed up the code here should read: 125 Elimination Algorithms for Data Flow Analysis 126 B.G. Ryder, M.C. Paull 127 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986 128 129 How to Analyze Large Programs Efficiently and Informatively 130 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck 131 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI 132 133 People wishing to do something different can find various possibilities 134 in the above papers and elsewhere. 135 */ 136 137 #include "config.h" 138 #include "system.h" 139 #include "coretypes.h" 140 #include "tm.h" 141 #include "diagnostic-core.h" 142 #include "toplev.h" 143 144 #include "rtl.h" 145 #include "tree.h" 146 #include "tm_p.h" 147 #include "regs.h" 148 #include "hard-reg-set.h" 149 #include "flags.h" 150 #include "insn-config.h" 151 #include "recog.h" 152 #include "basic-block.h" 153 #include "output.h" 154 #include "function.h" 155 #include "expr.h" 156 #include "except.h" 157 #include "ggc.h" 158 #include "params.h" 159 #include "cselib.h" 160 #include "intl.h" 161 #include "obstack.h" 162 #include "timevar.h" 163 #include "tree-pass.h" 164 #include "hashtab.h" 165 #include "df.h" 166 #include "dbgcnt.h" 167 #include "target.h" 168 #include "gcse.h" 169 170 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations 171 are a superset of those done by classic GCSE. 172 173 Two passes of copy/constant propagation are done around PRE or hoisting 174 because the first one enables more GCSE and the second one helps to clean 175 up the copies that PRE and HOIST create. This is needed more for PRE than 176 for HOIST because code hoisting will try to use an existing register 177 containing the common subexpression rather than create a new one. This is 178 harder to do for PRE because of the code motion (which HOIST doesn't do). 179 180 Expressions we are interested in GCSE-ing are of the form 181 (set (pseudo-reg) (expression)). 182 Function want_to_gcse_p says what these are. 183 184 In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing. 185 This allows PRE to hoist expressions that are expressed in multiple insns, 186 such as complex address calculations (e.g. for PIC code, or loads with a 187 high part and a low part). 188 189 PRE handles moving invariant expressions out of loops (by treating them as 190 partially redundant). 191 192 ********************** 193 194 We used to support multiple passes but there are diminishing returns in 195 doing so. The first pass usually makes 90% of the changes that are doable. 196 A second pass can make a few more changes made possible by the first pass. 197 Experiments show any further passes don't make enough changes to justify 198 the expense. 199 200 A study of spec92 using an unlimited number of passes: 201 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83, 202 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2, 203 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1 204 205 It was found doing copy propagation between each pass enables further 206 substitutions. 207 208 This study was done before expressions in REG_EQUAL notes were added as 209 candidate expressions for optimization, and before the GIMPLE optimizers 210 were added. Probably, multiple passes is even less efficient now than 211 at the time when the study was conducted. 212 213 PRE is quite expensive in complicated functions because the DFA can take 214 a while to converge. Hence we only perform one pass. 215 216 ********************** 217 218 The steps for PRE are: 219 220 1) Build the hash table of expressions we wish to GCSE (expr_hash_table). 221 222 2) Perform the data flow analysis for PRE. 223 224 3) Delete the redundant instructions 225 226 4) Insert the required copies [if any] that make the partially 227 redundant instructions fully redundant. 228 229 5) For other reaching expressions, insert an instruction to copy the value 230 to a newly created pseudo that will reach the redundant instruction. 231 232 The deletion is done first so that when we do insertions we 233 know which pseudo reg to use. 234 235 Various papers have argued that PRE DFA is expensive (O(n^2)) and others 236 argue it is not. The number of iterations for the algorithm to converge 237 is typically 2-4 so I don't view it as that expensive (relatively speaking). 238 239 PRE GCSE depends heavily on the second CPROP pass to clean up the copies 240 we create. To make an expression reach the place where it's redundant, 241 the result of the expression is copied to a new register, and the redundant 242 expression is deleted by replacing it with this new register. Classic GCSE 243 doesn't have this problem as much as it computes the reaching defs of 244 each register in each block and thus can try to use an existing 245 register. */ 246 247 /* GCSE global vars. */ 248 249 struct target_gcse default_target_gcse; 250 #if SWITCHABLE_TARGET 251 struct target_gcse *this_target_gcse = &default_target_gcse; 252 #endif 253 254 /* Set to non-zero if CSE should run after all GCSE optimizations are done. */ 255 int flag_rerun_cse_after_global_opts; 256 257 /* An obstack for our working variables. */ 258 static struct obstack gcse_obstack; 259 260 struct reg_use {rtx reg_rtx; }; 261 262 /* Hash table of expressions. */ 263 264 struct expr 265 { 266 /* The expression. */ 267 rtx expr; 268 /* Index in the available expression bitmaps. */ 269 int bitmap_index; 270 /* Next entry with the same hash. */ 271 struct expr *next_same_hash; 272 /* List of anticipatable occurrences in basic blocks in the function. 273 An "anticipatable occurrence" is one that is the first occurrence in the 274 basic block, the operands are not modified in the basic block prior 275 to the occurrence and the output is not used between the start of 276 the block and the occurrence. */ 277 struct occr *antic_occr; 278 /* List of available occurrence in basic blocks in the function. 279 An "available occurrence" is one that is the last occurrence in the 280 basic block and the operands are not modified by following statements in 281 the basic block [including this insn]. */ 282 struct occr *avail_occr; 283 /* Non-null if the computation is PRE redundant. 284 The value is the newly created pseudo-reg to record a copy of the 285 expression in all the places that reach the redundant copy. */ 286 rtx reaching_reg; 287 /* Maximum distance in instructions this expression can travel. 288 We avoid moving simple expressions for more than a few instructions 289 to keep register pressure under control. 290 A value of "0" removes restrictions on how far the expression can 291 travel. */ 292 int max_distance; 293 }; 294 295 /* Occurrence of an expression. 296 There is one per basic block. If a pattern appears more than once the 297 last appearance is used [or first for anticipatable expressions]. */ 298 299 struct occr 300 { 301 /* Next occurrence of this expression. */ 302 struct occr *next; 303 /* The insn that computes the expression. */ 304 rtx insn; 305 /* Nonzero if this [anticipatable] occurrence has been deleted. */ 306 char deleted_p; 307 /* Nonzero if this [available] occurrence has been copied to 308 reaching_reg. */ 309 /* ??? This is mutually exclusive with deleted_p, so they could share 310 the same byte. */ 311 char copied_p; 312 }; 313 314 typedef struct occr *occr_t; 315 DEF_VEC_P (occr_t); 316 DEF_VEC_ALLOC_P (occr_t, heap); 317 318 /* Expression hash tables. 319 Each hash table is an array of buckets. 320 ??? It is known that if it were an array of entries, structure elements 321 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is 322 not clear whether in the final analysis a sufficient amount of memory would 323 be saved as the size of the available expression bitmaps would be larger 324 [one could build a mapping table without holes afterwards though]. 325 Someday I'll perform the computation and figure it out. */ 326 327 struct hash_table_d 328 { 329 /* The table itself. 330 This is an array of `expr_hash_table_size' elements. */ 331 struct expr **table; 332 333 /* Size of the hash table, in elements. */ 334 unsigned int size; 335 336 /* Number of hash table elements. */ 337 unsigned int n_elems; 338 }; 339 340 /* Expression hash table. */ 341 static struct hash_table_d expr_hash_table; 342 343 /* This is a list of expressions which are MEMs and will be used by load 344 or store motion. 345 Load motion tracks MEMs which aren't killed by anything except itself, 346 i.e. loads and stores to a single location. 347 We can then allow movement of these MEM refs with a little special 348 allowance. (all stores copy the same value to the reaching reg used 349 for the loads). This means all values used to store into memory must have 350 no side effects so we can re-issue the setter value. */ 351 352 struct ls_expr 353 { 354 struct expr * expr; /* Gcse expression reference for LM. */ 355 rtx pattern; /* Pattern of this mem. */ 356 rtx pattern_regs; /* List of registers mentioned by the mem. */ 357 rtx loads; /* INSN list of loads seen. */ 358 rtx stores; /* INSN list of stores seen. */ 359 struct ls_expr * next; /* Next in the list. */ 360 int invalid; /* Invalid for some reason. */ 361 int index; /* If it maps to a bitmap index. */ 362 unsigned int hash_index; /* Index when in a hash table. */ 363 rtx reaching_reg; /* Register to use when re-writing. */ 364 }; 365 366 /* Head of the list of load/store memory refs. */ 367 static struct ls_expr * pre_ldst_mems = NULL; 368 369 /* Hashtable for the load/store memory refs. */ 370 static htab_t pre_ldst_table = NULL; 371 372 /* Bitmap containing one bit for each register in the program. 373 Used when performing GCSE to track which registers have been set since 374 the start of the basic block. */ 375 static regset reg_set_bitmap; 376 377 /* Array, indexed by basic block number for a list of insns which modify 378 memory within that block. */ 379 static VEC (rtx,heap) **modify_mem_list; 380 static bitmap modify_mem_list_set; 381 382 typedef struct modify_pair_s 383 { 384 rtx dest; /* A MEM. */ 385 rtx dest_addr; /* The canonical address of `dest'. */ 386 } modify_pair; 387 388 DEF_VEC_O(modify_pair); 389 DEF_VEC_ALLOC_O(modify_pair,heap); 390 391 /* This array parallels modify_mem_list, except that it stores MEMs 392 being set and their canonicalized memory addresses. */ 393 static VEC (modify_pair,heap) **canon_modify_mem_list; 394 395 /* Bitmap indexed by block numbers to record which blocks contain 396 function calls. */ 397 static bitmap blocks_with_calls; 398 399 /* Various variables for statistics gathering. */ 400 401 /* Memory used in a pass. 402 This isn't intended to be absolutely precise. Its intent is only 403 to keep an eye on memory usage. */ 404 static int bytes_used; 405 406 /* GCSE substitutions made. */ 407 static int gcse_subst_count; 408 /* Number of copy instructions created. */ 409 static int gcse_create_count; 410 411 /* Doing code hoisting. */ 412 static bool doing_code_hoisting_p = false; 413 414 /* For available exprs */ 415 static sbitmap *ae_kill; 416 417 static void compute_can_copy (void); 418 static void *gmalloc (size_t) ATTRIBUTE_MALLOC; 419 static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC; 420 static void *gcse_alloc (unsigned long); 421 static void alloc_gcse_mem (void); 422 static void free_gcse_mem (void); 423 static void hash_scan_insn (rtx, struct hash_table_d *); 424 static void hash_scan_set (rtx, rtx, struct hash_table_d *); 425 static void hash_scan_clobber (rtx, rtx, struct hash_table_d *); 426 static void hash_scan_call (rtx, rtx, struct hash_table_d *); 427 static int want_to_gcse_p (rtx, int *); 428 static int oprs_unchanged_p (const_rtx, const_rtx, int); 429 static int oprs_anticipatable_p (const_rtx, const_rtx); 430 static int oprs_available_p (const_rtx, const_rtx); 431 static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int, 432 struct hash_table_d *); 433 static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int); 434 static int expr_equiv_p (const_rtx, const_rtx); 435 static void record_last_reg_set_info (rtx, int); 436 static void record_last_mem_set_info (rtx); 437 static void record_last_set_info (rtx, const_rtx, void *); 438 static void compute_hash_table (struct hash_table_d *); 439 static void alloc_hash_table (struct hash_table_d *); 440 static void free_hash_table (struct hash_table_d *); 441 static void compute_hash_table_work (struct hash_table_d *); 442 static void dump_hash_table (FILE *, const char *, struct hash_table_d *); 443 static void compute_transp (const_rtx, int, sbitmap *); 444 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *, 445 struct hash_table_d *); 446 static void mems_conflict_for_gcse_p (rtx, const_rtx, void *); 447 static int load_killed_in_block_p (const_basic_block, int, const_rtx, int); 448 static void canon_list_insert (rtx, const_rtx, void *); 449 static void alloc_pre_mem (int, int); 450 static void free_pre_mem (void); 451 static struct edge_list *compute_pre_data (void); 452 static int pre_expr_reaches_here_p (basic_block, struct expr *, 453 basic_block); 454 static void insert_insn_end_basic_block (struct expr *, basic_block); 455 static void pre_insert_copy_insn (struct expr *, rtx); 456 static void pre_insert_copies (void); 457 static int pre_delete (void); 458 static int pre_gcse (struct edge_list *); 459 static int one_pre_gcse_pass (void); 460 static void add_label_notes (rtx, rtx); 461 static void alloc_code_hoist_mem (int, int); 462 static void free_code_hoist_mem (void); 463 static void compute_code_hoist_vbeinout (void); 464 static void compute_code_hoist_data (void); 465 static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *, 466 int, int *); 467 static int hoist_code (void); 468 static int one_code_hoisting_pass (void); 469 static rtx process_insert_insn (struct expr *); 470 static int pre_edge_insert (struct edge_list *, struct expr **); 471 static int pre_expr_reaches_here_p_work (basic_block, struct expr *, 472 basic_block, char *); 473 static struct ls_expr * ldst_entry (rtx); 474 static void free_ldst_entry (struct ls_expr *); 475 static void free_ld_motion_mems (void); 476 static void print_ldst_list (FILE *); 477 static struct ls_expr * find_rtx_in_ldst (rtx); 478 static int simple_mem (const_rtx); 479 static void invalidate_any_buried_refs (rtx); 480 static void compute_ld_motion_mems (void); 481 static void trim_ld_motion_mems (void); 482 static void update_ld_motion_stores (struct expr *); 483 static void clear_modify_mem_tables (void); 484 static void free_modify_mem_tables (void); 485 static rtx gcse_emit_move_after (rtx, rtx, rtx); 486 static bool is_too_expensive (const char *); 487 488 #define GNEW(T) ((T *) gmalloc (sizeof (T))) 489 #define GCNEW(T) ((T *) gcalloc (1, sizeof (T))) 490 491 #define GNEWVEC(T, N) ((T *) gmalloc (sizeof (T) * (N))) 492 #define GCNEWVEC(T, N) ((T *) gcalloc ((N), sizeof (T))) 493 494 #define GNEWVAR(T, S) ((T *) gmalloc ((S))) 495 #define GCNEWVAR(T, S) ((T *) gcalloc (1, (S))) 496 497 #define GOBNEW(T) ((T *) gcse_alloc (sizeof (T))) 498 #define GOBNEWVAR(T, S) ((T *) gcse_alloc ((S))) 499 500 /* Misc. utilities. */ 501 502 #define can_copy \ 503 (this_target_gcse->x_can_copy) 504 #define can_copy_init_p \ 505 (this_target_gcse->x_can_copy_init_p) 506 507 /* Compute which modes support reg/reg copy operations. */ 508 509 static void 510 compute_can_copy (void) 511 { 512 int i; 513 #ifndef AVOID_CCMODE_COPIES 514 rtx reg, insn; 515 #endif 516 memset (can_copy, 0, NUM_MACHINE_MODES); 517 518 start_sequence (); 519 for (i = 0; i < NUM_MACHINE_MODES; i++) 520 if (GET_MODE_CLASS (i) == MODE_CC) 521 { 522 #ifdef AVOID_CCMODE_COPIES 523 can_copy[i] = 0; 524 #else 525 reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1); 526 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg)); 527 if (recog (PATTERN (insn), insn, NULL) >= 0) 528 can_copy[i] = 1; 529 #endif 530 } 531 else 532 can_copy[i] = 1; 533 534 end_sequence (); 535 } 536 537 /* Returns whether the mode supports reg/reg copy operations. */ 538 539 bool 540 can_copy_p (enum machine_mode mode) 541 { 542 if (! can_copy_init_p) 543 { 544 compute_can_copy (); 545 can_copy_init_p = true; 546 } 547 548 return can_copy[mode] != 0; 549 } 550 551 /* Cover function to xmalloc to record bytes allocated. */ 552 553 static void * 554 gmalloc (size_t size) 555 { 556 bytes_used += size; 557 return xmalloc (size); 558 } 559 560 /* Cover function to xcalloc to record bytes allocated. */ 561 562 static void * 563 gcalloc (size_t nelem, size_t elsize) 564 { 565 bytes_used += nelem * elsize; 566 return xcalloc (nelem, elsize); 567 } 568 569 /* Cover function to obstack_alloc. */ 570 571 static void * 572 gcse_alloc (unsigned long size) 573 { 574 bytes_used += size; 575 return obstack_alloc (&gcse_obstack, size); 576 } 577 578 /* Allocate memory for the reg/memory set tracking tables. 579 This is called at the start of each pass. */ 580 581 static void 582 alloc_gcse_mem (void) 583 { 584 /* Allocate vars to track sets of regs. */ 585 reg_set_bitmap = ALLOC_REG_SET (NULL); 586 587 /* Allocate array to keep a list of insns which modify memory in each 588 basic block. */ 589 modify_mem_list = GCNEWVEC (VEC (rtx,heap) *, last_basic_block); 590 canon_modify_mem_list = GCNEWVEC (VEC (modify_pair,heap) *, 591 last_basic_block); 592 modify_mem_list_set = BITMAP_ALLOC (NULL); 593 blocks_with_calls = BITMAP_ALLOC (NULL); 594 } 595 596 /* Free memory allocated by alloc_gcse_mem. */ 597 598 static void 599 free_gcse_mem (void) 600 { 601 FREE_REG_SET (reg_set_bitmap); 602 603 free_modify_mem_tables (); 604 BITMAP_FREE (modify_mem_list_set); 605 BITMAP_FREE (blocks_with_calls); 606 } 607 608 /* Compute the local properties of each recorded expression. 609 610 Local properties are those that are defined by the block, irrespective of 611 other blocks. 612 613 An expression is transparent in a block if its operands are not modified 614 in the block. 615 616 An expression is computed (locally available) in a block if it is computed 617 at least once and expression would contain the same value if the 618 computation was moved to the end of the block. 619 620 An expression is locally anticipatable in a block if it is computed at 621 least once and expression would contain the same value if the computation 622 was moved to the beginning of the block. 623 624 We call this routine for pre and code hoisting. They all compute 625 basically the same information and thus can easily share this code. 626 627 TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local 628 properties. If NULL, then it is not necessary to compute or record that 629 particular property. 630 631 TABLE controls which hash table to look at. */ 632 633 static void 634 compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc, 635 struct hash_table_d *table) 636 { 637 unsigned int i; 638 639 /* Initialize any bitmaps that were passed in. */ 640 if (transp) 641 { 642 sbitmap_vector_ones (transp, last_basic_block); 643 } 644 645 if (comp) 646 sbitmap_vector_zero (comp, last_basic_block); 647 if (antloc) 648 sbitmap_vector_zero (antloc, last_basic_block); 649 650 for (i = 0; i < table->size; i++) 651 { 652 struct expr *expr; 653 654 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash) 655 { 656 int indx = expr->bitmap_index; 657 struct occr *occr; 658 659 /* The expression is transparent in this block if it is not killed. 660 We start by assuming all are transparent [none are killed], and 661 then reset the bits for those that are. */ 662 if (transp) 663 compute_transp (expr->expr, indx, transp); 664 665 /* The occurrences recorded in antic_occr are exactly those that 666 we want to set to nonzero in ANTLOC. */ 667 if (antloc) 668 for (occr = expr->antic_occr; occr != NULL; occr = occr->next) 669 { 670 SET_BIT (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx); 671 672 /* While we're scanning the table, this is a good place to 673 initialize this. */ 674 occr->deleted_p = 0; 675 } 676 677 /* The occurrences recorded in avail_occr are exactly those that 678 we want to set to nonzero in COMP. */ 679 if (comp) 680 for (occr = expr->avail_occr; occr != NULL; occr = occr->next) 681 { 682 SET_BIT (comp[BLOCK_FOR_INSN (occr->insn)->index], indx); 683 684 /* While we're scanning the table, this is a good place to 685 initialize this. */ 686 occr->copied_p = 0; 687 } 688 689 /* While we're scanning the table, this is a good place to 690 initialize this. */ 691 expr->reaching_reg = 0; 692 } 693 } 694 } 695 696 /* Hash table support. */ 697 698 struct reg_avail_info 699 { 700 basic_block last_bb; 701 int first_set; 702 int last_set; 703 }; 704 705 static struct reg_avail_info *reg_avail_info; 706 static basic_block current_bb; 707 708 /* See whether X, the source of a set, is something we want to consider for 709 GCSE. */ 710 711 static int 712 want_to_gcse_p (rtx x, int *max_distance_ptr) 713 { 714 #ifdef STACK_REGS 715 /* On register stack architectures, don't GCSE constants from the 716 constant pool, as the benefits are often swamped by the overhead 717 of shuffling the register stack between basic blocks. */ 718 if (IS_STACK_MODE (GET_MODE (x))) 719 x = avoid_constant_pool_reference (x); 720 #endif 721 722 /* GCSE'ing constants: 723 724 We do not specifically distinguish between constant and non-constant 725 expressions in PRE and Hoist. We use set_src_cost below to limit 726 the maximum distance simple expressions can travel. 727 728 Nevertheless, constants are much easier to GCSE, and, hence, 729 it is easy to overdo the optimizations. Usually, excessive PRE and 730 Hoisting of constant leads to increased register pressure. 731 732 RA can deal with this by rematerialing some of the constants. 733 Therefore, it is important that the back-end generates sets of constants 734 in a way that allows reload rematerialize them under high register 735 pressure, i.e., a pseudo register with REG_EQUAL to constant 736 is set only once. Failing to do so will result in IRA/reload 737 spilling such constants under high register pressure instead of 738 rematerializing them. */ 739 740 switch (GET_CODE (x)) 741 { 742 case REG: 743 case SUBREG: 744 case CALL: 745 return 0; 746 747 case CONST_INT: 748 case CONST_DOUBLE: 749 case CONST_FIXED: 750 case CONST_VECTOR: 751 if (!doing_code_hoisting_p) 752 /* Do not PRE constants. */ 753 return 0; 754 755 /* FALLTHRU */ 756 757 default: 758 if (doing_code_hoisting_p) 759 /* PRE doesn't implement max_distance restriction. */ 760 { 761 int cost; 762 int max_distance; 763 764 gcc_assert (!optimize_function_for_speed_p (cfun) 765 && optimize_function_for_size_p (cfun)); 766 cost = set_src_cost (x, 0); 767 768 if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST)) 769 { 770 max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10; 771 if (max_distance == 0) 772 return 0; 773 774 gcc_assert (max_distance > 0); 775 } 776 else 777 max_distance = 0; 778 779 if (max_distance_ptr) 780 *max_distance_ptr = max_distance; 781 } 782 783 return can_assign_to_reg_without_clobbers_p (x); 784 } 785 } 786 787 /* Used internally by can_assign_to_reg_without_clobbers_p. */ 788 789 static GTY(()) rtx test_insn; 790 791 /* Return true if we can assign X to a pseudo register such that the 792 resulting insn does not result in clobbering a hard register as a 793 side-effect. 794 795 Additionally, if the target requires it, check that the resulting insn 796 can be copied. If it cannot, this means that X is special and probably 797 has hidden side-effects we don't want to mess with. 798 799 This function is typically used by code motion passes, to verify 800 that it is safe to insert an insn without worrying about clobbering 801 maybe live hard regs. */ 802 803 bool 804 can_assign_to_reg_without_clobbers_p (rtx x) 805 { 806 int num_clobbers = 0; 807 int icode; 808 809 /* If this is a valid operand, we are OK. If it's VOIDmode, we aren't. */ 810 if (general_operand (x, GET_MODE (x))) 811 return 1; 812 else if (GET_MODE (x) == VOIDmode) 813 return 0; 814 815 /* Otherwise, check if we can make a valid insn from it. First initialize 816 our test insn if we haven't already. */ 817 if (test_insn == 0) 818 { 819 test_insn 820 = make_insn_raw (gen_rtx_SET (VOIDmode, 821 gen_rtx_REG (word_mode, 822 FIRST_PSEUDO_REGISTER * 2), 823 const0_rtx)); 824 NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0; 825 } 826 827 /* Now make an insn like the one we would make when GCSE'ing and see if 828 valid. */ 829 PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x)); 830 SET_SRC (PATTERN (test_insn)) = x; 831 832 icode = recog (PATTERN (test_insn), test_insn, &num_clobbers); 833 if (icode < 0) 834 return false; 835 836 if (num_clobbers > 0 && added_clobbers_hard_reg_p (icode)) 837 return false; 838 839 if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (test_insn)) 840 return false; 841 842 return true; 843 } 844 845 /* Return nonzero if the operands of expression X are unchanged from the 846 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0), 847 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */ 848 849 static int 850 oprs_unchanged_p (const_rtx x, const_rtx insn, int avail_p) 851 { 852 int i, j; 853 enum rtx_code code; 854 const char *fmt; 855 856 if (x == 0) 857 return 1; 858 859 code = GET_CODE (x); 860 switch (code) 861 { 862 case REG: 863 { 864 struct reg_avail_info *info = ®_avail_info[REGNO (x)]; 865 866 if (info->last_bb != current_bb) 867 return 1; 868 if (avail_p) 869 return info->last_set < DF_INSN_LUID (insn); 870 else 871 return info->first_set >= DF_INSN_LUID (insn); 872 } 873 874 case MEM: 875 if (load_killed_in_block_p (current_bb, DF_INSN_LUID (insn), 876 x, avail_p)) 877 return 0; 878 else 879 return oprs_unchanged_p (XEXP (x, 0), insn, avail_p); 880 881 case PRE_DEC: 882 case PRE_INC: 883 case POST_DEC: 884 case POST_INC: 885 case PRE_MODIFY: 886 case POST_MODIFY: 887 return 0; 888 889 case PC: 890 case CC0: /*FIXME*/ 891 case CONST: 892 case CONST_INT: 893 case CONST_DOUBLE: 894 case CONST_FIXED: 895 case CONST_VECTOR: 896 case SYMBOL_REF: 897 case LABEL_REF: 898 case ADDR_VEC: 899 case ADDR_DIFF_VEC: 900 return 1; 901 902 default: 903 break; 904 } 905 906 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--) 907 { 908 if (fmt[i] == 'e') 909 { 910 /* If we are about to do the last recursive call needed at this 911 level, change it into iteration. This function is called enough 912 to be worth it. */ 913 if (i == 0) 914 return oprs_unchanged_p (XEXP (x, i), insn, avail_p); 915 916 else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p)) 917 return 0; 918 } 919 else if (fmt[i] == 'E') 920 for (j = 0; j < XVECLEN (x, i); j++) 921 if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p)) 922 return 0; 923 } 924 925 return 1; 926 } 927 928 /* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p. */ 929 930 struct mem_conflict_info 931 { 932 /* A memory reference for a load instruction, mems_conflict_for_gcse_p will 933 see if a memory store conflicts with this memory load. */ 934 const_rtx mem; 935 936 /* True if mems_conflict_for_gcse_p finds a conflict between two memory 937 references. */ 938 bool conflict; 939 }; 940 941 /* DEST is the output of an instruction. If it is a memory reference and 942 possibly conflicts with the load found in DATA, then communicate this 943 information back through DATA. */ 944 945 static void 946 mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, 947 void *data) 948 { 949 struct mem_conflict_info *mci = (struct mem_conflict_info *) data; 950 951 while (GET_CODE (dest) == SUBREG 952 || GET_CODE (dest) == ZERO_EXTRACT 953 || GET_CODE (dest) == STRICT_LOW_PART) 954 dest = XEXP (dest, 0); 955 956 /* If DEST is not a MEM, then it will not conflict with the load. Note 957 that function calls are assumed to clobber memory, but are handled 958 elsewhere. */ 959 if (! MEM_P (dest)) 960 return; 961 962 /* If we are setting a MEM in our list of specially recognized MEMs, 963 don't mark as killed this time. */ 964 if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem)) 965 { 966 if (!find_rtx_in_ldst (dest)) 967 mci->conflict = true; 968 return; 969 } 970 971 if (true_dependence (dest, GET_MODE (dest), mci->mem)) 972 mci->conflict = true; 973 } 974 975 /* Return nonzero if the expression in X (a memory reference) is killed 976 in block BB before or after the insn with the LUID in UID_LIMIT. 977 AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills 978 before UID_LIMIT. 979 980 To check the entire block, set UID_LIMIT to max_uid + 1 and 981 AVAIL_P to 0. */ 982 983 static int 984 load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x, 985 int avail_p) 986 { 987 VEC (rtx,heap) *list = modify_mem_list[bb->index]; 988 rtx setter; 989 unsigned ix; 990 991 /* If this is a readonly then we aren't going to be changing it. */ 992 if (MEM_READONLY_P (x)) 993 return 0; 994 995 FOR_EACH_VEC_ELT_REVERSE (rtx, list, ix, setter) 996 { 997 struct mem_conflict_info mci; 998 999 /* Ignore entries in the list that do not apply. */ 1000 if ((avail_p 1001 && DF_INSN_LUID (setter) < uid_limit) 1002 || (! avail_p 1003 && DF_INSN_LUID (setter) > uid_limit)) 1004 continue; 1005 1006 /* If SETTER is a call everything is clobbered. Note that calls 1007 to pure functions are never put on the list, so we need not 1008 worry about them. */ 1009 if (CALL_P (setter)) 1010 return 1; 1011 1012 /* SETTER must be an INSN of some kind that sets memory. Call 1013 note_stores to examine each hunk of memory that is modified. */ 1014 mci.mem = x; 1015 mci.conflict = false; 1016 note_stores (PATTERN (setter), mems_conflict_for_gcse_p, &mci); 1017 if (mci.conflict) 1018 return 1; 1019 } 1020 return 0; 1021 } 1022 1023 /* Return nonzero if the operands of expression X are unchanged from 1024 the start of INSN's basic block up to but not including INSN. */ 1025 1026 static int 1027 oprs_anticipatable_p (const_rtx x, const_rtx insn) 1028 { 1029 return oprs_unchanged_p (x, insn, 0); 1030 } 1031 1032 /* Return nonzero if the operands of expression X are unchanged from 1033 INSN to the end of INSN's basic block. */ 1034 1035 static int 1036 oprs_available_p (const_rtx x, const_rtx insn) 1037 { 1038 return oprs_unchanged_p (x, insn, 1); 1039 } 1040 1041 /* Hash expression X. 1042 1043 MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean 1044 indicating if a volatile operand is found or if the expression contains 1045 something we don't want to insert in the table. HASH_TABLE_SIZE is 1046 the current size of the hash table to be probed. */ 1047 1048 static unsigned int 1049 hash_expr (const_rtx x, enum machine_mode mode, int *do_not_record_p, 1050 int hash_table_size) 1051 { 1052 unsigned int hash; 1053 1054 *do_not_record_p = 0; 1055 1056 hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false); 1057 return hash % hash_table_size; 1058 } 1059 1060 /* Return nonzero if exp1 is equivalent to exp2. */ 1061 1062 static int 1063 expr_equiv_p (const_rtx x, const_rtx y) 1064 { 1065 return exp_equiv_p (x, y, 0, true); 1066 } 1067 1068 /* Insert expression X in INSN in the hash TABLE. 1069 If it is already present, record it as the last occurrence in INSN's 1070 basic block. 1071 1072 MODE is the mode of the value X is being stored into. 1073 It is only used if X is a CONST_INT. 1074 1075 ANTIC_P is nonzero if X is an anticipatable expression. 1076 AVAIL_P is nonzero if X is an available expression. 1077 1078 MAX_DISTANCE is the maximum distance in instructions this expression can 1079 be moved. */ 1080 1081 static void 1082 insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p, 1083 int avail_p, int max_distance, struct hash_table_d *table) 1084 { 1085 int found, do_not_record_p; 1086 unsigned int hash; 1087 struct expr *cur_expr, *last_expr = NULL; 1088 struct occr *antic_occr, *avail_occr; 1089 1090 hash = hash_expr (x, mode, &do_not_record_p, table->size); 1091 1092 /* Do not insert expression in table if it contains volatile operands, 1093 or if hash_expr determines the expression is something we don't want 1094 to or can't handle. */ 1095 if (do_not_record_p) 1096 return; 1097 1098 cur_expr = table->table[hash]; 1099 found = 0; 1100 1101 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x))) 1102 { 1103 /* If the expression isn't found, save a pointer to the end of 1104 the list. */ 1105 last_expr = cur_expr; 1106 cur_expr = cur_expr->next_same_hash; 1107 } 1108 1109 if (! found) 1110 { 1111 cur_expr = GOBNEW (struct expr); 1112 bytes_used += sizeof (struct expr); 1113 if (table->table[hash] == NULL) 1114 /* This is the first pattern that hashed to this index. */ 1115 table->table[hash] = cur_expr; 1116 else 1117 /* Add EXPR to end of this hash chain. */ 1118 last_expr->next_same_hash = cur_expr; 1119 1120 /* Set the fields of the expr element. */ 1121 cur_expr->expr = x; 1122 cur_expr->bitmap_index = table->n_elems++; 1123 cur_expr->next_same_hash = NULL; 1124 cur_expr->antic_occr = NULL; 1125 cur_expr->avail_occr = NULL; 1126 gcc_assert (max_distance >= 0); 1127 cur_expr->max_distance = max_distance; 1128 } 1129 else 1130 gcc_assert (cur_expr->max_distance == max_distance); 1131 1132 /* Now record the occurrence(s). */ 1133 if (antic_p) 1134 { 1135 antic_occr = cur_expr->antic_occr; 1136 1137 if (antic_occr 1138 && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn)) 1139 antic_occr = NULL; 1140 1141 if (antic_occr) 1142 /* Found another instance of the expression in the same basic block. 1143 Prefer the currently recorded one. We want the first one in the 1144 block and the block is scanned from start to end. */ 1145 ; /* nothing to do */ 1146 else 1147 { 1148 /* First occurrence of this expression in this basic block. */ 1149 antic_occr = GOBNEW (struct occr); 1150 bytes_used += sizeof (struct occr); 1151 antic_occr->insn = insn; 1152 antic_occr->next = cur_expr->antic_occr; 1153 antic_occr->deleted_p = 0; 1154 cur_expr->antic_occr = antic_occr; 1155 } 1156 } 1157 1158 if (avail_p) 1159 { 1160 avail_occr = cur_expr->avail_occr; 1161 1162 if (avail_occr 1163 && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn)) 1164 { 1165 /* Found another instance of the expression in the same basic block. 1166 Prefer this occurrence to the currently recorded one. We want 1167 the last one in the block and the block is scanned from start 1168 to end. */ 1169 avail_occr->insn = insn; 1170 } 1171 else 1172 { 1173 /* First occurrence of this expression in this basic block. */ 1174 avail_occr = GOBNEW (struct occr); 1175 bytes_used += sizeof (struct occr); 1176 avail_occr->insn = insn; 1177 avail_occr->next = cur_expr->avail_occr; 1178 avail_occr->deleted_p = 0; 1179 cur_expr->avail_occr = avail_occr; 1180 } 1181 } 1182 } 1183 1184 /* Scan SET present in INSN and add an entry to the hash TABLE. */ 1185 1186 static void 1187 hash_scan_set (rtx set, rtx insn, struct hash_table_d *table) 1188 { 1189 rtx src = SET_SRC (set); 1190 rtx dest = SET_DEST (set); 1191 rtx note; 1192 1193 if (GET_CODE (src) == CALL) 1194 hash_scan_call (src, insn, table); 1195 1196 else if (REG_P (dest)) 1197 { 1198 unsigned int regno = REGNO (dest); 1199 int max_distance = 0; 1200 1201 /* See if a REG_EQUAL note shows this equivalent to a simpler expression. 1202 1203 This allows us to do a single GCSE pass and still eliminate 1204 redundant constants, addresses or other expressions that are 1205 constructed with multiple instructions. 1206 1207 However, keep the original SRC if INSN is a simple reg-reg move. 1208 In this case, there will almost always be a REG_EQUAL note on the 1209 insn that sets SRC. By recording the REG_EQUAL value here as SRC 1210 for INSN, we miss copy propagation opportunities and we perform the 1211 same PRE GCSE operation repeatedly on the same REG_EQUAL value if we 1212 do more than one PRE GCSE pass. 1213 1214 Note that this does not impede profitable constant propagations. We 1215 "look through" reg-reg sets in lookup_avail_set. */ 1216 note = find_reg_equal_equiv_note (insn); 1217 if (note != 0 1218 && REG_NOTE_KIND (note) == REG_EQUAL 1219 && !REG_P (src) 1220 && want_to_gcse_p (XEXP (note, 0), NULL)) 1221 src = XEXP (note, 0), set = gen_rtx_SET (VOIDmode, dest, src); 1222 1223 /* Only record sets of pseudo-regs in the hash table. */ 1224 if (regno >= FIRST_PSEUDO_REGISTER 1225 /* Don't GCSE something if we can't do a reg/reg copy. */ 1226 && can_copy_p (GET_MODE (dest)) 1227 /* GCSE commonly inserts instruction after the insn. We can't 1228 do that easily for EH edges so disable GCSE on these for now. */ 1229 /* ??? We can now easily create new EH landing pads at the 1230 gimple level, for splitting edges; there's no reason we 1231 can't do the same thing at the rtl level. */ 1232 && !can_throw_internal (insn) 1233 /* Is SET_SRC something we want to gcse? */ 1234 && want_to_gcse_p (src, &max_distance) 1235 /* Don't CSE a nop. */ 1236 && ! set_noop_p (set) 1237 /* Don't GCSE if it has attached REG_EQUIV note. 1238 At this point this only function parameters should have 1239 REG_EQUIV notes and if the argument slot is used somewhere 1240 explicitly, it means address of parameter has been taken, 1241 so we should not extend the lifetime of the pseudo. */ 1242 && (note == NULL_RTX || ! MEM_P (XEXP (note, 0)))) 1243 { 1244 /* An expression is not anticipatable if its operands are 1245 modified before this insn or if this is not the only SET in 1246 this insn. The latter condition does not have to mean that 1247 SRC itself is not anticipatable, but we just will not be 1248 able to handle code motion of insns with multiple sets. */ 1249 int antic_p = oprs_anticipatable_p (src, insn) 1250 && !multiple_sets (insn); 1251 /* An expression is not available if its operands are 1252 subsequently modified, including this insn. It's also not 1253 available if this is a branch, because we can't insert 1254 a set after the branch. */ 1255 int avail_p = (oprs_available_p (src, insn) 1256 && ! JUMP_P (insn)); 1257 1258 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p, 1259 max_distance, table); 1260 } 1261 } 1262 /* In case of store we want to consider the memory value as available in 1263 the REG stored in that memory. This makes it possible to remove 1264 redundant loads from due to stores to the same location. */ 1265 else if (flag_gcse_las && REG_P (src) && MEM_P (dest)) 1266 { 1267 unsigned int regno = REGNO (src); 1268 int max_distance = 0; 1269 1270 /* Only record sets of pseudo-regs in the hash table. */ 1271 if (regno >= FIRST_PSEUDO_REGISTER 1272 /* Don't GCSE something if we can't do a reg/reg copy. */ 1273 && can_copy_p (GET_MODE (src)) 1274 /* GCSE commonly inserts instruction after the insn. We can't 1275 do that easily for EH edges so disable GCSE on these for now. */ 1276 && !can_throw_internal (insn) 1277 /* Is SET_DEST something we want to gcse? */ 1278 && want_to_gcse_p (dest, &max_distance) 1279 /* Don't CSE a nop. */ 1280 && ! set_noop_p (set) 1281 /* Don't GCSE if it has attached REG_EQUIV note. 1282 At this point this only function parameters should have 1283 REG_EQUIV notes and if the argument slot is used somewhere 1284 explicitly, it means address of parameter has been taken, 1285 so we should not extend the lifetime of the pseudo. */ 1286 && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0 1287 || ! MEM_P (XEXP (note, 0)))) 1288 { 1289 /* Stores are never anticipatable. */ 1290 int antic_p = 0; 1291 /* An expression is not available if its operands are 1292 subsequently modified, including this insn. It's also not 1293 available if this is a branch, because we can't insert 1294 a set after the branch. */ 1295 int avail_p = oprs_available_p (dest, insn) 1296 && ! JUMP_P (insn); 1297 1298 /* Record the memory expression (DEST) in the hash table. */ 1299 insert_expr_in_table (dest, GET_MODE (dest), insn, 1300 antic_p, avail_p, max_distance, table); 1301 } 1302 } 1303 } 1304 1305 static void 1306 hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED, 1307 struct hash_table_d *table ATTRIBUTE_UNUSED) 1308 { 1309 /* Currently nothing to do. */ 1310 } 1311 1312 static void 1313 hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED, 1314 struct hash_table_d *table ATTRIBUTE_UNUSED) 1315 { 1316 /* Currently nothing to do. */ 1317 } 1318 1319 /* Process INSN and add hash table entries as appropriate. */ 1320 1321 static void 1322 hash_scan_insn (rtx insn, struct hash_table_d *table) 1323 { 1324 rtx pat = PATTERN (insn); 1325 int i; 1326 1327 /* Pick out the sets of INSN and for other forms of instructions record 1328 what's been modified. */ 1329 1330 if (GET_CODE (pat) == SET) 1331 hash_scan_set (pat, insn, table); 1332 1333 else if (GET_CODE (pat) == CLOBBER) 1334 hash_scan_clobber (pat, insn, table); 1335 1336 else if (GET_CODE (pat) == CALL) 1337 hash_scan_call (pat, insn, table); 1338 1339 else if (GET_CODE (pat) == PARALLEL) 1340 for (i = 0; i < XVECLEN (pat, 0); i++) 1341 { 1342 rtx x = XVECEXP (pat, 0, i); 1343 1344 if (GET_CODE (x) == SET) 1345 hash_scan_set (x, insn, table); 1346 else if (GET_CODE (x) == CLOBBER) 1347 hash_scan_clobber (x, insn, table); 1348 else if (GET_CODE (x) == CALL) 1349 hash_scan_call (x, insn, table); 1350 } 1351 } 1352 1353 /* Dump the hash table TABLE to file FILE under the name NAME. */ 1354 1355 static void 1356 dump_hash_table (FILE *file, const char *name, struct hash_table_d *table) 1357 { 1358 int i; 1359 /* Flattened out table, so it's printed in proper order. */ 1360 struct expr **flat_table; 1361 unsigned int *hash_val; 1362 struct expr *expr; 1363 1364 flat_table = XCNEWVEC (struct expr *, table->n_elems); 1365 hash_val = XNEWVEC (unsigned int, table->n_elems); 1366 1367 for (i = 0; i < (int) table->size; i++) 1368 for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash) 1369 { 1370 flat_table[expr->bitmap_index] = expr; 1371 hash_val[expr->bitmap_index] = i; 1372 } 1373 1374 fprintf (file, "%s hash table (%d buckets, %d entries)\n", 1375 name, table->size, table->n_elems); 1376 1377 for (i = 0; i < (int) table->n_elems; i++) 1378 if (flat_table[i] != 0) 1379 { 1380 expr = flat_table[i]; 1381 fprintf (file, "Index %d (hash value %d; max distance %d)\n ", 1382 expr->bitmap_index, hash_val[i], expr->max_distance); 1383 print_rtl (file, expr->expr); 1384 fprintf (file, "\n"); 1385 } 1386 1387 fprintf (file, "\n"); 1388 1389 free (flat_table); 1390 free (hash_val); 1391 } 1392 1393 /* Record register first/last/block set information for REGNO in INSN. 1394 1395 first_set records the first place in the block where the register 1396 is set and is used to compute "anticipatability". 1397 1398 last_set records the last place in the block where the register 1399 is set and is used to compute "availability". 1400 1401 last_bb records the block for which first_set and last_set are 1402 valid, as a quick test to invalidate them. */ 1403 1404 static void 1405 record_last_reg_set_info (rtx insn, int regno) 1406 { 1407 struct reg_avail_info *info = ®_avail_info[regno]; 1408 int luid = DF_INSN_LUID (insn); 1409 1410 info->last_set = luid; 1411 if (info->last_bb != current_bb) 1412 { 1413 info->last_bb = current_bb; 1414 info->first_set = luid; 1415 } 1416 } 1417 1418 /* Record all of the canonicalized MEMs of record_last_mem_set_info's insn. 1419 Note we store a pair of elements in the list, so they have to be 1420 taken off pairwise. */ 1421 1422 static void 1423 canon_list_insert (rtx dest ATTRIBUTE_UNUSED, const_rtx x ATTRIBUTE_UNUSED, 1424 void * v_insn) 1425 { 1426 rtx dest_addr, insn; 1427 int bb; 1428 modify_pair *pair; 1429 1430 while (GET_CODE (dest) == SUBREG 1431 || GET_CODE (dest) == ZERO_EXTRACT 1432 || GET_CODE (dest) == STRICT_LOW_PART) 1433 dest = XEXP (dest, 0); 1434 1435 /* If DEST is not a MEM, then it will not conflict with a load. Note 1436 that function calls are assumed to clobber memory, but are handled 1437 elsewhere. */ 1438 1439 if (! MEM_P (dest)) 1440 return; 1441 1442 dest_addr = get_addr (XEXP (dest, 0)); 1443 dest_addr = canon_rtx (dest_addr); 1444 insn = (rtx) v_insn; 1445 bb = BLOCK_FOR_INSN (insn)->index; 1446 1447 pair = VEC_safe_push (modify_pair, heap, canon_modify_mem_list[bb], NULL); 1448 pair->dest = dest; 1449 pair->dest_addr = dest_addr; 1450 } 1451 1452 /* Record memory modification information for INSN. We do not actually care 1453 about the memory location(s) that are set, or even how they are set (consider 1454 a CALL_INSN). We merely need to record which insns modify memory. */ 1455 1456 static void 1457 record_last_mem_set_info (rtx insn) 1458 { 1459 int bb = BLOCK_FOR_INSN (insn)->index; 1460 1461 /* load_killed_in_block_p will handle the case of calls clobbering 1462 everything. */ 1463 VEC_safe_push (rtx, heap, modify_mem_list[bb], insn); 1464 bitmap_set_bit (modify_mem_list_set, bb); 1465 1466 if (CALL_P (insn)) 1467 bitmap_set_bit (blocks_with_calls, bb); 1468 else 1469 note_stores (PATTERN (insn), canon_list_insert, (void*) insn); 1470 } 1471 1472 /* Called from compute_hash_table via note_stores to handle one 1473 SET or CLOBBER in an insn. DATA is really the instruction in which 1474 the SET is taking place. */ 1475 1476 static void 1477 record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data) 1478 { 1479 rtx last_set_insn = (rtx) data; 1480 1481 if (GET_CODE (dest) == SUBREG) 1482 dest = SUBREG_REG (dest); 1483 1484 if (REG_P (dest)) 1485 record_last_reg_set_info (last_set_insn, REGNO (dest)); 1486 else if (MEM_P (dest) 1487 /* Ignore pushes, they clobber nothing. */ 1488 && ! push_operand (dest, GET_MODE (dest))) 1489 record_last_mem_set_info (last_set_insn); 1490 } 1491 1492 /* Top level function to create an expression hash table. 1493 1494 Expression entries are placed in the hash table if 1495 - they are of the form (set (pseudo-reg) src), 1496 - src is something we want to perform GCSE on, 1497 - none of the operands are subsequently modified in the block 1498 1499 Currently src must be a pseudo-reg or a const_int. 1500 1501 TABLE is the table computed. */ 1502 1503 static void 1504 compute_hash_table_work (struct hash_table_d *table) 1505 { 1506 int i; 1507 1508 /* re-Cache any INSN_LIST nodes we have allocated. */ 1509 clear_modify_mem_tables (); 1510 /* Some working arrays used to track first and last set in each block. */ 1511 reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ()); 1512 1513 for (i = 0; i < max_reg_num (); ++i) 1514 reg_avail_info[i].last_bb = NULL; 1515 1516 FOR_EACH_BB (current_bb) 1517 { 1518 rtx insn; 1519 unsigned int regno; 1520 1521 /* First pass over the instructions records information used to 1522 determine when registers and memory are first and last set. */ 1523 FOR_BB_INSNS (current_bb, insn) 1524 { 1525 if (!NONDEBUG_INSN_P (insn)) 1526 continue; 1527 1528 if (CALL_P (insn)) 1529 { 1530 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 1531 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno)) 1532 record_last_reg_set_info (insn, regno); 1533 1534 if (! RTL_CONST_OR_PURE_CALL_P (insn)) 1535 record_last_mem_set_info (insn); 1536 } 1537 1538 note_stores (PATTERN (insn), record_last_set_info, insn); 1539 } 1540 1541 /* The next pass builds the hash table. */ 1542 FOR_BB_INSNS (current_bb, insn) 1543 if (NONDEBUG_INSN_P (insn)) 1544 hash_scan_insn (insn, table); 1545 } 1546 1547 free (reg_avail_info); 1548 reg_avail_info = NULL; 1549 } 1550 1551 /* Allocate space for the set/expr hash TABLE. 1552 It is used to determine the number of buckets to use. */ 1553 1554 static void 1555 alloc_hash_table (struct hash_table_d *table) 1556 { 1557 int n; 1558 1559 n = get_max_insn_count (); 1560 1561 table->size = n / 4; 1562 if (table->size < 11) 1563 table->size = 11; 1564 1565 /* Attempt to maintain efficient use of hash table. 1566 Making it an odd number is simplest for now. 1567 ??? Later take some measurements. */ 1568 table->size |= 1; 1569 n = table->size * sizeof (struct expr *); 1570 table->table = GNEWVAR (struct expr *, n); 1571 } 1572 1573 /* Free things allocated by alloc_hash_table. */ 1574 1575 static void 1576 free_hash_table (struct hash_table_d *table) 1577 { 1578 free (table->table); 1579 } 1580 1581 /* Compute the expression hash table TABLE. */ 1582 1583 static void 1584 compute_hash_table (struct hash_table_d *table) 1585 { 1586 /* Initialize count of number of entries in hash table. */ 1587 table->n_elems = 0; 1588 memset (table->table, 0, table->size * sizeof (struct expr *)); 1589 1590 compute_hash_table_work (table); 1591 } 1592 1593 /* Expression tracking support. */ 1594 1595 /* Clear canon_modify_mem_list and modify_mem_list tables. */ 1596 static void 1597 clear_modify_mem_tables (void) 1598 { 1599 unsigned i; 1600 bitmap_iterator bi; 1601 1602 EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi) 1603 { 1604 VEC_free (rtx, heap, modify_mem_list[i]); 1605 VEC_free (modify_pair, heap, canon_modify_mem_list[i]); 1606 } 1607 bitmap_clear (modify_mem_list_set); 1608 bitmap_clear (blocks_with_calls); 1609 } 1610 1611 /* Release memory used by modify_mem_list_set. */ 1612 1613 static void 1614 free_modify_mem_tables (void) 1615 { 1616 clear_modify_mem_tables (); 1617 free (modify_mem_list); 1618 free (canon_modify_mem_list); 1619 modify_mem_list = 0; 1620 canon_modify_mem_list = 0; 1621 } 1622 1623 /* For each block, compute whether X is transparent. X is either an 1624 expression or an assignment [though we don't care which, for this context 1625 an assignment is treated as an expression]. For each block where an 1626 element of X is modified, reset the INDX bit in BMAP. */ 1627 1628 static void 1629 compute_transp (const_rtx x, int indx, sbitmap *bmap) 1630 { 1631 int i, j; 1632 enum rtx_code code; 1633 const char *fmt; 1634 1635 /* repeat is used to turn tail-recursion into iteration since GCC 1636 can't do it when there's no return value. */ 1637 repeat: 1638 1639 if (x == 0) 1640 return; 1641 1642 code = GET_CODE (x); 1643 switch (code) 1644 { 1645 case REG: 1646 { 1647 df_ref def; 1648 for (def = DF_REG_DEF_CHAIN (REGNO (x)); 1649 def; 1650 def = DF_REF_NEXT_REG (def)) 1651 RESET_BIT (bmap[DF_REF_BB (def)->index], indx); 1652 } 1653 1654 return; 1655 1656 case MEM: 1657 if (! MEM_READONLY_P (x)) 1658 { 1659 bitmap_iterator bi; 1660 unsigned bb_index; 1661 1662 /* First handle all the blocks with calls. We don't need to 1663 do any list walking for them. */ 1664 EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls, 0, bb_index, bi) 1665 { 1666 RESET_BIT (bmap[bb_index], indx); 1667 } 1668 1669 /* Now iterate over the blocks which have memory modifications 1670 but which do not have any calls. */ 1671 EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set, 1672 blocks_with_calls, 1673 0, bb_index, bi) 1674 { 1675 VEC (modify_pair,heap) *list 1676 = canon_modify_mem_list[bb_index]; 1677 modify_pair *pair; 1678 unsigned ix; 1679 1680 FOR_EACH_VEC_ELT_REVERSE (modify_pair, list, ix, pair) 1681 { 1682 rtx dest = pair->dest; 1683 rtx dest_addr = pair->dest_addr; 1684 1685 if (canon_true_dependence (dest, GET_MODE (dest), 1686 dest_addr, x, NULL_RTX)) 1687 RESET_BIT (bmap[bb_index], indx); 1688 } 1689 } 1690 } 1691 1692 x = XEXP (x, 0); 1693 goto repeat; 1694 1695 case PC: 1696 case CC0: /*FIXME*/ 1697 case CONST: 1698 case CONST_INT: 1699 case CONST_DOUBLE: 1700 case CONST_FIXED: 1701 case CONST_VECTOR: 1702 case SYMBOL_REF: 1703 case LABEL_REF: 1704 case ADDR_VEC: 1705 case ADDR_DIFF_VEC: 1706 return; 1707 1708 default: 1709 break; 1710 } 1711 1712 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--) 1713 { 1714 if (fmt[i] == 'e') 1715 { 1716 /* If we are about to do the last recursive call 1717 needed at this level, change it into iteration. 1718 This function is called enough to be worth it. */ 1719 if (i == 0) 1720 { 1721 x = XEXP (x, i); 1722 goto repeat; 1723 } 1724 1725 compute_transp (XEXP (x, i), indx, bmap); 1726 } 1727 else if (fmt[i] == 'E') 1728 for (j = 0; j < XVECLEN (x, i); j++) 1729 compute_transp (XVECEXP (x, i, j), indx, bmap); 1730 } 1731 } 1732 1733 /* Compute PRE+LCM working variables. */ 1734 1735 /* Local properties of expressions. */ 1736 1737 /* Nonzero for expressions that are transparent in the block. */ 1738 static sbitmap *transp; 1739 1740 /* Nonzero for expressions that are computed (available) in the block. */ 1741 static sbitmap *comp; 1742 1743 /* Nonzero for expressions that are locally anticipatable in the block. */ 1744 static sbitmap *antloc; 1745 1746 /* Nonzero for expressions where this block is an optimal computation 1747 point. */ 1748 static sbitmap *pre_optimal; 1749 1750 /* Nonzero for expressions which are redundant in a particular block. */ 1751 static sbitmap *pre_redundant; 1752 1753 /* Nonzero for expressions which should be inserted on a specific edge. */ 1754 static sbitmap *pre_insert_map; 1755 1756 /* Nonzero for expressions which should be deleted in a specific block. */ 1757 static sbitmap *pre_delete_map; 1758 1759 /* Allocate vars used for PRE analysis. */ 1760 1761 static void 1762 alloc_pre_mem (int n_blocks, int n_exprs) 1763 { 1764 transp = sbitmap_vector_alloc (n_blocks, n_exprs); 1765 comp = sbitmap_vector_alloc (n_blocks, n_exprs); 1766 antloc = sbitmap_vector_alloc (n_blocks, n_exprs); 1767 1768 pre_optimal = NULL; 1769 pre_redundant = NULL; 1770 pre_insert_map = NULL; 1771 pre_delete_map = NULL; 1772 ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs); 1773 1774 /* pre_insert and pre_delete are allocated later. */ 1775 } 1776 1777 /* Free vars used for PRE analysis. */ 1778 1779 static void 1780 free_pre_mem (void) 1781 { 1782 sbitmap_vector_free (transp); 1783 sbitmap_vector_free (comp); 1784 1785 /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */ 1786 1787 if (pre_optimal) 1788 sbitmap_vector_free (pre_optimal); 1789 if (pre_redundant) 1790 sbitmap_vector_free (pre_redundant); 1791 if (pre_insert_map) 1792 sbitmap_vector_free (pre_insert_map); 1793 if (pre_delete_map) 1794 sbitmap_vector_free (pre_delete_map); 1795 1796 transp = comp = NULL; 1797 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL; 1798 } 1799 1800 /* Remove certain expressions from anticipatable and transparent 1801 sets of basic blocks that have incoming abnormal edge. 1802 For PRE remove potentially trapping expressions to avoid placing 1803 them on abnormal edges. For hoisting remove memory references that 1804 can be clobbered by calls. */ 1805 1806 static void 1807 prune_expressions (bool pre_p) 1808 { 1809 sbitmap prune_exprs; 1810 struct expr *expr; 1811 unsigned int ui; 1812 basic_block bb; 1813 1814 prune_exprs = sbitmap_alloc (expr_hash_table.n_elems); 1815 sbitmap_zero (prune_exprs); 1816 for (ui = 0; ui < expr_hash_table.size; ui++) 1817 { 1818 for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash) 1819 { 1820 /* Note potentially trapping expressions. */ 1821 if (may_trap_p (expr->expr)) 1822 { 1823 SET_BIT (prune_exprs, expr->bitmap_index); 1824 continue; 1825 } 1826 1827 if (!pre_p && MEM_P (expr->expr)) 1828 /* Note memory references that can be clobbered by a call. 1829 We do not split abnormal edges in hoisting, so would 1830 a memory reference get hoisted along an abnormal edge, 1831 it would be placed /before/ the call. Therefore, only 1832 constant memory references can be hoisted along abnormal 1833 edges. */ 1834 { 1835 if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF 1836 && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0))) 1837 continue; 1838 1839 if (MEM_READONLY_P (expr->expr) 1840 && !MEM_VOLATILE_P (expr->expr) 1841 && MEM_NOTRAP_P (expr->expr)) 1842 /* Constant memory reference, e.g., a PIC address. */ 1843 continue; 1844 1845 /* ??? Optimally, we would use interprocedural alias 1846 analysis to determine if this mem is actually killed 1847 by this call. */ 1848 1849 SET_BIT (prune_exprs, expr->bitmap_index); 1850 } 1851 } 1852 } 1853 1854 FOR_EACH_BB (bb) 1855 { 1856 edge e; 1857 edge_iterator ei; 1858 1859 /* If the current block is the destination of an abnormal edge, we 1860 kill all trapping (for PRE) and memory (for hoist) expressions 1861 because we won't be able to properly place the instruction on 1862 the edge. So make them neither anticipatable nor transparent. 1863 This is fairly conservative. 1864 1865 ??? For hoisting it may be necessary to check for set-and-jump 1866 instructions here, not just for abnormal edges. The general problem 1867 is that when an expression cannot not be placed right at the end of 1868 a basic block we should account for any side-effects of a subsequent 1869 jump instructions that could clobber the expression. It would 1870 be best to implement this check along the lines of 1871 hoist_expr_reaches_here_p where the target block is already known 1872 and, hence, there's no need to conservatively prune expressions on 1873 "intermediate" set-and-jump instructions. */ 1874 FOR_EACH_EDGE (e, ei, bb->preds) 1875 if ((e->flags & EDGE_ABNORMAL) 1876 && (pre_p || CALL_P (BB_END (e->src)))) 1877 { 1878 sbitmap_difference (antloc[bb->index], 1879 antloc[bb->index], prune_exprs); 1880 sbitmap_difference (transp[bb->index], 1881 transp[bb->index], prune_exprs); 1882 break; 1883 } 1884 } 1885 1886 sbitmap_free (prune_exprs); 1887 } 1888 1889 /* It may be necessary to insert a large number of insns on edges to 1890 make the existing occurrences of expressions fully redundant. This 1891 routine examines the set of insertions and deletions and if the ratio 1892 of insertions to deletions is too high for a particular expression, then 1893 the expression is removed from the insertion/deletion sets. 1894 1895 N_ELEMS is the number of elements in the hash table. */ 1896 1897 static void 1898 prune_insertions_deletions (int n_elems) 1899 { 1900 sbitmap_iterator sbi; 1901 sbitmap prune_exprs; 1902 1903 /* We always use I to iterate over blocks/edges and J to iterate over 1904 expressions. */ 1905 unsigned int i, j; 1906 1907 /* Counts for the number of times an expression needs to be inserted and 1908 number of times an expression can be removed as a result. */ 1909 int *insertions = GCNEWVEC (int, n_elems); 1910 int *deletions = GCNEWVEC (int, n_elems); 1911 1912 /* Set of expressions which require too many insertions relative to 1913 the number of deletions achieved. We will prune these out of the 1914 insertion/deletion sets. */ 1915 prune_exprs = sbitmap_alloc (n_elems); 1916 sbitmap_zero (prune_exprs); 1917 1918 /* Iterate over the edges counting the number of times each expression 1919 needs to be inserted. */ 1920 for (i = 0; i < (unsigned) n_edges; i++) 1921 { 1922 EXECUTE_IF_SET_IN_SBITMAP (pre_insert_map[i], 0, j, sbi) 1923 insertions[j]++; 1924 } 1925 1926 /* Similarly for deletions, but those occur in blocks rather than on 1927 edges. */ 1928 for (i = 0; i < (unsigned) last_basic_block; i++) 1929 { 1930 EXECUTE_IF_SET_IN_SBITMAP (pre_delete_map[i], 0, j, sbi) 1931 deletions[j]++; 1932 } 1933 1934 /* Now that we have accurate counts, iterate over the elements in the 1935 hash table and see if any need too many insertions relative to the 1936 number of evaluations that can be removed. If so, mark them in 1937 PRUNE_EXPRS. */ 1938 for (j = 0; j < (unsigned) n_elems; j++) 1939 if (deletions[j] 1940 && ((unsigned) insertions[j] / deletions[j]) > MAX_GCSE_INSERTION_RATIO) 1941 SET_BIT (prune_exprs, j); 1942 1943 /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS. */ 1944 EXECUTE_IF_SET_IN_SBITMAP (prune_exprs, 0, j, sbi) 1945 { 1946 for (i = 0; i < (unsigned) n_edges; i++) 1947 RESET_BIT (pre_insert_map[i], j); 1948 1949 for (i = 0; i < (unsigned) last_basic_block; i++) 1950 RESET_BIT (pre_delete_map[i], j); 1951 } 1952 1953 sbitmap_free (prune_exprs); 1954 free (insertions); 1955 free (deletions); 1956 } 1957 1958 /* Top level routine to do the dataflow analysis needed by PRE. */ 1959 1960 static struct edge_list * 1961 compute_pre_data (void) 1962 { 1963 struct edge_list *edge_list; 1964 basic_block bb; 1965 1966 compute_local_properties (transp, comp, antloc, &expr_hash_table); 1967 prune_expressions (true); 1968 sbitmap_vector_zero (ae_kill, last_basic_block); 1969 1970 /* Compute ae_kill for each basic block using: 1971 1972 ~(TRANSP | COMP) 1973 */ 1974 1975 FOR_EACH_BB (bb) 1976 { 1977 sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]); 1978 sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]); 1979 } 1980 1981 edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc, 1982 ae_kill, &pre_insert_map, &pre_delete_map); 1983 sbitmap_vector_free (antloc); 1984 antloc = NULL; 1985 sbitmap_vector_free (ae_kill); 1986 ae_kill = NULL; 1987 1988 prune_insertions_deletions (expr_hash_table.n_elems); 1989 1990 return edge_list; 1991 } 1992 1993 /* PRE utilities */ 1994 1995 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach 1996 block BB. 1997 1998 VISITED is a pointer to a working buffer for tracking which BB's have 1999 been visited. It is NULL for the top-level call. 2000 2001 We treat reaching expressions that go through blocks containing the same 2002 reaching expression as "not reaching". E.g. if EXPR is generated in blocks 2003 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block 2004 2 as not reaching. The intent is to improve the probability of finding 2005 only one reaching expression and to reduce register lifetimes by picking 2006 the closest such expression. */ 2007 2008 static int 2009 pre_expr_reaches_here_p_work (basic_block occr_bb, struct expr *expr, 2010 basic_block bb, char *visited) 2011 { 2012 edge pred; 2013 edge_iterator ei; 2014 2015 FOR_EACH_EDGE (pred, ei, bb->preds) 2016 { 2017 basic_block pred_bb = pred->src; 2018 2019 if (pred->src == ENTRY_BLOCK_PTR 2020 /* Has predecessor has already been visited? */ 2021 || visited[pred_bb->index]) 2022 ;/* Nothing to do. */ 2023 2024 /* Does this predecessor generate this expression? */ 2025 else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index)) 2026 { 2027 /* Is this the occurrence we're looking for? 2028 Note that there's only one generating occurrence per block 2029 so we just need to check the block number. */ 2030 if (occr_bb == pred_bb) 2031 return 1; 2032 2033 visited[pred_bb->index] = 1; 2034 } 2035 /* Ignore this predecessor if it kills the expression. */ 2036 else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index)) 2037 visited[pred_bb->index] = 1; 2038 2039 /* Neither gen nor kill. */ 2040 else 2041 { 2042 visited[pred_bb->index] = 1; 2043 if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited)) 2044 return 1; 2045 } 2046 } 2047 2048 /* All paths have been checked. */ 2049 return 0; 2050 } 2051 2052 /* The wrapper for pre_expr_reaches_here_work that ensures that any 2053 memory allocated for that function is returned. */ 2054 2055 static int 2056 pre_expr_reaches_here_p (basic_block occr_bb, struct expr *expr, basic_block bb) 2057 { 2058 int rval; 2059 char *visited = XCNEWVEC (char, last_basic_block); 2060 2061 rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited); 2062 2063 free (visited); 2064 return rval; 2065 } 2066 2067 /* Generate RTL to copy an EXPR to its `reaching_reg' and return it. */ 2068 2069 static rtx 2070 process_insert_insn (struct expr *expr) 2071 { 2072 rtx reg = expr->reaching_reg; 2073 /* Copy the expression to make sure we don't have any sharing issues. */ 2074 rtx exp = copy_rtx (expr->expr); 2075 rtx pat; 2076 2077 start_sequence (); 2078 2079 /* If the expression is something that's an operand, like a constant, 2080 just copy it to a register. */ 2081 if (general_operand (exp, GET_MODE (reg))) 2082 emit_move_insn (reg, exp); 2083 2084 /* Otherwise, make a new insn to compute this expression and make sure the 2085 insn will be recognized (this also adds any needed CLOBBERs). */ 2086 else 2087 { 2088 rtx insn = emit_insn (gen_rtx_SET (VOIDmode, reg, exp)); 2089 2090 if (insn_invalid_p (insn)) 2091 gcc_unreachable (); 2092 } 2093 2094 pat = get_insns (); 2095 end_sequence (); 2096 2097 return pat; 2098 } 2099 2100 /* Add EXPR to the end of basic block BB. 2101 2102 This is used by both the PRE and code hoisting. */ 2103 2104 static void 2105 insert_insn_end_basic_block (struct expr *expr, basic_block bb) 2106 { 2107 rtx insn = BB_END (bb); 2108 rtx new_insn; 2109 rtx reg = expr->reaching_reg; 2110 int regno = REGNO (reg); 2111 rtx pat, pat_end; 2112 2113 pat = process_insert_insn (expr); 2114 gcc_assert (pat && INSN_P (pat)); 2115 2116 pat_end = pat; 2117 while (NEXT_INSN (pat_end) != NULL_RTX) 2118 pat_end = NEXT_INSN (pat_end); 2119 2120 /* If the last insn is a jump, insert EXPR in front [taking care to 2121 handle cc0, etc. properly]. Similarly we need to care trapping 2122 instructions in presence of non-call exceptions. */ 2123 2124 if (JUMP_P (insn) 2125 || (NONJUMP_INSN_P (insn) 2126 && (!single_succ_p (bb) 2127 || single_succ_edge (bb)->flags & EDGE_ABNORMAL))) 2128 { 2129 #ifdef HAVE_cc0 2130 rtx note; 2131 #endif 2132 2133 /* If this is a jump table, then we can't insert stuff here. Since 2134 we know the previous real insn must be the tablejump, we insert 2135 the new instruction just before the tablejump. */ 2136 if (GET_CODE (PATTERN (insn)) == ADDR_VEC 2137 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) 2138 insn = prev_active_insn (insn); 2139 2140 #ifdef HAVE_cc0 2141 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts 2142 if cc0 isn't set. */ 2143 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX); 2144 if (note) 2145 insn = XEXP (note, 0); 2146 else 2147 { 2148 rtx maybe_cc0_setter = prev_nonnote_insn (insn); 2149 if (maybe_cc0_setter 2150 && INSN_P (maybe_cc0_setter) 2151 && sets_cc0_p (PATTERN (maybe_cc0_setter))) 2152 insn = maybe_cc0_setter; 2153 } 2154 #endif 2155 /* FIXME: What if something in cc0/jump uses value set in new insn? */ 2156 new_insn = emit_insn_before_noloc (pat, insn, bb); 2157 } 2158 2159 /* Likewise if the last insn is a call, as will happen in the presence 2160 of exception handling. */ 2161 else if (CALL_P (insn) 2162 && (!single_succ_p (bb) 2163 || single_succ_edge (bb)->flags & EDGE_ABNORMAL)) 2164 { 2165 /* Keeping in mind targets with small register classes and parameters 2166 in registers, we search backward and place the instructions before 2167 the first parameter is loaded. Do this for everyone for consistency 2168 and a presumption that we'll get better code elsewhere as well. */ 2169 2170 /* Since different machines initialize their parameter registers 2171 in different orders, assume nothing. Collect the set of all 2172 parameter registers. */ 2173 insn = find_first_parameter_load (insn, BB_HEAD (bb)); 2174 2175 /* If we found all the parameter loads, then we want to insert 2176 before the first parameter load. 2177 2178 If we did not find all the parameter loads, then we might have 2179 stopped on the head of the block, which could be a CODE_LABEL. 2180 If we inserted before the CODE_LABEL, then we would be putting 2181 the insn in the wrong basic block. In that case, put the insn 2182 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */ 2183 while (LABEL_P (insn) 2184 || NOTE_INSN_BASIC_BLOCK_P (insn)) 2185 insn = NEXT_INSN (insn); 2186 2187 new_insn = emit_insn_before_noloc (pat, insn, bb); 2188 } 2189 else 2190 new_insn = emit_insn_after_noloc (pat, insn, bb); 2191 2192 while (1) 2193 { 2194 if (INSN_P (pat)) 2195 add_label_notes (PATTERN (pat), new_insn); 2196 if (pat == pat_end) 2197 break; 2198 pat = NEXT_INSN (pat); 2199 } 2200 2201 gcse_create_count++; 2202 2203 if (dump_file) 2204 { 2205 fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ", 2206 bb->index, INSN_UID (new_insn)); 2207 fprintf (dump_file, "copying expression %d to reg %d\n", 2208 expr->bitmap_index, regno); 2209 } 2210 } 2211 2212 /* Insert partially redundant expressions on edges in the CFG to make 2213 the expressions fully redundant. */ 2214 2215 static int 2216 pre_edge_insert (struct edge_list *edge_list, struct expr **index_map) 2217 { 2218 int e, i, j, num_edges, set_size, did_insert = 0; 2219 sbitmap *inserted; 2220 2221 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge 2222 if it reaches any of the deleted expressions. */ 2223 2224 set_size = pre_insert_map[0]->size; 2225 num_edges = NUM_EDGES (edge_list); 2226 inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems); 2227 sbitmap_vector_zero (inserted, num_edges); 2228 2229 for (e = 0; e < num_edges; e++) 2230 { 2231 int indx; 2232 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e); 2233 2234 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS) 2235 { 2236 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i]; 2237 2238 for (j = indx; 2239 insert && j < (int) expr_hash_table.n_elems; 2240 j++, insert >>= 1) 2241 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX) 2242 { 2243 struct expr *expr = index_map[j]; 2244 struct occr *occr; 2245 2246 /* Now look at each deleted occurrence of this expression. */ 2247 for (occr = expr->antic_occr; occr != NULL; occr = occr->next) 2248 { 2249 if (! occr->deleted_p) 2250 continue; 2251 2252 /* Insert this expression on this edge if it would 2253 reach the deleted occurrence in BB. */ 2254 if (!TEST_BIT (inserted[e], j)) 2255 { 2256 rtx insn; 2257 edge eg = INDEX_EDGE (edge_list, e); 2258 2259 /* We can't insert anything on an abnormal and 2260 critical edge, so we insert the insn at the end of 2261 the previous block. There are several alternatives 2262 detailed in Morgans book P277 (sec 10.5) for 2263 handling this situation. This one is easiest for 2264 now. */ 2265 2266 if (eg->flags & EDGE_ABNORMAL) 2267 insert_insn_end_basic_block (index_map[j], bb); 2268 else 2269 { 2270 insn = process_insert_insn (index_map[j]); 2271 insert_insn_on_edge (insn, eg); 2272 } 2273 2274 if (dump_file) 2275 { 2276 fprintf (dump_file, "PRE: edge (%d,%d), ", 2277 bb->index, 2278 INDEX_EDGE_SUCC_BB (edge_list, e)->index); 2279 fprintf (dump_file, "copy expression %d\n", 2280 expr->bitmap_index); 2281 } 2282 2283 update_ld_motion_stores (expr); 2284 SET_BIT (inserted[e], j); 2285 did_insert = 1; 2286 gcse_create_count++; 2287 } 2288 } 2289 } 2290 } 2291 } 2292 2293 sbitmap_vector_free (inserted); 2294 return did_insert; 2295 } 2296 2297 /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG. 2298 Given "old_reg <- expr" (INSN), instead of adding after it 2299 reaching_reg <- old_reg 2300 it's better to do the following: 2301 reaching_reg <- expr 2302 old_reg <- reaching_reg 2303 because this way copy propagation can discover additional PRE 2304 opportunities. But if this fails, we try the old way. 2305 When "expr" is a store, i.e. 2306 given "MEM <- old_reg", instead of adding after it 2307 reaching_reg <- old_reg 2308 it's better to add it before as follows: 2309 reaching_reg <- old_reg 2310 MEM <- reaching_reg. */ 2311 2312 static void 2313 pre_insert_copy_insn (struct expr *expr, rtx insn) 2314 { 2315 rtx reg = expr->reaching_reg; 2316 int regno = REGNO (reg); 2317 int indx = expr->bitmap_index; 2318 rtx pat = PATTERN (insn); 2319 rtx set, first_set, new_insn; 2320 rtx old_reg; 2321 int i; 2322 2323 /* This block matches the logic in hash_scan_insn. */ 2324 switch (GET_CODE (pat)) 2325 { 2326 case SET: 2327 set = pat; 2328 break; 2329 2330 case PARALLEL: 2331 /* Search through the parallel looking for the set whose 2332 source was the expression that we're interested in. */ 2333 first_set = NULL_RTX; 2334 set = NULL_RTX; 2335 for (i = 0; i < XVECLEN (pat, 0); i++) 2336 { 2337 rtx x = XVECEXP (pat, 0, i); 2338 if (GET_CODE (x) == SET) 2339 { 2340 /* If the source was a REG_EQUAL or REG_EQUIV note, we 2341 may not find an equivalent expression, but in this 2342 case the PARALLEL will have a single set. */ 2343 if (first_set == NULL_RTX) 2344 first_set = x; 2345 if (expr_equiv_p (SET_SRC (x), expr->expr)) 2346 { 2347 set = x; 2348 break; 2349 } 2350 } 2351 } 2352 2353 gcc_assert (first_set); 2354 if (set == NULL_RTX) 2355 set = first_set; 2356 break; 2357 2358 default: 2359 gcc_unreachable (); 2360 } 2361 2362 if (REG_P (SET_DEST (set))) 2363 { 2364 old_reg = SET_DEST (set); 2365 /* Check if we can modify the set destination in the original insn. */ 2366 if (validate_change (insn, &SET_DEST (set), reg, 0)) 2367 { 2368 new_insn = gen_move_insn (old_reg, reg); 2369 new_insn = emit_insn_after (new_insn, insn); 2370 } 2371 else 2372 { 2373 new_insn = gen_move_insn (reg, old_reg); 2374 new_insn = emit_insn_after (new_insn, insn); 2375 } 2376 } 2377 else /* This is possible only in case of a store to memory. */ 2378 { 2379 old_reg = SET_SRC (set); 2380 new_insn = gen_move_insn (reg, old_reg); 2381 2382 /* Check if we can modify the set source in the original insn. */ 2383 if (validate_change (insn, &SET_SRC (set), reg, 0)) 2384 new_insn = emit_insn_before (new_insn, insn); 2385 else 2386 new_insn = emit_insn_after (new_insn, insn); 2387 } 2388 2389 gcse_create_count++; 2390 2391 if (dump_file) 2392 fprintf (dump_file, 2393 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n", 2394 BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx, 2395 INSN_UID (insn), regno); 2396 } 2397 2398 /* Copy available expressions that reach the redundant expression 2399 to `reaching_reg'. */ 2400 2401 static void 2402 pre_insert_copies (void) 2403 { 2404 unsigned int i, added_copy; 2405 struct expr *expr; 2406 struct occr *occr; 2407 struct occr *avail; 2408 2409 /* For each available expression in the table, copy the result to 2410 `reaching_reg' if the expression reaches a deleted one. 2411 2412 ??? The current algorithm is rather brute force. 2413 Need to do some profiling. */ 2414 2415 for (i = 0; i < expr_hash_table.size; i++) 2416 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash) 2417 { 2418 /* If the basic block isn't reachable, PPOUT will be TRUE. However, 2419 we don't want to insert a copy here because the expression may not 2420 really be redundant. So only insert an insn if the expression was 2421 deleted. This test also avoids further processing if the 2422 expression wasn't deleted anywhere. */ 2423 if (expr->reaching_reg == NULL) 2424 continue; 2425 2426 /* Set when we add a copy for that expression. */ 2427 added_copy = 0; 2428 2429 for (occr = expr->antic_occr; occr != NULL; occr = occr->next) 2430 { 2431 if (! occr->deleted_p) 2432 continue; 2433 2434 for (avail = expr->avail_occr; avail != NULL; avail = avail->next) 2435 { 2436 rtx insn = avail->insn; 2437 2438 /* No need to handle this one if handled already. */ 2439 if (avail->copied_p) 2440 continue; 2441 2442 /* Don't handle this one if it's a redundant one. */ 2443 if (INSN_DELETED_P (insn)) 2444 continue; 2445 2446 /* Or if the expression doesn't reach the deleted one. */ 2447 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn), 2448 expr, 2449 BLOCK_FOR_INSN (occr->insn))) 2450 continue; 2451 2452 added_copy = 1; 2453 2454 /* Copy the result of avail to reaching_reg. */ 2455 pre_insert_copy_insn (expr, insn); 2456 avail->copied_p = 1; 2457 } 2458 } 2459 2460 if (added_copy) 2461 update_ld_motion_stores (expr); 2462 } 2463 } 2464 2465 /* Emit move from SRC to DEST noting the equivalence with expression computed 2466 in INSN. */ 2467 2468 static rtx 2469 gcse_emit_move_after (rtx dest, rtx src, rtx insn) 2470 { 2471 rtx new_rtx; 2472 rtx set = single_set (insn), set2; 2473 rtx note; 2474 rtx eqv; 2475 2476 /* This should never fail since we're creating a reg->reg copy 2477 we've verified to be valid. */ 2478 2479 new_rtx = emit_insn_after (gen_move_insn (dest, src), insn); 2480 2481 /* Note the equivalence for local CSE pass. */ 2482 set2 = single_set (new_rtx); 2483 if (!set2 || !rtx_equal_p (SET_DEST (set2), dest)) 2484 return new_rtx; 2485 if ((note = find_reg_equal_equiv_note (insn))) 2486 eqv = XEXP (note, 0); 2487 else 2488 eqv = SET_SRC (set); 2489 2490 set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv)); 2491 2492 return new_rtx; 2493 } 2494 2495 /* Delete redundant computations. 2496 Deletion is done by changing the insn to copy the `reaching_reg' of 2497 the expression into the result of the SET. It is left to later passes 2498 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it. 2499 2500 Return nonzero if a change is made. */ 2501 2502 static int 2503 pre_delete (void) 2504 { 2505 unsigned int i; 2506 int changed; 2507 struct expr *expr; 2508 struct occr *occr; 2509 2510 changed = 0; 2511 for (i = 0; i < expr_hash_table.size; i++) 2512 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash) 2513 { 2514 int indx = expr->bitmap_index; 2515 2516 /* We only need to search antic_occr since we require ANTLOC != 0. */ 2517 for (occr = expr->antic_occr; occr != NULL; occr = occr->next) 2518 { 2519 rtx insn = occr->insn; 2520 rtx set; 2521 basic_block bb = BLOCK_FOR_INSN (insn); 2522 2523 /* We only delete insns that have a single_set. */ 2524 if (TEST_BIT (pre_delete_map[bb->index], indx) 2525 && (set = single_set (insn)) != 0 2526 && dbg_cnt (pre_insn)) 2527 { 2528 /* Create a pseudo-reg to store the result of reaching 2529 expressions into. Get the mode for the new pseudo from 2530 the mode of the original destination pseudo. */ 2531 if (expr->reaching_reg == NULL) 2532 expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set)); 2533 2534 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg, insn); 2535 delete_insn (insn); 2536 occr->deleted_p = 1; 2537 changed = 1; 2538 gcse_subst_count++; 2539 2540 if (dump_file) 2541 { 2542 fprintf (dump_file, 2543 "PRE: redundant insn %d (expression %d) in ", 2544 INSN_UID (insn), indx); 2545 fprintf (dump_file, "bb %d, reaching reg is %d\n", 2546 bb->index, REGNO (expr->reaching_reg)); 2547 } 2548 } 2549 } 2550 } 2551 2552 return changed; 2553 } 2554 2555 /* Perform GCSE optimizations using PRE. 2556 This is called by one_pre_gcse_pass after all the dataflow analysis 2557 has been done. 2558 2559 This is based on the original Morel-Renvoise paper Fred Chow's thesis, and 2560 lazy code motion from Knoop, Ruthing and Steffen as described in Advanced 2561 Compiler Design and Implementation. 2562 2563 ??? A new pseudo reg is created to hold the reaching expression. The nice 2564 thing about the classical approach is that it would try to use an existing 2565 reg. If the register can't be adequately optimized [i.e. we introduce 2566 reload problems], one could add a pass here to propagate the new register 2567 through the block. 2568 2569 ??? We don't handle single sets in PARALLELs because we're [currently] not 2570 able to copy the rest of the parallel when we insert copies to create full 2571 redundancies from partial redundancies. However, there's no reason why we 2572 can't handle PARALLELs in the cases where there are no partial 2573 redundancies. */ 2574 2575 static int 2576 pre_gcse (struct edge_list *edge_list) 2577 { 2578 unsigned int i; 2579 int did_insert, changed; 2580 struct expr **index_map; 2581 struct expr *expr; 2582 2583 /* Compute a mapping from expression number (`bitmap_index') to 2584 hash table entry. */ 2585 2586 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems); 2587 for (i = 0; i < expr_hash_table.size; i++) 2588 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash) 2589 index_map[expr->bitmap_index] = expr; 2590 2591 /* Delete the redundant insns first so that 2592 - we know what register to use for the new insns and for the other 2593 ones with reaching expressions 2594 - we know which insns are redundant when we go to create copies */ 2595 2596 changed = pre_delete (); 2597 did_insert = pre_edge_insert (edge_list, index_map); 2598 2599 /* In other places with reaching expressions, copy the expression to the 2600 specially allocated pseudo-reg that reaches the redundant expr. */ 2601 pre_insert_copies (); 2602 if (did_insert) 2603 { 2604 commit_edge_insertions (); 2605 changed = 1; 2606 } 2607 2608 free (index_map); 2609 return changed; 2610 } 2611 2612 /* Top level routine to perform one PRE GCSE pass. 2613 2614 Return nonzero if a change was made. */ 2615 2616 static int 2617 one_pre_gcse_pass (void) 2618 { 2619 int changed = 0; 2620 2621 gcse_subst_count = 0; 2622 gcse_create_count = 0; 2623 2624 /* Return if there's nothing to do, or it is too expensive. */ 2625 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1 2626 || is_too_expensive (_("PRE disabled"))) 2627 return 0; 2628 2629 /* We need alias. */ 2630 init_alias_analysis (); 2631 2632 bytes_used = 0; 2633 gcc_obstack_init (&gcse_obstack); 2634 alloc_gcse_mem (); 2635 2636 alloc_hash_table (&expr_hash_table); 2637 add_noreturn_fake_exit_edges (); 2638 if (flag_gcse_lm) 2639 compute_ld_motion_mems (); 2640 2641 compute_hash_table (&expr_hash_table); 2642 if (flag_gcse_lm) 2643 trim_ld_motion_mems (); 2644 if (dump_file) 2645 dump_hash_table (dump_file, "Expression", &expr_hash_table); 2646 2647 if (expr_hash_table.n_elems > 0) 2648 { 2649 struct edge_list *edge_list; 2650 alloc_pre_mem (last_basic_block, expr_hash_table.n_elems); 2651 edge_list = compute_pre_data (); 2652 changed |= pre_gcse (edge_list); 2653 free_edge_list (edge_list); 2654 free_pre_mem (); 2655 } 2656 2657 if (flag_gcse_lm) 2658 free_ld_motion_mems (); 2659 remove_fake_exit_edges (); 2660 free_hash_table (&expr_hash_table); 2661 2662 free_gcse_mem (); 2663 obstack_free (&gcse_obstack, NULL); 2664 2665 /* We are finished with alias. */ 2666 end_alias_analysis (); 2667 2668 if (dump_file) 2669 { 2670 fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ", 2671 current_function_name (), n_basic_blocks, bytes_used); 2672 fprintf (dump_file, "%d substs, %d insns created\n", 2673 gcse_subst_count, gcse_create_count); 2674 } 2675 2676 return changed; 2677 } 2678 2679 /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them 2680 to INSN. If such notes are added to an insn which references a 2681 CODE_LABEL, the LABEL_NUSES count is incremented. We have to add 2682 that note, because the following loop optimization pass requires 2683 them. */ 2684 2685 /* ??? If there was a jump optimization pass after gcse and before loop, 2686 then we would not need to do this here, because jump would add the 2687 necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */ 2688 2689 static void 2690 add_label_notes (rtx x, rtx insn) 2691 { 2692 enum rtx_code code = GET_CODE (x); 2693 int i, j; 2694 const char *fmt; 2695 2696 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x)) 2697 { 2698 /* This code used to ignore labels that referred to dispatch tables to 2699 avoid flow generating (slightly) worse code. 2700 2701 We no longer ignore such label references (see LABEL_REF handling in 2702 mark_jump_label for additional information). */ 2703 2704 /* There's no reason for current users to emit jump-insns with 2705 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET 2706 notes. */ 2707 gcc_assert (!JUMP_P (insn)); 2708 add_reg_note (insn, REG_LABEL_OPERAND, XEXP (x, 0)); 2709 2710 if (LABEL_P (XEXP (x, 0))) 2711 LABEL_NUSES (XEXP (x, 0))++; 2712 2713 return; 2714 } 2715 2716 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--) 2717 { 2718 if (fmt[i] == 'e') 2719 add_label_notes (XEXP (x, i), insn); 2720 else if (fmt[i] == 'E') 2721 for (j = XVECLEN (x, i) - 1; j >= 0; j--) 2722 add_label_notes (XVECEXP (x, i, j), insn); 2723 } 2724 } 2725 2726 /* Code Hoisting variables and subroutines. */ 2727 2728 /* Very busy expressions. */ 2729 static sbitmap *hoist_vbein; 2730 static sbitmap *hoist_vbeout; 2731 2732 /* ??? We could compute post dominators and run this algorithm in 2733 reverse to perform tail merging, doing so would probably be 2734 more effective than the tail merging code in jump.c. 2735 2736 It's unclear if tail merging could be run in parallel with 2737 code hoisting. It would be nice. */ 2738 2739 /* Allocate vars used for code hoisting analysis. */ 2740 2741 static void 2742 alloc_code_hoist_mem (int n_blocks, int n_exprs) 2743 { 2744 antloc = sbitmap_vector_alloc (n_blocks, n_exprs); 2745 transp = sbitmap_vector_alloc (n_blocks, n_exprs); 2746 comp = sbitmap_vector_alloc (n_blocks, n_exprs); 2747 2748 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs); 2749 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs); 2750 } 2751 2752 /* Free vars used for code hoisting analysis. */ 2753 2754 static void 2755 free_code_hoist_mem (void) 2756 { 2757 sbitmap_vector_free (antloc); 2758 sbitmap_vector_free (transp); 2759 sbitmap_vector_free (comp); 2760 2761 sbitmap_vector_free (hoist_vbein); 2762 sbitmap_vector_free (hoist_vbeout); 2763 2764 free_dominance_info (CDI_DOMINATORS); 2765 } 2766 2767 /* Compute the very busy expressions at entry/exit from each block. 2768 2769 An expression is very busy if all paths from a given point 2770 compute the expression. */ 2771 2772 static void 2773 compute_code_hoist_vbeinout (void) 2774 { 2775 int changed, passes; 2776 basic_block bb; 2777 2778 sbitmap_vector_zero (hoist_vbeout, last_basic_block); 2779 sbitmap_vector_zero (hoist_vbein, last_basic_block); 2780 2781 passes = 0; 2782 changed = 1; 2783 2784 while (changed) 2785 { 2786 changed = 0; 2787 2788 /* We scan the blocks in the reverse order to speed up 2789 the convergence. */ 2790 FOR_EACH_BB_REVERSE (bb) 2791 { 2792 if (bb->next_bb != EXIT_BLOCK_PTR) 2793 { 2794 sbitmap_intersection_of_succs (hoist_vbeout[bb->index], 2795 hoist_vbein, bb->index); 2796 2797 /* Include expressions in VBEout that are calculated 2798 in BB and available at its end. */ 2799 sbitmap_a_or_b (hoist_vbeout[bb->index], 2800 hoist_vbeout[bb->index], comp[bb->index]); 2801 } 2802 2803 changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index], 2804 antloc[bb->index], 2805 hoist_vbeout[bb->index], 2806 transp[bb->index]); 2807 } 2808 2809 passes++; 2810 } 2811 2812 if (dump_file) 2813 { 2814 fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes); 2815 2816 FOR_EACH_BB (bb) 2817 { 2818 fprintf (dump_file, "vbein (%d): ", bb->index); 2819 dump_sbitmap_file (dump_file, hoist_vbein[bb->index]); 2820 fprintf (dump_file, "vbeout(%d): ", bb->index); 2821 dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]); 2822 } 2823 } 2824 } 2825 2826 /* Top level routine to do the dataflow analysis needed by code hoisting. */ 2827 2828 static void 2829 compute_code_hoist_data (void) 2830 { 2831 compute_local_properties (transp, comp, antloc, &expr_hash_table); 2832 prune_expressions (false); 2833 compute_code_hoist_vbeinout (); 2834 calculate_dominance_info (CDI_DOMINATORS); 2835 if (dump_file) 2836 fprintf (dump_file, "\n"); 2837 } 2838 2839 /* Determine if the expression identified by EXPR_INDEX would 2840 reach BB unimpared if it was placed at the end of EXPR_BB. 2841 Stop the search if the expression would need to be moved more 2842 than DISTANCE instructions. 2843 2844 It's unclear exactly what Muchnick meant by "unimpared". It seems 2845 to me that the expression must either be computed or transparent in 2846 *every* block in the path(s) from EXPR_BB to BB. Any other definition 2847 would allow the expression to be hoisted out of loops, even if 2848 the expression wasn't a loop invariant. 2849 2850 Contrast this to reachability for PRE where an expression is 2851 considered reachable if *any* path reaches instead of *all* 2852 paths. */ 2853 2854 static int 2855 hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb, 2856 char *visited, int distance, int *bb_size) 2857 { 2858 edge pred; 2859 edge_iterator ei; 2860 int visited_allocated_locally = 0; 2861 2862 /* Terminate the search if distance, for which EXPR is allowed to move, 2863 is exhausted. */ 2864 if (distance > 0) 2865 { 2866 distance -= bb_size[bb->index]; 2867 2868 if (distance <= 0) 2869 return 0; 2870 } 2871 else 2872 gcc_assert (distance == 0); 2873 2874 if (visited == NULL) 2875 { 2876 visited_allocated_locally = 1; 2877 visited = XCNEWVEC (char, last_basic_block); 2878 } 2879 2880 FOR_EACH_EDGE (pred, ei, bb->preds) 2881 { 2882 basic_block pred_bb = pred->src; 2883 2884 if (pred->src == ENTRY_BLOCK_PTR) 2885 break; 2886 else if (pred_bb == expr_bb) 2887 continue; 2888 else if (visited[pred_bb->index]) 2889 continue; 2890 2891 else if (! TEST_BIT (transp[pred_bb->index], expr_index)) 2892 break; 2893 2894 /* Not killed. */ 2895 else 2896 { 2897 visited[pred_bb->index] = 1; 2898 if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb, 2899 visited, distance, bb_size)) 2900 break; 2901 } 2902 } 2903 if (visited_allocated_locally) 2904 free (visited); 2905 2906 return (pred == NULL); 2907 } 2908 2909 /* Find occurence in BB. */ 2910 2911 static struct occr * 2912 find_occr_in_bb (struct occr *occr, basic_block bb) 2913 { 2914 /* Find the right occurrence of this expression. */ 2915 while (occr && BLOCK_FOR_INSN (occr->insn) != bb) 2916 occr = occr->next; 2917 2918 return occr; 2919 } 2920 2921 /* Actually perform code hoisting. */ 2922 2923 static int 2924 hoist_code (void) 2925 { 2926 basic_block bb, dominated; 2927 VEC (basic_block, heap) *dom_tree_walk; 2928 unsigned int dom_tree_walk_index; 2929 VEC (basic_block, heap) *domby; 2930 unsigned int i,j; 2931 struct expr **index_map; 2932 struct expr *expr; 2933 int *to_bb_head; 2934 int *bb_size; 2935 int changed = 0; 2936 2937 /* Compute a mapping from expression number (`bitmap_index') to 2938 hash table entry. */ 2939 2940 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems); 2941 for (i = 0; i < expr_hash_table.size; i++) 2942 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash) 2943 index_map[expr->bitmap_index] = expr; 2944 2945 /* Calculate sizes of basic blocks and note how far 2946 each instruction is from the start of its block. We then use this 2947 data to restrict distance an expression can travel. */ 2948 2949 to_bb_head = XCNEWVEC (int, get_max_uid ()); 2950 bb_size = XCNEWVEC (int, last_basic_block); 2951 2952 FOR_EACH_BB (bb) 2953 { 2954 rtx insn; 2955 int to_head; 2956 2957 to_head = 0; 2958 FOR_BB_INSNS (bb, insn) 2959 { 2960 /* Don't count debug instructions to avoid them affecting 2961 decision choices. */ 2962 if (NONDEBUG_INSN_P (insn)) 2963 to_bb_head[INSN_UID (insn)] = to_head++; 2964 } 2965 2966 bb_size[bb->index] = to_head; 2967 } 2968 2969 gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1 2970 && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest 2971 == ENTRY_BLOCK_PTR->next_bb)); 2972 2973 dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS, 2974 ENTRY_BLOCK_PTR->next_bb); 2975 2976 /* Walk over each basic block looking for potentially hoistable 2977 expressions, nothing gets hoisted from the entry block. */ 2978 FOR_EACH_VEC_ELT (basic_block, dom_tree_walk, dom_tree_walk_index, bb) 2979 { 2980 domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH); 2981 2982 if (VEC_length (basic_block, domby) == 0) 2983 continue; 2984 2985 /* Examine each expression that is very busy at the exit of this 2986 block. These are the potentially hoistable expressions. */ 2987 for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++) 2988 { 2989 if (TEST_BIT (hoist_vbeout[bb->index], i)) 2990 { 2991 /* Current expression. */ 2992 struct expr *expr = index_map[i]; 2993 /* Number of occurences of EXPR that can be hoisted to BB. */ 2994 int hoistable = 0; 2995 /* Basic blocks that have occurences reachable from BB. */ 2996 bitmap_head _from_bbs, *from_bbs = &_from_bbs; 2997 /* Occurences reachable from BB. */ 2998 VEC (occr_t, heap) *occrs_to_hoist = NULL; 2999 /* We want to insert the expression into BB only once, so 3000 note when we've inserted it. */ 3001 int insn_inserted_p; 3002 occr_t occr; 3003 3004 bitmap_initialize (from_bbs, 0); 3005 3006 /* If an expression is computed in BB and is available at end of 3007 BB, hoist all occurences dominated by BB to BB. */ 3008 if (TEST_BIT (comp[bb->index], i)) 3009 { 3010 occr = find_occr_in_bb (expr->antic_occr, bb); 3011 3012 if (occr) 3013 { 3014 /* An occurence might've been already deleted 3015 while processing a dominator of BB. */ 3016 if (!occr->deleted_p) 3017 { 3018 gcc_assert (NONDEBUG_INSN_P (occr->insn)); 3019 hoistable++; 3020 } 3021 } 3022 else 3023 hoistable++; 3024 } 3025 3026 /* We've found a potentially hoistable expression, now 3027 we look at every block BB dominates to see if it 3028 computes the expression. */ 3029 FOR_EACH_VEC_ELT (basic_block, domby, j, dominated) 3030 { 3031 int max_distance; 3032 3033 /* Ignore self dominance. */ 3034 if (bb == dominated) 3035 continue; 3036 /* We've found a dominated block, now see if it computes 3037 the busy expression and whether or not moving that 3038 expression to the "beginning" of that block is safe. */ 3039 if (!TEST_BIT (antloc[dominated->index], i)) 3040 continue; 3041 3042 occr = find_occr_in_bb (expr->antic_occr, dominated); 3043 gcc_assert (occr); 3044 3045 /* An occurence might've been already deleted 3046 while processing a dominator of BB. */ 3047 if (occr->deleted_p) 3048 continue; 3049 gcc_assert (NONDEBUG_INSN_P (occr->insn)); 3050 3051 max_distance = expr->max_distance; 3052 if (max_distance > 0) 3053 /* Adjust MAX_DISTANCE to account for the fact that 3054 OCCR won't have to travel all of DOMINATED, but 3055 only part of it. */ 3056 max_distance += (bb_size[dominated->index] 3057 - to_bb_head[INSN_UID (occr->insn)]); 3058 3059 /* Note if the expression would reach the dominated block 3060 unimpared if it was placed at the end of BB. 3061 3062 Keep track of how many times this expression is hoistable 3063 from a dominated block into BB. */ 3064 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL, 3065 max_distance, bb_size)) 3066 { 3067 hoistable++; 3068 VEC_safe_push (occr_t, heap, 3069 occrs_to_hoist, occr); 3070 bitmap_set_bit (from_bbs, dominated->index); 3071 } 3072 } 3073 3074 /* If we found more than one hoistable occurrence of this 3075 expression, then note it in the vector of expressions to 3076 hoist. It makes no sense to hoist things which are computed 3077 in only one BB, and doing so tends to pessimize register 3078 allocation. One could increase this value to try harder 3079 to avoid any possible code expansion due to register 3080 allocation issues; however experiments have shown that 3081 the vast majority of hoistable expressions are only movable 3082 from two successors, so raising this threshold is likely 3083 to nullify any benefit we get from code hoisting. */ 3084 if (hoistable > 1 && dbg_cnt (hoist_insn)) 3085 { 3086 /* If (hoistable != VEC_length), then there is 3087 an occurence of EXPR in BB itself. Don't waste 3088 time looking for LCA in this case. */ 3089 if ((unsigned) hoistable 3090 == VEC_length (occr_t, occrs_to_hoist)) 3091 { 3092 basic_block lca; 3093 3094 lca = nearest_common_dominator_for_set (CDI_DOMINATORS, 3095 from_bbs); 3096 if (lca != bb) 3097 /* Punt, it's better to hoist these occurences to 3098 LCA. */ 3099 VEC_free (occr_t, heap, occrs_to_hoist); 3100 } 3101 } 3102 else 3103 /* Punt, no point hoisting a single occurence. */ 3104 VEC_free (occr_t, heap, occrs_to_hoist); 3105 3106 insn_inserted_p = 0; 3107 3108 /* Walk through occurences of I'th expressions we want 3109 to hoist to BB and make the transformations. */ 3110 FOR_EACH_VEC_ELT (occr_t, occrs_to_hoist, j, occr) 3111 { 3112 rtx insn; 3113 rtx set; 3114 3115 gcc_assert (!occr->deleted_p); 3116 3117 insn = occr->insn; 3118 set = single_set (insn); 3119 gcc_assert (set); 3120 3121 /* Create a pseudo-reg to store the result of reaching 3122 expressions into. Get the mode for the new pseudo 3123 from the mode of the original destination pseudo. 3124 3125 It is important to use new pseudos whenever we 3126 emit a set. This will allow reload to use 3127 rematerialization for such registers. */ 3128 if (!insn_inserted_p) 3129 expr->reaching_reg 3130 = gen_reg_rtx_and_attrs (SET_DEST (set)); 3131 3132 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg, 3133 insn); 3134 delete_insn (insn); 3135 occr->deleted_p = 1; 3136 changed = 1; 3137 gcse_subst_count++; 3138 3139 if (!insn_inserted_p) 3140 { 3141 insert_insn_end_basic_block (expr, bb); 3142 insn_inserted_p = 1; 3143 } 3144 } 3145 3146 VEC_free (occr_t, heap, occrs_to_hoist); 3147 bitmap_clear (from_bbs); 3148 } 3149 } 3150 VEC_free (basic_block, heap, domby); 3151 } 3152 3153 VEC_free (basic_block, heap, dom_tree_walk); 3154 free (bb_size); 3155 free (to_bb_head); 3156 free (index_map); 3157 3158 return changed; 3159 } 3160 3161 /* Top level routine to perform one code hoisting (aka unification) pass 3162 3163 Return nonzero if a change was made. */ 3164 3165 static int 3166 one_code_hoisting_pass (void) 3167 { 3168 int changed = 0; 3169 3170 gcse_subst_count = 0; 3171 gcse_create_count = 0; 3172 3173 /* Return if there's nothing to do, or it is too expensive. */ 3174 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1 3175 || is_too_expensive (_("GCSE disabled"))) 3176 return 0; 3177 3178 doing_code_hoisting_p = true; 3179 3180 /* We need alias. */ 3181 init_alias_analysis (); 3182 3183 bytes_used = 0; 3184 gcc_obstack_init (&gcse_obstack); 3185 alloc_gcse_mem (); 3186 3187 alloc_hash_table (&expr_hash_table); 3188 compute_hash_table (&expr_hash_table); 3189 if (dump_file) 3190 dump_hash_table (dump_file, "Code Hosting Expressions", &expr_hash_table); 3191 3192 if (expr_hash_table.n_elems > 0) 3193 { 3194 alloc_code_hoist_mem (last_basic_block, expr_hash_table.n_elems); 3195 compute_code_hoist_data (); 3196 changed = hoist_code (); 3197 free_code_hoist_mem (); 3198 } 3199 3200 free_hash_table (&expr_hash_table); 3201 free_gcse_mem (); 3202 obstack_free (&gcse_obstack, NULL); 3203 3204 /* We are finished with alias. */ 3205 end_alias_analysis (); 3206 3207 if (dump_file) 3208 { 3209 fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ", 3210 current_function_name (), n_basic_blocks, bytes_used); 3211 fprintf (dump_file, "%d substs, %d insns created\n", 3212 gcse_subst_count, gcse_create_count); 3213 } 3214 3215 doing_code_hoisting_p = false; 3216 3217 return changed; 3218 } 3219 3220 /* Here we provide the things required to do store motion towards the exit. 3221 In order for this to be effective, gcse also needed to be taught how to 3222 move a load when it is killed only by a store to itself. 3223 3224 int i; 3225 float a[10]; 3226 3227 void foo(float scale) 3228 { 3229 for (i=0; i<10; i++) 3230 a[i] *= scale; 3231 } 3232 3233 'i' is both loaded and stored to in the loop. Normally, gcse cannot move 3234 the load out since its live around the loop, and stored at the bottom 3235 of the loop. 3236 3237 The 'Load Motion' referred to and implemented in this file is 3238 an enhancement to gcse which when using edge based LCM, recognizes 3239 this situation and allows gcse to move the load out of the loop. 3240 3241 Once gcse has hoisted the load, store motion can then push this 3242 load towards the exit, and we end up with no loads or stores of 'i' 3243 in the loop. */ 3244 3245 static hashval_t 3246 pre_ldst_expr_hash (const void *p) 3247 { 3248 int do_not_record_p = 0; 3249 const struct ls_expr *const x = (const struct ls_expr *) p; 3250 return 3251 hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false); 3252 } 3253 3254 static int 3255 pre_ldst_expr_eq (const void *p1, const void *p2) 3256 { 3257 const struct ls_expr *const ptr1 = (const struct ls_expr *) p1, 3258 *const ptr2 = (const struct ls_expr *) p2; 3259 return expr_equiv_p (ptr1->pattern, ptr2->pattern); 3260 } 3261 3262 /* This will search the ldst list for a matching expression. If it 3263 doesn't find one, we create one and initialize it. */ 3264 3265 static struct ls_expr * 3266 ldst_entry (rtx x) 3267 { 3268 int do_not_record_p = 0; 3269 struct ls_expr * ptr; 3270 unsigned int hash; 3271 void **slot; 3272 struct ls_expr e; 3273 3274 hash = hash_rtx (x, GET_MODE (x), &do_not_record_p, 3275 NULL, /*have_reg_qty=*/false); 3276 3277 e.pattern = x; 3278 slot = htab_find_slot_with_hash (pre_ldst_table, &e, hash, INSERT); 3279 if (*slot) 3280 return (struct ls_expr *)*slot; 3281 3282 ptr = XNEW (struct ls_expr); 3283 3284 ptr->next = pre_ldst_mems; 3285 ptr->expr = NULL; 3286 ptr->pattern = x; 3287 ptr->pattern_regs = NULL_RTX; 3288 ptr->loads = NULL_RTX; 3289 ptr->stores = NULL_RTX; 3290 ptr->reaching_reg = NULL_RTX; 3291 ptr->invalid = 0; 3292 ptr->index = 0; 3293 ptr->hash_index = hash; 3294 pre_ldst_mems = ptr; 3295 *slot = ptr; 3296 3297 return ptr; 3298 } 3299 3300 /* Free up an individual ldst entry. */ 3301 3302 static void 3303 free_ldst_entry (struct ls_expr * ptr) 3304 { 3305 free_INSN_LIST_list (& ptr->loads); 3306 free_INSN_LIST_list (& ptr->stores); 3307 3308 free (ptr); 3309 } 3310 3311 /* Free up all memory associated with the ldst list. */ 3312 3313 static void 3314 free_ld_motion_mems (void) 3315 { 3316 if (pre_ldst_table) 3317 htab_delete (pre_ldst_table); 3318 pre_ldst_table = NULL; 3319 3320 while (pre_ldst_mems) 3321 { 3322 struct ls_expr * tmp = pre_ldst_mems; 3323 3324 pre_ldst_mems = pre_ldst_mems->next; 3325 3326 free_ldst_entry (tmp); 3327 } 3328 3329 pre_ldst_mems = NULL; 3330 } 3331 3332 /* Dump debugging info about the ldst list. */ 3333 3334 static void 3335 print_ldst_list (FILE * file) 3336 { 3337 struct ls_expr * ptr; 3338 3339 fprintf (file, "LDST list: \n"); 3340 3341 for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next) 3342 { 3343 fprintf (file, " Pattern (%3d): ", ptr->index); 3344 3345 print_rtl (file, ptr->pattern); 3346 3347 fprintf (file, "\n Loads : "); 3348 3349 if (ptr->loads) 3350 print_rtl (file, ptr->loads); 3351 else 3352 fprintf (file, "(nil)"); 3353 3354 fprintf (file, "\n Stores : "); 3355 3356 if (ptr->stores) 3357 print_rtl (file, ptr->stores); 3358 else 3359 fprintf (file, "(nil)"); 3360 3361 fprintf (file, "\n\n"); 3362 } 3363 3364 fprintf (file, "\n"); 3365 } 3366 3367 /* Returns 1 if X is in the list of ldst only expressions. */ 3368 3369 static struct ls_expr * 3370 find_rtx_in_ldst (rtx x) 3371 { 3372 struct ls_expr e; 3373 void **slot; 3374 if (!pre_ldst_table) 3375 return NULL; 3376 e.pattern = x; 3377 slot = htab_find_slot (pre_ldst_table, &e, NO_INSERT); 3378 if (!slot || ((struct ls_expr *)*slot)->invalid) 3379 return NULL; 3380 return (struct ls_expr *) *slot; 3381 } 3382 3383 /* Load Motion for loads which only kill themselves. */ 3384 3385 /* Return true if x, a MEM, is a simple access with no side effects. 3386 These are the types of loads we consider for the ld_motion list, 3387 otherwise we let the usual aliasing take care of it. */ 3388 3389 static int 3390 simple_mem (const_rtx x) 3391 { 3392 if (MEM_VOLATILE_P (x)) 3393 return 0; 3394 3395 if (GET_MODE (x) == BLKmode) 3396 return 0; 3397 3398 /* If we are handling exceptions, we must be careful with memory references 3399 that may trap. If we are not, the behavior is undefined, so we may just 3400 continue. */ 3401 if (cfun->can_throw_non_call_exceptions && may_trap_p (x)) 3402 return 0; 3403 3404 if (side_effects_p (x)) 3405 return 0; 3406 3407 /* Do not consider function arguments passed on stack. */ 3408 if (reg_mentioned_p (stack_pointer_rtx, x)) 3409 return 0; 3410 3411 if (flag_float_store && FLOAT_MODE_P (GET_MODE (x))) 3412 return 0; 3413 3414 return 1; 3415 } 3416 3417 /* Make sure there isn't a buried reference in this pattern anywhere. 3418 If there is, invalidate the entry for it since we're not capable 3419 of fixing it up just yet.. We have to be sure we know about ALL 3420 loads since the aliasing code will allow all entries in the 3421 ld_motion list to not-alias itself. If we miss a load, we will get 3422 the wrong value since gcse might common it and we won't know to 3423 fix it up. */ 3424 3425 static void 3426 invalidate_any_buried_refs (rtx x) 3427 { 3428 const char * fmt; 3429 int i, j; 3430 struct ls_expr * ptr; 3431 3432 /* Invalidate it in the list. */ 3433 if (MEM_P (x) && simple_mem (x)) 3434 { 3435 ptr = ldst_entry (x); 3436 ptr->invalid = 1; 3437 } 3438 3439 /* Recursively process the insn. */ 3440 fmt = GET_RTX_FORMAT (GET_CODE (x)); 3441 3442 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) 3443 { 3444 if (fmt[i] == 'e') 3445 invalidate_any_buried_refs (XEXP (x, i)); 3446 else if (fmt[i] == 'E') 3447 for (j = XVECLEN (x, i) - 1; j >= 0; j--) 3448 invalidate_any_buried_refs (XVECEXP (x, i, j)); 3449 } 3450 } 3451 3452 /* Find all the 'simple' MEMs which are used in LOADs and STORES. Simple 3453 being defined as MEM loads and stores to symbols, with no side effects 3454 and no registers in the expression. For a MEM destination, we also 3455 check that the insn is still valid if we replace the destination with a 3456 REG, as is done in update_ld_motion_stores. If there are any uses/defs 3457 which don't match this criteria, they are invalidated and trimmed out 3458 later. */ 3459 3460 static void 3461 compute_ld_motion_mems (void) 3462 { 3463 struct ls_expr * ptr; 3464 basic_block bb; 3465 rtx insn; 3466 3467 pre_ldst_mems = NULL; 3468 pre_ldst_table 3469 = htab_create (13, pre_ldst_expr_hash, pre_ldst_expr_eq, NULL); 3470 3471 FOR_EACH_BB (bb) 3472 { 3473 FOR_BB_INSNS (bb, insn) 3474 { 3475 if (NONDEBUG_INSN_P (insn)) 3476 { 3477 if (GET_CODE (PATTERN (insn)) == SET) 3478 { 3479 rtx src = SET_SRC (PATTERN (insn)); 3480 rtx dest = SET_DEST (PATTERN (insn)); 3481 3482 /* Check for a simple LOAD... */ 3483 if (MEM_P (src) && simple_mem (src)) 3484 { 3485 ptr = ldst_entry (src); 3486 if (REG_P (dest)) 3487 ptr->loads = alloc_INSN_LIST (insn, ptr->loads); 3488 else 3489 ptr->invalid = 1; 3490 } 3491 else 3492 { 3493 /* Make sure there isn't a buried load somewhere. */ 3494 invalidate_any_buried_refs (src); 3495 } 3496 3497 /* Check for stores. Don't worry about aliased ones, they 3498 will block any movement we might do later. We only care 3499 about this exact pattern since those are the only 3500 circumstance that we will ignore the aliasing info. */ 3501 if (MEM_P (dest) && simple_mem (dest)) 3502 { 3503 ptr = ldst_entry (dest); 3504 3505 if (! MEM_P (src) 3506 && GET_CODE (src) != ASM_OPERANDS 3507 /* Check for REG manually since want_to_gcse_p 3508 returns 0 for all REGs. */ 3509 && can_assign_to_reg_without_clobbers_p (src)) 3510 ptr->stores = alloc_INSN_LIST (insn, ptr->stores); 3511 else 3512 ptr->invalid = 1; 3513 } 3514 } 3515 else 3516 invalidate_any_buried_refs (PATTERN (insn)); 3517 } 3518 } 3519 } 3520 } 3521 3522 /* Remove any references that have been either invalidated or are not in the 3523 expression list for pre gcse. */ 3524 3525 static void 3526 trim_ld_motion_mems (void) 3527 { 3528 struct ls_expr * * last = & pre_ldst_mems; 3529 struct ls_expr * ptr = pre_ldst_mems; 3530 3531 while (ptr != NULL) 3532 { 3533 struct expr * expr; 3534 3535 /* Delete if entry has been made invalid. */ 3536 if (! ptr->invalid) 3537 { 3538 /* Delete if we cannot find this mem in the expression list. */ 3539 unsigned int hash = ptr->hash_index % expr_hash_table.size; 3540 3541 for (expr = expr_hash_table.table[hash]; 3542 expr != NULL; 3543 expr = expr->next_same_hash) 3544 if (expr_equiv_p (expr->expr, ptr->pattern)) 3545 break; 3546 } 3547 else 3548 expr = (struct expr *) 0; 3549 3550 if (expr) 3551 { 3552 /* Set the expression field if we are keeping it. */ 3553 ptr->expr = expr; 3554 last = & ptr->next; 3555 ptr = ptr->next; 3556 } 3557 else 3558 { 3559 *last = ptr->next; 3560 htab_remove_elt_with_hash (pre_ldst_table, ptr, ptr->hash_index); 3561 free_ldst_entry (ptr); 3562 ptr = * last; 3563 } 3564 } 3565 3566 /* Show the world what we've found. */ 3567 if (dump_file && pre_ldst_mems != NULL) 3568 print_ldst_list (dump_file); 3569 } 3570 3571 /* This routine will take an expression which we are replacing with 3572 a reaching register, and update any stores that are needed if 3573 that expression is in the ld_motion list. Stores are updated by 3574 copying their SRC to the reaching register, and then storing 3575 the reaching register into the store location. These keeps the 3576 correct value in the reaching register for the loads. */ 3577 3578 static void 3579 update_ld_motion_stores (struct expr * expr) 3580 { 3581 struct ls_expr * mem_ptr; 3582 3583 if ((mem_ptr = find_rtx_in_ldst (expr->expr))) 3584 { 3585 /* We can try to find just the REACHED stores, but is shouldn't 3586 matter to set the reaching reg everywhere... some might be 3587 dead and should be eliminated later. */ 3588 3589 /* We replace (set mem expr) with (set reg expr) (set mem reg) 3590 where reg is the reaching reg used in the load. We checked in 3591 compute_ld_motion_mems that we can replace (set mem expr) with 3592 (set reg expr) in that insn. */ 3593 rtx list = mem_ptr->stores; 3594 3595 for ( ; list != NULL_RTX; list = XEXP (list, 1)) 3596 { 3597 rtx insn = XEXP (list, 0); 3598 rtx pat = PATTERN (insn); 3599 rtx src = SET_SRC (pat); 3600 rtx reg = expr->reaching_reg; 3601 rtx copy; 3602 3603 /* If we've already copied it, continue. */ 3604 if (expr->reaching_reg == src) 3605 continue; 3606 3607 if (dump_file) 3608 { 3609 fprintf (dump_file, "PRE: store updated with reaching reg "); 3610 print_rtl (dump_file, reg); 3611 fprintf (dump_file, ":\n "); 3612 print_inline_rtx (dump_file, insn, 8); 3613 fprintf (dump_file, "\n"); 3614 } 3615 3616 copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat))); 3617 emit_insn_before (copy, insn); 3618 SET_SRC (pat) = reg; 3619 df_insn_rescan (insn); 3620 3621 /* un-recognize this pattern since it's probably different now. */ 3622 INSN_CODE (insn) = -1; 3623 gcse_create_count++; 3624 } 3625 } 3626 } 3627 3628 /* Return true if the graph is too expensive to optimize. PASS is the 3629 optimization about to be performed. */ 3630 3631 static bool 3632 is_too_expensive (const char *pass) 3633 { 3634 /* Trying to perform global optimizations on flow graphs which have 3635 a high connectivity will take a long time and is unlikely to be 3636 particularly useful. 3637 3638 In normal circumstances a cfg should have about twice as many 3639 edges as blocks. But we do not want to punish small functions 3640 which have a couple switch statements. Rather than simply 3641 threshold the number of blocks, uses something with a more 3642 graceful degradation. */ 3643 if (n_edges > 20000 + n_basic_blocks * 4) 3644 { 3645 warning (OPT_Wdisabled_optimization, 3646 "%s: %d basic blocks and %d edges/basic block", 3647 pass, n_basic_blocks, n_edges / n_basic_blocks); 3648 3649 return true; 3650 } 3651 3652 /* If allocating memory for the dataflow bitmaps would take up too much 3653 storage it's better just to disable the optimization. */ 3654 if ((n_basic_blocks 3655 * SBITMAP_SET_SIZE (max_reg_num ()) 3656 * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY) 3657 { 3658 warning (OPT_Wdisabled_optimization, 3659 "%s: %d basic blocks and %d registers", 3660 pass, n_basic_blocks, max_reg_num ()); 3661 3662 return true; 3663 } 3664 3665 return false; 3666 } 3667 3668 /* All the passes implemented in this file. Each pass has its 3669 own gate and execute function, and at the end of the file a 3670 pass definition for passes.c. 3671 3672 We do not construct an accurate cfg in functions which call 3673 setjmp, so none of these passes runs if the function calls 3674 setjmp. 3675 FIXME: Should just handle setjmp via REG_SETJMP notes. */ 3676 3677 static bool 3678 gate_rtl_pre (void) 3679 { 3680 return optimize > 0 && flag_gcse 3681 && !cfun->calls_setjmp 3682 && optimize_function_for_speed_p (cfun) 3683 && dbg_cnt (pre); 3684 } 3685 3686 static unsigned int 3687 execute_rtl_pre (void) 3688 { 3689 int changed; 3690 delete_unreachable_blocks (); 3691 df_analyze (); 3692 changed = one_pre_gcse_pass (); 3693 flag_rerun_cse_after_global_opts |= changed; 3694 if (changed) 3695 cleanup_cfg (0); 3696 return 0; 3697 } 3698 3699 static bool 3700 gate_rtl_hoist (void) 3701 { 3702 return optimize > 0 && flag_gcse 3703 && !cfun->calls_setjmp 3704 /* It does not make sense to run code hoisting unless we are optimizing 3705 for code size -- it rarely makes programs faster, and can make then 3706 bigger if we did PRE (when optimizing for space, we don't run PRE). */ 3707 && optimize_function_for_size_p (cfun) 3708 && dbg_cnt (hoist); 3709 } 3710 3711 static unsigned int 3712 execute_rtl_hoist (void) 3713 { 3714 int changed; 3715 delete_unreachable_blocks (); 3716 df_analyze (); 3717 changed = one_code_hoisting_pass (); 3718 flag_rerun_cse_after_global_opts |= changed; 3719 if (changed) 3720 cleanup_cfg (0); 3721 return 0; 3722 } 3723 3724 struct rtl_opt_pass pass_rtl_pre = 3725 { 3726 { 3727 RTL_PASS, 3728 "rtl pre", /* name */ 3729 gate_rtl_pre, /* gate */ 3730 execute_rtl_pre, /* execute */ 3731 NULL, /* sub */ 3732 NULL, /* next */ 3733 0, /* static_pass_number */ 3734 TV_PRE, /* tv_id */ 3735 PROP_cfglayout, /* properties_required */ 3736 0, /* properties_provided */ 3737 0, /* properties_destroyed */ 3738 0, /* todo_flags_start */ 3739 TODO_df_finish | TODO_verify_rtl_sharing | 3740 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */ 3741 } 3742 }; 3743 3744 struct rtl_opt_pass pass_rtl_hoist = 3745 { 3746 { 3747 RTL_PASS, 3748 "hoist", /* name */ 3749 gate_rtl_hoist, /* gate */ 3750 execute_rtl_hoist, /* execute */ 3751 NULL, /* sub */ 3752 NULL, /* next */ 3753 0, /* static_pass_number */ 3754 TV_HOIST, /* tv_id */ 3755 PROP_cfglayout, /* properties_required */ 3756 0, /* properties_provided */ 3757 0, /* properties_destroyed */ 3758 0, /* todo_flags_start */ 3759 TODO_df_finish | TODO_verify_rtl_sharing | 3760 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */ 3761 } 3762 }; 3763 3764 #include "gt-gcse.h" 3765