1 /* The tracer pass for the GNU compiler. 2 Contributed by Jan Hubicka, SuSE Labs. 3 Adapted to work on GIMPLE instead of RTL by Robert Kidd, UIUC. 4 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 5 Free Software Foundation, Inc. 6 7 This file is part of GCC. 8 9 GCC is free software; you can redistribute it and/or modify it 10 under the terms of the GNU General Public License as published by 11 the Free Software Foundation; either version 3, or (at your option) 12 any later version. 13 14 GCC is distributed in the hope that it will be useful, but WITHOUT 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 17 License for more details. 18 19 You should have received a copy of the GNU General Public License 20 along with GCC; see the file COPYING3. If not see 21 <http://www.gnu.org/licenses/>. */ 22 23 /* This pass performs the tail duplication needed for superblock formation. 24 For more information see: 25 26 Design and Analysis of Profile-Based Optimization in Compaq's 27 Compilation Tools for Alpha; Journal of Instruction-Level 28 Parallelism 3 (2000) 1-25 29 30 Unlike Compaq's implementation we don't do the loop peeling as most 31 probably a better job can be done by a special pass and we don't 32 need to worry too much about the code size implications as the tail 33 duplicates are crossjumped again if optimizations are not 34 performed. */ 35 36 37 #include "config.h" 38 #include "system.h" 39 #include "coretypes.h" 40 #include "tm.h" 41 #include "tree.h" 42 #include "rtl.h" 43 #include "hard-reg-set.h" 44 #include "basic-block.h" 45 #include "output.h" 46 #include "cfglayout.h" 47 #include "fibheap.h" 48 #include "flags.h" 49 #include "timevar.h" 50 #include "params.h" 51 #include "coverage.h" 52 #include "tree-pass.h" 53 #include "tree-flow.h" 54 #include "tree-inline.h" 55 56 static int count_insns (basic_block); 57 static bool ignore_bb_p (const_basic_block); 58 static bool better_p (const_edge, const_edge); 59 static edge find_best_successor (basic_block); 60 static edge find_best_predecessor (basic_block); 61 static int find_trace (basic_block, basic_block *); 62 static void tail_duplicate (void); 63 64 /* Minimal outgoing edge probability considered for superblock formation. */ 65 static int probability_cutoff; 66 static int branch_ratio_cutoff; 67 68 /* A bit BB->index is set if BB has already been seen, i.e. it is 69 connected to some trace already. */ 70 sbitmap bb_seen; 71 72 static inline void 73 mark_bb_seen (basic_block bb) 74 { 75 unsigned int size = SBITMAP_SIZE_BYTES (bb_seen) * 8; 76 77 if ((unsigned int)bb->index >= size) 78 bb_seen = sbitmap_resize (bb_seen, size * 2, 0); 79 80 SET_BIT (bb_seen, bb->index); 81 } 82 83 static inline bool 84 bb_seen_p (basic_block bb) 85 { 86 return TEST_BIT (bb_seen, bb->index); 87 } 88 89 /* Return true if we should ignore the basic block for purposes of tracing. */ 90 static bool 91 ignore_bb_p (const_basic_block bb) 92 { 93 gimple g; 94 95 if (bb->index < NUM_FIXED_BLOCKS) 96 return true; 97 if (optimize_bb_for_size_p (bb)) 98 return true; 99 100 /* A transaction is a single entry multiple exit region. It must be 101 duplicated in its entirety or not at all. */ 102 g = last_stmt (CONST_CAST_BB (bb)); 103 if (g && gimple_code (g) == GIMPLE_TRANSACTION) 104 return true; 105 106 return false; 107 } 108 109 /* Return number of instructions in the block. */ 110 111 static int 112 count_insns (basic_block bb) 113 { 114 gimple_stmt_iterator gsi; 115 gimple stmt; 116 int n = 0; 117 118 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 119 { 120 stmt = gsi_stmt (gsi); 121 n += estimate_num_insns (stmt, &eni_size_weights); 122 } 123 return n; 124 } 125 126 /* Return true if E1 is more frequent than E2. */ 127 static bool 128 better_p (const_edge e1, const_edge e2) 129 { 130 if (e1->count != e2->count) 131 return e1->count > e2->count; 132 if (e1->src->frequency * e1->probability != 133 e2->src->frequency * e2->probability) 134 return (e1->src->frequency * e1->probability 135 > e2->src->frequency * e2->probability); 136 /* This is needed to avoid changes in the decision after 137 CFG is modified. */ 138 if (e1->src != e2->src) 139 return e1->src->index > e2->src->index; 140 return e1->dest->index > e2->dest->index; 141 } 142 143 /* Return most frequent successor of basic block BB. */ 144 145 static edge 146 find_best_successor (basic_block bb) 147 { 148 edge e; 149 edge best = NULL; 150 edge_iterator ei; 151 152 FOR_EACH_EDGE (e, ei, bb->succs) 153 if (!best || better_p (e, best)) 154 best = e; 155 if (!best || ignore_bb_p (best->dest)) 156 return NULL; 157 if (best->probability <= probability_cutoff) 158 return NULL; 159 return best; 160 } 161 162 /* Return most frequent predecessor of basic block BB. */ 163 164 static edge 165 find_best_predecessor (basic_block bb) 166 { 167 edge e; 168 edge best = NULL; 169 edge_iterator ei; 170 171 FOR_EACH_EDGE (e, ei, bb->preds) 172 if (!best || better_p (e, best)) 173 best = e; 174 if (!best || ignore_bb_p (best->src)) 175 return NULL; 176 if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE 177 < bb->frequency * branch_ratio_cutoff) 178 return NULL; 179 return best; 180 } 181 182 /* Find the trace using bb and record it in the TRACE array. 183 Return number of basic blocks recorded. */ 184 185 static int 186 find_trace (basic_block bb, basic_block *trace) 187 { 188 int i = 0; 189 edge e; 190 191 if (dump_file) 192 fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->frequency); 193 194 while ((e = find_best_predecessor (bb)) != NULL) 195 { 196 basic_block bb2 = e->src; 197 if (bb_seen_p (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) 198 || find_best_successor (bb2) != e) 199 break; 200 if (dump_file) 201 fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency); 202 bb = bb2; 203 } 204 if (dump_file) 205 fprintf (dump_file, " forward %i [%i]", bb->index, bb->frequency); 206 trace[i++] = bb; 207 208 /* Follow the trace in forward direction. */ 209 while ((e = find_best_successor (bb)) != NULL) 210 { 211 bb = e->dest; 212 if (bb_seen_p (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) 213 || find_best_predecessor (bb) != e) 214 break; 215 if (dump_file) 216 fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency); 217 trace[i++] = bb; 218 } 219 if (dump_file) 220 fprintf (dump_file, "\n"); 221 return i; 222 } 223 224 /* Look for basic blocks in frequency order, construct traces and tail duplicate 225 if profitable. */ 226 227 static void 228 tail_duplicate (void) 229 { 230 fibnode_t *blocks = XCNEWVEC (fibnode_t, last_basic_block); 231 basic_block *trace = XNEWVEC (basic_block, n_basic_blocks); 232 int *counts = XNEWVEC (int, last_basic_block); 233 int ninsns = 0, nduplicated = 0; 234 gcov_type weighted_insns = 0, traced_insns = 0; 235 fibheap_t heap = fibheap_new (); 236 gcov_type cover_insns; 237 int max_dup_insns; 238 basic_block bb; 239 240 /* Create an oversized sbitmap to reduce the chance that we need to 241 resize it. */ 242 bb_seen = sbitmap_alloc (last_basic_block * 2); 243 sbitmap_zero (bb_seen); 244 initialize_original_copy_tables (); 245 246 if (profile_info && flag_branch_probabilities) 247 probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK); 248 else 249 probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY); 250 probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff; 251 252 branch_ratio_cutoff = 253 (REG_BR_PROB_BASE / 100 * PARAM_VALUE (TRACER_MIN_BRANCH_RATIO)); 254 255 FOR_EACH_BB (bb) 256 { 257 int n = count_insns (bb); 258 if (!ignore_bb_p (bb)) 259 blocks[bb->index] = fibheap_insert (heap, -bb->frequency, 260 bb); 261 262 counts [bb->index] = n; 263 ninsns += n; 264 weighted_insns += n * bb->frequency; 265 } 266 267 if (profile_info && flag_branch_probabilities) 268 cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE_FEEDBACK); 269 else 270 cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE); 271 cover_insns = (weighted_insns * cover_insns + 50) / 100; 272 max_dup_insns = (ninsns * PARAM_VALUE (TRACER_MAX_CODE_GROWTH) + 50) / 100; 273 274 while (traced_insns < cover_insns && nduplicated < max_dup_insns 275 && !fibheap_empty (heap)) 276 { 277 basic_block bb = (basic_block) fibheap_extract_min (heap); 278 int n, pos; 279 280 if (!bb) 281 break; 282 283 blocks[bb->index] = NULL; 284 285 if (ignore_bb_p (bb)) 286 continue; 287 gcc_assert (!bb_seen_p (bb)); 288 289 n = find_trace (bb, trace); 290 291 bb = trace[0]; 292 traced_insns += bb->frequency * counts [bb->index]; 293 if (blocks[bb->index]) 294 { 295 fibheap_delete_node (heap, blocks[bb->index]); 296 blocks[bb->index] = NULL; 297 } 298 299 for (pos = 1; pos < n; pos++) 300 { 301 basic_block bb2 = trace[pos]; 302 303 if (blocks[bb2->index]) 304 { 305 fibheap_delete_node (heap, blocks[bb2->index]); 306 blocks[bb2->index] = NULL; 307 } 308 traced_insns += bb2->frequency * counts [bb2->index]; 309 if (EDGE_COUNT (bb2->preds) > 1 310 && can_duplicate_block_p (bb2)) 311 { 312 edge e; 313 basic_block copy; 314 315 nduplicated += counts [bb2->index]; 316 317 e = find_edge (bb, bb2); 318 319 copy = duplicate_block (bb2, e, bb); 320 flush_pending_stmts (e); 321 322 add_phi_args_after_copy (©, 1, NULL); 323 324 /* Reconsider the original copy of block we've duplicated. 325 Removing the most common predecessor may make it to be 326 head. */ 327 blocks[bb2->index] = 328 fibheap_insert (heap, -bb2->frequency, bb2); 329 330 if (dump_file) 331 fprintf (dump_file, "Duplicated %i as %i [%i]\n", 332 bb2->index, copy->index, copy->frequency); 333 334 bb2 = copy; 335 } 336 mark_bb_seen (bb2); 337 bb = bb2; 338 /* In case the trace became infrequent, stop duplicating. */ 339 if (ignore_bb_p (bb)) 340 break; 341 } 342 if (dump_file) 343 fprintf (dump_file, " covered now %.1f\n\n", 344 traced_insns * 100.0 / weighted_insns); 345 } 346 if (dump_file) 347 fprintf (dump_file, "Duplicated %i insns (%i%%)\n", nduplicated, 348 nduplicated * 100 / ninsns); 349 350 free_original_copy_tables (); 351 sbitmap_free (bb_seen); 352 free (blocks); 353 free (trace); 354 free (counts); 355 fibheap_delete (heap); 356 } 357 358 /* Main entry point to this file. */ 359 360 static unsigned int 361 tracer (void) 362 { 363 gcc_assert (current_ir_type () == IR_GIMPLE); 364 365 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1) 366 return 0; 367 368 mark_dfs_back_edges (); 369 if (dump_file) 370 dump_flow_info (dump_file, dump_flags); 371 372 /* Trace formation is done on the fly inside tail_duplicate */ 373 tail_duplicate (); 374 375 /* FIXME: We really only need to do this when we know tail duplication 376 has altered the CFG. */ 377 free_dominance_info (CDI_DOMINATORS); 378 if (dump_file) 379 dump_flow_info (dump_file, dump_flags); 380 381 return 0; 382 } 383 384 static bool 385 gate_tracer (void) 386 { 387 return (optimize > 0 && flag_tracer && flag_reorder_blocks); 388 } 389 390 struct gimple_opt_pass pass_tracer = 391 { 392 { 393 GIMPLE_PASS, 394 "tracer", /* name */ 395 gate_tracer, /* gate */ 396 tracer, /* execute */ 397 NULL, /* sub */ 398 NULL, /* next */ 399 0, /* static_pass_number */ 400 TV_TRACER, /* tv_id */ 401 0, /* properties_required */ 402 0, /* properties_provided */ 403 0, /* properties_destroyed */ 404 0, /* todo_flags_start */ 405 TODO_update_ssa 406 | TODO_verify_ssa /* todo_flags_finish */ 407 } 408 }; 409