1 /* Vectorizer 2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 3 Free Software Foundation, Inc. 4 Contributed by Dorit Naishlos <dorit@il.ibm.com> 5 6 This file is part of GCC. 7 8 GCC is free software; you can redistribute it and/or modify it under 9 the terms of the GNU General Public License as published by the Free 10 Software Foundation; either version 3, or (at your option) any later 11 version. 12 13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 14 WARRANTY; without even the implied warranty of MERCHANTABILITY or 15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 16 for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with GCC; see the file COPYING3. If not see 20 <http://www.gnu.org/licenses/>. */ 21 22 /* Loop and basic block vectorizer. 23 24 This file contains drivers for the three vectorizers: 25 (1) loop vectorizer (inter-iteration parallelism), 26 (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop 27 vectorizer) 28 (3) BB vectorizer (out-of-loops), aka SLP 29 30 The rest of the vectorizer's code is organized as follows: 31 - tree-vect-loop.c - loop specific parts such as reductions, etc. These are 32 used by drivers (1) and (2). 33 - tree-vect-loop-manip.c - vectorizer's loop control-flow utilities, used by 34 drivers (1) and (2). 35 - tree-vect-slp.c - BB vectorization specific analysis and transformation, 36 used by drivers (2) and (3). 37 - tree-vect-stmts.c - statements analysis and transformation (used by all). 38 - tree-vect-data-refs.c - vectorizer specific data-refs analysis and 39 manipulations (used by all). 40 - tree-vect-patterns.c - vectorizable code patterns detector (used by all) 41 42 Here's a poor attempt at illustrating that: 43 44 tree-vectorizer.c: 45 loop_vect() loop_aware_slp() slp_vect() 46 | / \ / 47 | / \ / 48 tree-vect-loop.c tree-vect-slp.c 49 | \ \ / / | 50 | \ \/ / | 51 | \ /\ / | 52 | \ / \ / | 53 tree-vect-stmts.c tree-vect-data-refs.c 54 \ / 55 tree-vect-patterns.c 56 */ 57 58 #include "config.h" 59 #include "system.h" 60 #include "coretypes.h" 61 #include "tm.h" 62 #include "ggc.h" 63 #include "tree.h" 64 #include "tree-pretty-print.h" 65 #include "tree-flow.h" 66 #include "tree-dump.h" 67 #include "cfgloop.h" 68 #include "cfglayout.h" 69 #include "tree-vectorizer.h" 70 #include "tree-pass.h" 71 #include "timevar.h" 72 73 /* vect_dump will be set to stderr or dump_file if exist. */ 74 FILE *vect_dump; 75 76 /* vect_verbosity_level set to an invalid value 77 to mark that it's uninitialized. */ 78 static enum vect_verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL; 79 80 /* Loop or bb location. */ 81 LOC vect_location; 82 83 /* Vector mapping GIMPLE stmt to stmt_vec_info. */ 84 VEC(vec_void_p,heap) *stmt_vec_info_vec; 85 86 87 88 /* Function vect_set_dump_settings. 89 90 Fix the verbosity level of the vectorizer if the 91 requested level was not set explicitly using the flag 92 -ftree-vectorizer-verbose=N. 93 Decide where to print the debugging information (dump_file/stderr). 94 If the user defined the verbosity level, but there is no dump file, 95 print to stderr, otherwise print to the dump file. */ 96 97 static void 98 vect_set_dump_settings (bool slp) 99 { 100 vect_dump = dump_file; 101 102 /* Check if the verbosity level was defined by the user: */ 103 if (user_vect_verbosity_level != MAX_VERBOSITY_LEVEL) 104 { 105 vect_verbosity_level = user_vect_verbosity_level; 106 /* Ignore user defined verbosity if dump flags require higher level of 107 verbosity. */ 108 if (dump_file) 109 { 110 if (((dump_flags & TDF_DETAILS) 111 && vect_verbosity_level >= REPORT_DETAILS) 112 || ((dump_flags & TDF_STATS) 113 && vect_verbosity_level >= REPORT_UNVECTORIZED_LOCATIONS)) 114 return; 115 } 116 else 117 { 118 /* If there is no dump file, print to stderr in case of loop 119 vectorization. */ 120 if (!slp) 121 vect_dump = stderr; 122 123 return; 124 } 125 } 126 127 /* User didn't specify verbosity level: */ 128 if (dump_file && (dump_flags & TDF_DETAILS)) 129 vect_verbosity_level = REPORT_DETAILS; 130 else if (dump_file && (dump_flags & TDF_STATS)) 131 vect_verbosity_level = REPORT_UNVECTORIZED_LOCATIONS; 132 else 133 vect_verbosity_level = REPORT_NONE; 134 135 gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE); 136 } 137 138 139 /* Function debug_loop_details. 140 141 For vectorization debug dumps. */ 142 143 bool 144 vect_print_dump_info (enum vect_verbosity_levels vl) 145 { 146 if (vl > vect_verbosity_level) 147 return false; 148 149 if (!current_function_decl || !vect_dump) 150 return false; 151 152 if (vect_location == UNKNOWN_LOC) 153 fprintf (vect_dump, "\n%s:%d: note: ", 154 DECL_SOURCE_FILE (current_function_decl), 155 DECL_SOURCE_LINE (current_function_decl)); 156 else 157 fprintf (vect_dump, "\n%d: ", LOC_LINE (vect_location)); 158 159 return true; 160 } 161 162 163 /* Function vectorize_loops. 164 165 Entry point to loop vectorization phase. */ 166 167 unsigned 168 vectorize_loops (void) 169 { 170 unsigned int i; 171 unsigned int num_vectorized_loops = 0; 172 unsigned int vect_loops_num; 173 loop_iterator li; 174 struct loop *loop; 175 176 vect_loops_num = number_of_loops (); 177 178 /* Bail out if there are no loops. */ 179 if (vect_loops_num <= 1) 180 return 0; 181 182 /* Fix the verbosity level if not defined explicitly by the user. */ 183 vect_set_dump_settings (false); 184 185 init_stmt_vec_info_vec (); 186 187 /* ----------- Analyze loops. ----------- */ 188 189 /* If some loop was duplicated, it gets bigger number 190 than all previously defined loops. This fact allows us to run 191 only over initial loops skipping newly generated ones. */ 192 FOR_EACH_LOOP (li, loop, 0) 193 if (optimize_loop_nest_for_speed_p (loop)) 194 { 195 loop_vec_info loop_vinfo; 196 197 vect_location = find_loop_location (loop); 198 if (vect_location != UNKNOWN_LOC 199 && vect_verbosity_level > REPORT_NONE) 200 fprintf (vect_dump, "\nAnalyzing loop at %s:%d\n", 201 LOC_FILE (vect_location), LOC_LINE (vect_location)); 202 203 loop_vinfo = vect_analyze_loop (loop); 204 loop->aux = loop_vinfo; 205 206 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)) 207 continue; 208 209 if (vect_location != UNKNOWN_LOC 210 && vect_verbosity_level > REPORT_NONE) 211 fprintf (vect_dump, "\n\nVectorizing loop at %s:%d\n", 212 LOC_FILE (vect_location), LOC_LINE (vect_location)); 213 214 vect_transform_loop (loop_vinfo); 215 num_vectorized_loops++; 216 } 217 218 vect_location = UNKNOWN_LOC; 219 220 statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops); 221 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS) 222 || (num_vectorized_loops > 0 223 && vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))) 224 fprintf (vect_dump, "vectorized %u loops in function.\n", 225 num_vectorized_loops); 226 227 /* ----------- Finalize. ----------- */ 228 229 mark_sym_for_renaming (gimple_vop (cfun)); 230 231 for (i = 1; i < vect_loops_num; i++) 232 { 233 loop_vec_info loop_vinfo; 234 235 loop = get_loop (i); 236 if (!loop) 237 continue; 238 loop_vinfo = (loop_vec_info) loop->aux; 239 destroy_loop_vec_info (loop_vinfo, true); 240 loop->aux = NULL; 241 } 242 243 free_stmt_vec_info_vec (); 244 245 return num_vectorized_loops > 0 ? TODO_cleanup_cfg : 0; 246 } 247 248 249 /* Entry point to basic block SLP phase. */ 250 251 static unsigned int 252 execute_vect_slp (void) 253 { 254 basic_block bb; 255 256 /* Fix the verbosity level if not defined explicitly by the user. */ 257 vect_set_dump_settings (true); 258 259 init_stmt_vec_info_vec (); 260 261 FOR_EACH_BB (bb) 262 { 263 vect_location = find_bb_location (bb); 264 265 if (vect_slp_analyze_bb (bb)) 266 { 267 vect_slp_transform_bb (bb); 268 269 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)) 270 fprintf (vect_dump, "basic block vectorized using SLP\n"); 271 } 272 } 273 274 free_stmt_vec_info_vec (); 275 return 0; 276 } 277 278 static bool 279 gate_vect_slp (void) 280 { 281 /* Apply SLP either if the vectorizer is on and the user didn't specify 282 whether to run SLP or not, or if the SLP flag was set by the user. */ 283 return ((flag_tree_vectorize != 0 && flag_tree_slp_vectorize != 0) 284 || flag_tree_slp_vectorize == 1); 285 } 286 287 struct gimple_opt_pass pass_slp_vectorize = 288 { 289 { 290 GIMPLE_PASS, 291 "slp", /* name */ 292 gate_vect_slp, /* gate */ 293 execute_vect_slp, /* execute */ 294 NULL, /* sub */ 295 NULL, /* next */ 296 0, /* static_pass_number */ 297 TV_TREE_SLP_VECTORIZATION, /* tv_id */ 298 PROP_ssa | PROP_cfg, /* properties_required */ 299 0, /* properties_provided */ 300 0, /* properties_destroyed */ 301 0, /* todo_flags_start */ 302 TODO_ggc_collect 303 | TODO_verify_ssa 304 | TODO_update_ssa 305 | TODO_verify_stmts /* todo_flags_finish */ 306 } 307 }; 308 309 310 /* Increase alignment of global arrays to improve vectorization potential. 311 TODO: 312 - Consider also structs that have an array field. 313 - Use ipa analysis to prune arrays that can't be vectorized? 314 This should involve global alignment analysis and in the future also 315 array padding. */ 316 317 static unsigned int 318 increase_alignment (void) 319 { 320 struct varpool_node *vnode; 321 322 /* Increase the alignment of all global arrays for vectorization. */ 323 for (vnode = varpool_nodes_queue; 324 vnode; 325 vnode = vnode->next_needed) 326 { 327 tree vectype, decl = vnode->decl; 328 tree t; 329 unsigned int alignment; 330 331 t = TREE_TYPE(decl); 332 if (TREE_CODE (t) != ARRAY_TYPE) 333 continue; 334 vectype = get_vectype_for_scalar_type (strip_array_types (t)); 335 if (!vectype) 336 continue; 337 alignment = TYPE_ALIGN (vectype); 338 if (DECL_ALIGN (decl) >= alignment) 339 continue; 340 341 if (vect_can_force_dr_alignment_p (decl, alignment)) 342 { 343 DECL_ALIGN (decl) = TYPE_ALIGN (vectype); 344 DECL_USER_ALIGN (decl) = 1; 345 if (dump_file) 346 { 347 fprintf (dump_file, "Increasing alignment of decl: "); 348 print_generic_expr (dump_file, decl, TDF_SLIM); 349 fprintf (dump_file, "\n"); 350 } 351 } 352 } 353 return 0; 354 } 355 356 357 static bool 358 gate_increase_alignment (void) 359 { 360 return flag_section_anchors && flag_tree_vectorize; 361 } 362 363 364 struct simple_ipa_opt_pass pass_ipa_increase_alignment = 365 { 366 { 367 SIMPLE_IPA_PASS, 368 "increase_alignment", /* name */ 369 gate_increase_alignment, /* gate */ 370 increase_alignment, /* execute */ 371 NULL, /* sub */ 372 NULL, /* next */ 373 0, /* static_pass_number */ 374 TV_IPA_OPT, /* tv_id */ 375 0, /* properties_required */ 376 0, /* properties_provided */ 377 0, /* properties_destroyed */ 378 0, /* todo_flags_start */ 379 0 /* todo_flags_finish */ 380 } 381 }; 382