1 /* Data References Analysis and Manipulation Utilities for Vectorization. 2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 3 Free Software Foundation, Inc. 4 Contributed by Dorit Naishlos <dorit@il.ibm.com> 5 and Ira Rosen <irar@il.ibm.com> 6 7 This file is part of GCC. 8 9 GCC is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free 11 Software Foundation; either version 3, or (at your option) any later 12 version. 13 14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 15 WARRANTY; without even the implied warranty of MERCHANTABILITY or 16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 17 for more details. 18 19 You should have received a copy of the GNU General Public License 20 along with GCC; see the file COPYING3. If not see 21 <http://www.gnu.org/licenses/>. */ 22 23 #include "config.h" 24 #include "system.h" 25 #include "coretypes.h" 26 #include "tm.h" 27 #include "ggc.h" 28 #include "tree.h" 29 #include "tm_p.h" 30 #include "target.h" 31 #include "basic-block.h" 32 #include "tree-pretty-print.h" 33 #include "gimple-pretty-print.h" 34 #include "tree-flow.h" 35 #include "tree-dump.h" 36 #include "cfgloop.h" 37 #include "tree-chrec.h" 38 #include "tree-scalar-evolution.h" 39 #include "tree-vectorizer.h" 40 #include "diagnostic-core.h" 41 42 /* Need to include rtl.h, expr.h, etc. for optabs. */ 43 #include "expr.h" 44 #include "optabs.h" 45 46 /* Return true if load- or store-lanes optab OPTAB is implemented for 47 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */ 48 49 static bool 50 vect_lanes_optab_supported_p (const char *name, convert_optab optab, 51 tree vectype, unsigned HOST_WIDE_INT count) 52 { 53 enum machine_mode mode, array_mode; 54 bool limit_p; 55 56 mode = TYPE_MODE (vectype); 57 limit_p = !targetm.array_mode_supported_p (mode, count); 58 array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode), 59 MODE_INT, limit_p); 60 61 if (array_mode == BLKmode) 62 { 63 if (vect_print_dump_info (REPORT_DETAILS)) 64 fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]", 65 GET_MODE_NAME (mode), count); 66 return false; 67 } 68 69 if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing) 70 { 71 if (vect_print_dump_info (REPORT_DETAILS)) 72 fprintf (vect_dump, "cannot use %s<%s><%s>", 73 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode)); 74 return false; 75 } 76 77 if (vect_print_dump_info (REPORT_DETAILS)) 78 fprintf (vect_dump, "can use %s<%s><%s>", 79 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode)); 80 81 return true; 82 } 83 84 85 /* Return the smallest scalar part of STMT. 86 This is used to determine the vectype of the stmt. We generally set the 87 vectype according to the type of the result (lhs). For stmts whose 88 result-type is different than the type of the arguments (e.g., demotion, 89 promotion), vectype will be reset appropriately (later). Note that we have 90 to visit the smallest datatype in this function, because that determines the 91 VF. If the smallest datatype in the loop is present only as the rhs of a 92 promotion operation - we'd miss it. 93 Such a case, where a variable of this datatype does not appear in the lhs 94 anywhere in the loop, can only occur if it's an invariant: e.g.: 95 'int_x = (int) short_inv', which we'd expect to have been optimized away by 96 invariant motion. However, we cannot rely on invariant motion to always 97 take invariants out of the loop, and so in the case of promotion we also 98 have to check the rhs. 99 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding 100 types. */ 101 102 tree 103 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit, 104 HOST_WIDE_INT *rhs_size_unit) 105 { 106 tree scalar_type = gimple_expr_type (stmt); 107 HOST_WIDE_INT lhs, rhs; 108 109 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type)); 110 111 if (is_gimple_assign (stmt) 112 && (gimple_assign_cast_p (stmt) 113 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR 114 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR)) 115 { 116 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); 117 118 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type)); 119 if (rhs < lhs) 120 scalar_type = rhs_type; 121 } 122 123 *lhs_size_unit = lhs; 124 *rhs_size_unit = rhs; 125 return scalar_type; 126 } 127 128 129 /* Find the place of the data-ref in STMT in the interleaving chain that starts 130 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */ 131 132 int 133 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt) 134 { 135 gimple next_stmt = first_stmt; 136 int result = 0; 137 138 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) 139 return -1; 140 141 while (next_stmt && next_stmt != stmt) 142 { 143 result++; 144 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt)); 145 } 146 147 if (next_stmt) 148 return result; 149 else 150 return -1; 151 } 152 153 154 /* Function vect_insert_into_interleaving_chain. 155 156 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */ 157 158 static void 159 vect_insert_into_interleaving_chain (struct data_reference *dra, 160 struct data_reference *drb) 161 { 162 gimple prev, next; 163 tree next_init; 164 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra)); 165 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb)); 166 167 prev = GROUP_FIRST_ELEMENT (stmtinfo_b); 168 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)); 169 while (next) 170 { 171 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next))); 172 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0) 173 { 174 /* Insert here. */ 175 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra); 176 GROUP_NEXT_ELEMENT (stmtinfo_a) = next; 177 return; 178 } 179 prev = next; 180 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)); 181 } 182 183 /* We got to the end of the list. Insert here. */ 184 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra); 185 GROUP_NEXT_ELEMENT (stmtinfo_a) = NULL; 186 } 187 188 189 /* Function vect_update_interleaving_chain. 190 191 For two data-refs DRA and DRB that are a part of a chain interleaved data 192 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's. 193 194 There are four possible cases: 195 1. New stmts - both DRA and DRB are not a part of any chain: 196 FIRST_DR = DRB 197 NEXT_DR (DRB) = DRA 198 2. DRB is a part of a chain and DRA is not: 199 no need to update FIRST_DR 200 no need to insert DRB 201 insert DRA according to init 202 3. DRA is a part of a chain and DRB is not: 203 if (init of FIRST_DR > init of DRB) 204 FIRST_DR = DRB 205 NEXT(FIRST_DR) = previous FIRST_DR 206 else 207 insert DRB according to its init 208 4. both DRA and DRB are in some interleaving chains: 209 choose the chain with the smallest init of FIRST_DR 210 insert the nodes of the second chain into the first one. */ 211 212 static void 213 vect_update_interleaving_chain (struct data_reference *drb, 214 struct data_reference *dra) 215 { 216 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra)); 217 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb)); 218 tree next_init, init_dra_chain, init_drb_chain; 219 gimple first_a, first_b; 220 tree node_init; 221 gimple node, prev, next, first_stmt; 222 223 /* 1. New stmts - both DRA and DRB are not a part of any chain. */ 224 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b)) 225 { 226 GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (drb); 227 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb); 228 GROUP_NEXT_ELEMENT (stmtinfo_b) = DR_STMT (dra); 229 return; 230 } 231 232 /* 2. DRB is a part of a chain and DRA is not. */ 233 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && GROUP_FIRST_ELEMENT (stmtinfo_b)) 234 { 235 GROUP_FIRST_ELEMENT (stmtinfo_a) = GROUP_FIRST_ELEMENT (stmtinfo_b); 236 /* Insert DRA into the chain of DRB. */ 237 vect_insert_into_interleaving_chain (dra, drb); 238 return; 239 } 240 241 /* 3. DRA is a part of a chain and DRB is not. */ 242 if (GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b)) 243 { 244 gimple old_first_stmt = GROUP_FIRST_ELEMENT (stmtinfo_a); 245 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt ( 246 old_first_stmt))); 247 gimple tmp; 248 249 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0) 250 { 251 /* DRB's init is smaller than the init of the stmt previously marked 252 as the first stmt of the interleaving chain of DRA. Therefore, we 253 update FIRST_STMT and put DRB in the head of the list. */ 254 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb); 255 GROUP_NEXT_ELEMENT (stmtinfo_b) = old_first_stmt; 256 257 /* Update all the stmts in the list to point to the new FIRST_STMT. */ 258 tmp = old_first_stmt; 259 while (tmp) 260 { 261 GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) = DR_STMT (drb); 262 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp)); 263 } 264 } 265 else 266 { 267 /* Insert DRB in the list of DRA. */ 268 vect_insert_into_interleaving_chain (drb, dra); 269 GROUP_FIRST_ELEMENT (stmtinfo_b) = GROUP_FIRST_ELEMENT (stmtinfo_a); 270 } 271 return; 272 } 273 274 /* 4. both DRA and DRB are in some interleaving chains. */ 275 first_a = GROUP_FIRST_ELEMENT (stmtinfo_a); 276 first_b = GROUP_FIRST_ELEMENT (stmtinfo_b); 277 if (first_a == first_b) 278 return; 279 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a))); 280 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b))); 281 282 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0) 283 { 284 /* Insert the nodes of DRA chain into the DRB chain. 285 After inserting a node, continue from this node of the DRB chain (don't 286 start from the beginning. */ 287 node = GROUP_FIRST_ELEMENT (stmtinfo_a); 288 prev = GROUP_FIRST_ELEMENT (stmtinfo_b); 289 first_stmt = first_b; 290 } 291 else 292 { 293 /* Insert the nodes of DRB chain into the DRA chain. 294 After inserting a node, continue from this node of the DRA chain (don't 295 start from the beginning. */ 296 node = GROUP_FIRST_ELEMENT (stmtinfo_b); 297 prev = GROUP_FIRST_ELEMENT (stmtinfo_a); 298 first_stmt = first_a; 299 } 300 301 while (node) 302 { 303 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node))); 304 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)); 305 while (next) 306 { 307 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next))); 308 if (tree_int_cst_compare (next_init, node_init) > 0) 309 { 310 /* Insert here. */ 311 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node; 312 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = next; 313 prev = node; 314 break; 315 } 316 prev = next; 317 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)); 318 } 319 if (!next) 320 { 321 /* We got to the end of the list. Insert here. */ 322 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node; 323 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = NULL; 324 prev = node; 325 } 326 GROUP_FIRST_ELEMENT (vinfo_for_stmt (node)) = first_stmt; 327 node = GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)); 328 } 329 } 330 331 /* Check dependence between DRA and DRB for basic block vectorization. 332 If the accesses share same bases and offsets, we can compare their initial 333 constant offsets to decide whether they differ or not. In case of a read- 334 write dependence we check that the load is before the store to ensure that 335 vectorization will not change the order of the accesses. */ 336 337 static bool 338 vect_drs_dependent_in_basic_block (struct data_reference *dra, 339 struct data_reference *drb) 340 { 341 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b; 342 gimple earlier_stmt; 343 344 /* We only call this function for pairs of loads and stores, but we verify 345 it here. */ 346 if (DR_IS_READ (dra) == DR_IS_READ (drb)) 347 { 348 if (DR_IS_READ (dra)) 349 return false; 350 else 351 return true; 352 } 353 354 /* Check that the data-refs have same bases and offsets. If not, we can't 355 determine if they are dependent. */ 356 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0) 357 || !dr_equal_offsets_p (dra, drb)) 358 return true; 359 360 /* Check the types. */ 361 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)))); 362 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)))); 363 364 if (type_size_a != type_size_b 365 || !types_compatible_p (TREE_TYPE (DR_REF (dra)), 366 TREE_TYPE (DR_REF (drb)))) 367 return true; 368 369 init_a = TREE_INT_CST_LOW (DR_INIT (dra)); 370 init_b = TREE_INT_CST_LOW (DR_INIT (drb)); 371 372 /* Two different locations - no dependence. */ 373 if (init_a != init_b) 374 return false; 375 376 /* We have a read-write dependence. Check that the load is before the store. 377 When we vectorize basic blocks, vector load can be only before 378 corresponding scalar load, and vector store can be only after its 379 corresponding scalar store. So the order of the acceses is preserved in 380 case the load is before the store. */ 381 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb)); 382 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt)))) 383 return false; 384 385 return true; 386 } 387 388 389 /* Function vect_check_interleaving. 390 391 Check if DRA and DRB are a part of interleaving. In case they are, insert 392 DRA and DRB in an interleaving chain. */ 393 394 static bool 395 vect_check_interleaving (struct data_reference *dra, 396 struct data_reference *drb) 397 { 398 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b; 399 400 /* Check that the data-refs have same first location (except init) and they 401 are both either store or load (not load and store). */ 402 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0) 403 || !dr_equal_offsets_p (dra, drb) 404 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) 405 || DR_IS_READ (dra) != DR_IS_READ (drb)) 406 return false; 407 408 /* Check: 409 1. data-refs are of the same type 410 2. their steps are equal 411 3. the step (if greater than zero) is greater than the difference between 412 data-refs' inits. */ 413 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)))); 414 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)))); 415 416 if (type_size_a != type_size_b 417 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)) 418 || !types_compatible_p (TREE_TYPE (DR_REF (dra)), 419 TREE_TYPE (DR_REF (drb)))) 420 return false; 421 422 init_a = TREE_INT_CST_LOW (DR_INIT (dra)); 423 init_b = TREE_INT_CST_LOW (DR_INIT (drb)); 424 step = TREE_INT_CST_LOW (DR_STEP (dra)); 425 426 if (init_a > init_b) 427 { 428 /* If init_a == init_b + the size of the type * k, we have an interleaving, 429 and DRB is accessed before DRA. */ 430 diff_mod_size = (init_a - init_b) % type_size_a; 431 432 if (step && (init_a - init_b) > step) 433 return false; 434 435 if (diff_mod_size == 0) 436 { 437 vect_update_interleaving_chain (drb, dra); 438 if (vect_print_dump_info (REPORT_DR_DETAILS)) 439 { 440 fprintf (vect_dump, "Detected interleaving "); 441 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); 442 fprintf (vect_dump, " and "); 443 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); 444 } 445 return true; 446 } 447 } 448 else 449 { 450 /* If init_b == init_a + the size of the type * k, we have an 451 interleaving, and DRA is accessed before DRB. */ 452 diff_mod_size = (init_b - init_a) % type_size_a; 453 454 if (step && (init_b - init_a) > step) 455 return false; 456 457 if (diff_mod_size == 0) 458 { 459 vect_update_interleaving_chain (dra, drb); 460 if (vect_print_dump_info (REPORT_DR_DETAILS)) 461 { 462 fprintf (vect_dump, "Detected interleaving "); 463 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); 464 fprintf (vect_dump, " and "); 465 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); 466 } 467 return true; 468 } 469 } 470 471 return false; 472 } 473 474 /* Check if data references pointed by DR_I and DR_J are same or 475 belong to same interleaving group. Return FALSE if drs are 476 different, otherwise return TRUE. */ 477 478 static bool 479 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j) 480 { 481 gimple stmt_i = DR_STMT (dr_i); 482 gimple stmt_j = DR_STMT (dr_j); 483 484 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0) 485 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i)) 486 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)) 487 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i)) 488 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))))) 489 return true; 490 else 491 return false; 492 } 493 494 /* If address ranges represented by DDR_I and DDR_J are equal, 495 return TRUE, otherwise return FALSE. */ 496 497 static bool 498 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j) 499 { 500 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j)) 501 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j))) 502 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j)) 503 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j)))) 504 return true; 505 else 506 return false; 507 } 508 509 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be 510 tested at run-time. Return TRUE if DDR was successfully inserted. 511 Return false if versioning is not supported. */ 512 513 static bool 514 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo) 515 { 516 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 517 518 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0) 519 return false; 520 521 if (vect_print_dump_info (REPORT_DR_DETAILS)) 522 { 523 fprintf (vect_dump, "mark for run-time aliasing test between "); 524 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM); 525 fprintf (vect_dump, " and "); 526 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM); 527 } 528 529 if (optimize_loop_nest_for_size_p (loop)) 530 { 531 if (vect_print_dump_info (REPORT_DR_DETAILS)) 532 fprintf (vect_dump, "versioning not supported when optimizing for size."); 533 return false; 534 } 535 536 /* FORNOW: We don't support versioning with outer-loop vectorization. */ 537 if (loop->inner) 538 { 539 if (vect_print_dump_info (REPORT_DR_DETAILS)) 540 fprintf (vect_dump, "versioning not yet supported for outer-loops."); 541 return false; 542 } 543 544 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr); 545 return true; 546 } 547 548 549 /* Function vect_analyze_data_ref_dependence. 550 551 Return TRUE if there (might) exist a dependence between a memory-reference 552 DRA and a memory-reference DRB. When versioning for alias may check a 553 dependence at run-time, return FALSE. Adjust *MAX_VF according to 554 the data dependence. */ 555 556 static bool 557 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr, 558 loop_vec_info loop_vinfo, int *max_vf) 559 { 560 unsigned int i; 561 struct loop *loop = NULL; 562 struct data_reference *dra = DDR_A (ddr); 563 struct data_reference *drb = DDR_B (ddr); 564 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra)); 565 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb)); 566 lambda_vector dist_v; 567 unsigned int loop_depth; 568 569 /* Don't bother to analyze statements marked as unvectorizable. */ 570 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a) 571 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b)) 572 return false; 573 574 if (DDR_ARE_DEPENDENT (ddr) == chrec_known) 575 { 576 /* Independent data accesses. */ 577 vect_check_interleaving (dra, drb); 578 return false; 579 } 580 581 if (loop_vinfo) 582 loop = LOOP_VINFO_LOOP (loop_vinfo); 583 584 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb) 585 return false; 586 587 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) 588 { 589 gimple earlier_stmt; 590 591 if (loop_vinfo) 592 { 593 if (vect_print_dump_info (REPORT_DR_DETAILS)) 594 { 595 fprintf (vect_dump, "versioning for alias required: " 596 "can't determine dependence between "); 597 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); 598 fprintf (vect_dump, " and "); 599 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); 600 } 601 602 /* Add to list of ddrs that need to be tested at run-time. */ 603 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo); 604 } 605 606 /* When vectorizing a basic block unknown depnedence can still mean 607 strided access. */ 608 if (vect_check_interleaving (dra, drb)) 609 return false; 610 611 /* Read-read is OK (we need this check here, after checking for 612 interleaving). */ 613 if (DR_IS_READ (dra) && DR_IS_READ (drb)) 614 return false; 615 616 if (vect_print_dump_info (REPORT_DR_DETAILS)) 617 { 618 fprintf (vect_dump, "can't determine dependence between "); 619 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); 620 fprintf (vect_dump, " and "); 621 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); 622 } 623 624 /* We do not vectorize basic blocks with write-write dependencies. */ 625 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)) 626 return true; 627 628 /* Check that it's not a load-after-store dependence. */ 629 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb)); 630 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt)))) 631 return true; 632 633 return false; 634 } 635 636 /* Versioning for alias is not yet supported for basic block SLP, and 637 dependence distance is unapplicable, hence, in case of known data 638 dependence, basic block vectorization is impossible for now. */ 639 if (!loop_vinfo) 640 { 641 if (dra != drb && vect_check_interleaving (dra, drb)) 642 return false; 643 644 if (vect_print_dump_info (REPORT_DR_DETAILS)) 645 { 646 fprintf (vect_dump, "determined dependence between "); 647 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); 648 fprintf (vect_dump, " and "); 649 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); 650 } 651 652 /* Do not vectorize basic blcoks with write-write dependences. */ 653 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)) 654 return true; 655 656 /* Check if this dependence is allowed in basic block vectorization. */ 657 return vect_drs_dependent_in_basic_block (dra, drb); 658 } 659 660 /* Loop-based vectorization and known data dependence. */ 661 if (DDR_NUM_DIST_VECTS (ddr) == 0) 662 { 663 if (vect_print_dump_info (REPORT_DR_DETAILS)) 664 { 665 fprintf (vect_dump, "versioning for alias required: bad dist vector for "); 666 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); 667 fprintf (vect_dump, " and "); 668 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); 669 } 670 /* Add to list of ddrs that need to be tested at run-time. */ 671 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo); 672 } 673 674 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr)); 675 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v) 676 { 677 int dist = dist_v[loop_depth]; 678 679 if (vect_print_dump_info (REPORT_DR_DETAILS)) 680 fprintf (vect_dump, "dependence distance = %d.", dist); 681 682 if (dist == 0) 683 { 684 if (vect_print_dump_info (REPORT_DR_DETAILS)) 685 { 686 fprintf (vect_dump, "dependence distance == 0 between "); 687 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); 688 fprintf (vect_dump, " and "); 689 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); 690 } 691 692 /* For interleaving, mark that there is a read-write dependency if 693 necessary. We check before that one of the data-refs is store. */ 694 if (DR_IS_READ (dra)) 695 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true; 696 else 697 { 698 if (DR_IS_READ (drb)) 699 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true; 700 } 701 702 continue; 703 } 704 705 if (dist > 0 && DDR_REVERSED_P (ddr)) 706 { 707 /* If DDR_REVERSED_P the order of the data-refs in DDR was 708 reversed (to make distance vector positive), and the actual 709 distance is negative. */ 710 if (vect_print_dump_info (REPORT_DR_DETAILS)) 711 fprintf (vect_dump, "dependence distance negative."); 712 continue; 713 } 714 715 if (abs (dist) >= 2 716 && abs (dist) < *max_vf) 717 { 718 /* The dependence distance requires reduction of the maximal 719 vectorization factor. */ 720 *max_vf = abs (dist); 721 if (vect_print_dump_info (REPORT_DR_DETAILS)) 722 fprintf (vect_dump, "adjusting maximal vectorization factor to %i", 723 *max_vf); 724 } 725 726 if (abs (dist) >= *max_vf) 727 { 728 /* Dependence distance does not create dependence, as far as 729 vectorization is concerned, in this case. */ 730 if (vect_print_dump_info (REPORT_DR_DETAILS)) 731 fprintf (vect_dump, "dependence distance >= VF."); 732 continue; 733 } 734 735 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 736 { 737 fprintf (vect_dump, "not vectorized, possible dependence " 738 "between data-refs "); 739 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); 740 fprintf (vect_dump, " and "); 741 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); 742 } 743 744 return true; 745 } 746 747 return false; 748 } 749 750 /* Function vect_analyze_data_ref_dependences. 751 752 Examine all the data references in the loop, and make sure there do not 753 exist any data dependences between them. Set *MAX_VF according to 754 the maximum vectorization factor the data dependences allow. */ 755 756 bool 757 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, 758 bb_vec_info bb_vinfo, int *max_vf) 759 { 760 unsigned int i; 761 VEC (ddr_p, heap) *ddrs = NULL; 762 struct data_dependence_relation *ddr; 763 764 if (vect_print_dump_info (REPORT_DETAILS)) 765 fprintf (vect_dump, "=== vect_analyze_dependences ==="); 766 767 if (loop_vinfo) 768 ddrs = LOOP_VINFO_DDRS (loop_vinfo); 769 else 770 ddrs = BB_VINFO_DDRS (bb_vinfo); 771 772 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr) 773 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf)) 774 return false; 775 776 return true; 777 } 778 779 780 /* Function vect_compute_data_ref_alignment 781 782 Compute the misalignment of the data reference DR. 783 784 Output: 785 1. If during the misalignment computation it is found that the data reference 786 cannot be vectorized then false is returned. 787 2. DR_MISALIGNMENT (DR) is defined. 788 789 FOR NOW: No analysis is actually performed. Misalignment is calculated 790 only for trivial cases. TODO. */ 791 792 static bool 793 vect_compute_data_ref_alignment (struct data_reference *dr) 794 { 795 gimple stmt = DR_STMT (dr); 796 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 797 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 798 struct loop *loop = NULL; 799 tree ref = DR_REF (dr); 800 tree vectype; 801 tree base, base_addr; 802 bool base_aligned; 803 tree misalign; 804 tree aligned_to, alignment; 805 806 if (vect_print_dump_info (REPORT_DETAILS)) 807 fprintf (vect_dump, "vect_compute_data_ref_alignment:"); 808 809 if (loop_vinfo) 810 loop = LOOP_VINFO_LOOP (loop_vinfo); 811 812 /* Initialize misalignment to unknown. */ 813 SET_DR_MISALIGNMENT (dr, -1); 814 815 misalign = DR_INIT (dr); 816 aligned_to = DR_ALIGNED_TO (dr); 817 base_addr = DR_BASE_ADDRESS (dr); 818 vectype = STMT_VINFO_VECTYPE (stmt_info); 819 820 /* In case the dataref is in an inner-loop of the loop that is being 821 vectorized (LOOP), we use the base and misalignment information 822 relative to the outer-loop (LOOP). This is ok only if the misalignment 823 stays the same throughout the execution of the inner-loop, which is why 824 we have to check that the stride of the dataref in the inner-loop evenly 825 divides by the vector size. */ 826 if (loop && nested_in_vect_loop_p (loop, stmt)) 827 { 828 tree step = DR_STEP (dr); 829 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step); 830 831 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0) 832 { 833 if (vect_print_dump_info (REPORT_ALIGNMENT)) 834 fprintf (vect_dump, "inner step divides the vector-size."); 835 misalign = STMT_VINFO_DR_INIT (stmt_info); 836 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info); 837 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info); 838 } 839 else 840 { 841 if (vect_print_dump_info (REPORT_ALIGNMENT)) 842 fprintf (vect_dump, "inner step doesn't divide the vector-size."); 843 misalign = NULL_TREE; 844 } 845 } 846 847 base = build_fold_indirect_ref (base_addr); 848 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT); 849 850 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0) 851 || !misalign) 852 { 853 if (vect_print_dump_info (REPORT_ALIGNMENT)) 854 { 855 fprintf (vect_dump, "Unknown alignment for access: "); 856 print_generic_expr (vect_dump, base, TDF_SLIM); 857 } 858 return true; 859 } 860 861 if ((DECL_P (base) 862 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)), 863 alignment) >= 0) 864 || (TREE_CODE (base_addr) == SSA_NAME 865 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE ( 866 TREE_TYPE (base_addr)))), 867 alignment) >= 0) 868 || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype))) 869 base_aligned = true; 870 else 871 base_aligned = false; 872 873 if (!base_aligned) 874 { 875 /* Do not change the alignment of global variables if 876 flag_section_anchors is enabled. */ 877 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)) 878 || (TREE_STATIC (base) && flag_section_anchors)) 879 { 880 if (vect_print_dump_info (REPORT_DETAILS)) 881 { 882 fprintf (vect_dump, "can't force alignment of ref: "); 883 print_generic_expr (vect_dump, ref, TDF_SLIM); 884 } 885 return true; 886 } 887 888 /* Force the alignment of the decl. 889 NOTE: This is the only change to the code we make during 890 the analysis phase, before deciding to vectorize the loop. */ 891 if (vect_print_dump_info (REPORT_DETAILS)) 892 { 893 fprintf (vect_dump, "force alignment of "); 894 print_generic_expr (vect_dump, ref, TDF_SLIM); 895 } 896 897 DECL_ALIGN (base) = TYPE_ALIGN (vectype); 898 DECL_USER_ALIGN (base) = 1; 899 } 900 901 /* At this point we assume that the base is aligned. */ 902 gcc_assert (base_aligned 903 || (TREE_CODE (base) == VAR_DECL 904 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype))); 905 906 /* If this is a backward running DR then first access in the larger 907 vectype actually is N-1 elements before the address in the DR. 908 Adjust misalign accordingly. */ 909 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0) 910 { 911 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1); 912 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type, 913 otherwise we wouldn't be here. */ 914 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr)); 915 /* PLUS because DR_STEP was negative. */ 916 misalign = size_binop (PLUS_EXPR, misalign, offset); 917 } 918 919 /* Modulo alignment. */ 920 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment); 921 922 if (!host_integerp (misalign, 1)) 923 { 924 /* Negative or overflowed misalignment value. */ 925 if (vect_print_dump_info (REPORT_DETAILS)) 926 fprintf (vect_dump, "unexpected misalign value"); 927 return false; 928 } 929 930 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign)); 931 932 if (vect_print_dump_info (REPORT_DETAILS)) 933 { 934 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr)); 935 print_generic_expr (vect_dump, ref, TDF_SLIM); 936 } 937 938 return true; 939 } 940 941 942 /* Function vect_compute_data_refs_alignment 943 944 Compute the misalignment of data references in the loop. 945 Return FALSE if a data reference is found that cannot be vectorized. */ 946 947 static bool 948 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo, 949 bb_vec_info bb_vinfo) 950 { 951 VEC (data_reference_p, heap) *datarefs; 952 struct data_reference *dr; 953 unsigned int i; 954 955 if (loop_vinfo) 956 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 957 else 958 datarefs = BB_VINFO_DATAREFS (bb_vinfo); 959 960 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr) 961 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) 962 && !vect_compute_data_ref_alignment (dr)) 963 { 964 if (bb_vinfo) 965 { 966 /* Mark unsupported statement as unvectorizable. */ 967 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false; 968 continue; 969 } 970 else 971 return false; 972 } 973 974 return true; 975 } 976 977 978 /* Function vect_update_misalignment_for_peel 979 980 DR - the data reference whose misalignment is to be adjusted. 981 DR_PEEL - the data reference whose misalignment is being made 982 zero in the vector loop by the peel. 983 NPEEL - the number of iterations in the peel loop if the misalignment 984 of DR_PEEL is known at compile time. */ 985 986 static void 987 vect_update_misalignment_for_peel (struct data_reference *dr, 988 struct data_reference *dr_peel, int npeel) 989 { 990 unsigned int i; 991 VEC(dr_p,heap) *same_align_drs; 992 struct data_reference *current_dr; 993 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr)))); 994 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel)))); 995 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr)); 996 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel)); 997 998 /* For interleaved data accesses the step in the loop must be multiplied by 999 the size of the interleaving group. */ 1000 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) 1001 dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info))); 1002 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info)) 1003 dr_peel_size *= GROUP_SIZE (peel_stmt_info); 1004 1005 /* It can be assumed that the data refs with the same alignment as dr_peel 1006 are aligned in the vector loop. */ 1007 same_align_drs 1008 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel))); 1009 FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr) 1010 { 1011 if (current_dr != dr) 1012 continue; 1013 gcc_assert (DR_MISALIGNMENT (dr) / dr_size == 1014 DR_MISALIGNMENT (dr_peel) / dr_peel_size); 1015 SET_DR_MISALIGNMENT (dr, 0); 1016 return; 1017 } 1018 1019 if (known_alignment_for_access_p (dr) 1020 && known_alignment_for_access_p (dr_peel)) 1021 { 1022 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0; 1023 int misal = DR_MISALIGNMENT (dr); 1024 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1025 misal += negative ? -npeel * dr_size : npeel * dr_size; 1026 misal &= (TYPE_ALIGN (vectype) / BITS_PER_UNIT) - 1; 1027 SET_DR_MISALIGNMENT (dr, misal); 1028 return; 1029 } 1030 1031 if (vect_print_dump_info (REPORT_DETAILS)) 1032 fprintf (vect_dump, "Setting misalignment to -1."); 1033 SET_DR_MISALIGNMENT (dr, -1); 1034 } 1035 1036 1037 /* Function vect_verify_datarefs_alignment 1038 1039 Return TRUE if all data references in the loop can be 1040 handled with respect to alignment. */ 1041 1042 bool 1043 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) 1044 { 1045 VEC (data_reference_p, heap) *datarefs; 1046 struct data_reference *dr; 1047 enum dr_alignment_support supportable_dr_alignment; 1048 unsigned int i; 1049 1050 if (loop_vinfo) 1051 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 1052 else 1053 datarefs = BB_VINFO_DATAREFS (bb_vinfo); 1054 1055 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr) 1056 { 1057 gimple stmt = DR_STMT (dr); 1058 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1059 1060 /* For interleaving, only the alignment of the first access matters. 1061 Skip statements marked as not vectorizable. */ 1062 if ((STMT_VINFO_STRIDED_ACCESS (stmt_info) 1063 && GROUP_FIRST_ELEMENT (stmt_info) != stmt) 1064 || !STMT_VINFO_VECTORIZABLE (stmt_info)) 1065 continue; 1066 1067 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false); 1068 if (!supportable_dr_alignment) 1069 { 1070 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 1071 { 1072 if (DR_IS_READ (dr)) 1073 fprintf (vect_dump, 1074 "not vectorized: unsupported unaligned load."); 1075 else 1076 fprintf (vect_dump, 1077 "not vectorized: unsupported unaligned store."); 1078 1079 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM); 1080 } 1081 return false; 1082 } 1083 if (supportable_dr_alignment != dr_aligned 1084 && vect_print_dump_info (REPORT_ALIGNMENT)) 1085 fprintf (vect_dump, "Vectorizing an unaligned access."); 1086 } 1087 return true; 1088 } 1089 1090 1091 /* Function vector_alignment_reachable_p 1092 1093 Return true if vector alignment for DR is reachable by peeling 1094 a few loop iterations. Return false otherwise. */ 1095 1096 static bool 1097 vector_alignment_reachable_p (struct data_reference *dr) 1098 { 1099 gimple stmt = DR_STMT (dr); 1100 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1101 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1102 1103 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) 1104 { 1105 /* For interleaved access we peel only if number of iterations in 1106 the prolog loop ({VF - misalignment}), is a multiple of the 1107 number of the interleaved accesses. */ 1108 int elem_size, mis_in_elements; 1109 int nelements = TYPE_VECTOR_SUBPARTS (vectype); 1110 1111 /* FORNOW: handle only known alignment. */ 1112 if (!known_alignment_for_access_p (dr)) 1113 return false; 1114 1115 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements; 1116 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size; 1117 1118 if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info)) 1119 return false; 1120 } 1121 1122 /* If misalignment is known at the compile time then allow peeling 1123 only if natural alignment is reachable through peeling. */ 1124 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr)) 1125 { 1126 HOST_WIDE_INT elmsize = 1127 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); 1128 if (vect_print_dump_info (REPORT_DETAILS)) 1129 { 1130 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize); 1131 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr)); 1132 } 1133 if (DR_MISALIGNMENT (dr) % elmsize) 1134 { 1135 if (vect_print_dump_info (REPORT_DETAILS)) 1136 fprintf (vect_dump, "data size does not divide the misalignment.\n"); 1137 return false; 1138 } 1139 } 1140 1141 if (!known_alignment_for_access_p (dr)) 1142 { 1143 tree type = (TREE_TYPE (DR_REF (dr))); 1144 bool is_packed = contains_packed_reference (DR_REF (dr)); 1145 1146 if (compare_tree_int (TYPE_SIZE (type), TYPE_ALIGN (type)) > 0) 1147 is_packed = true; 1148 1149 if (vect_print_dump_info (REPORT_DETAILS)) 1150 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed); 1151 if (targetm.vectorize.vector_alignment_reachable (type, is_packed)) 1152 return true; 1153 else 1154 return false; 1155 } 1156 1157 return true; 1158 } 1159 1160 1161 /* Calculate the cost of the memory access represented by DR. */ 1162 1163 static void 1164 vect_get_data_access_cost (struct data_reference *dr, 1165 unsigned int *inside_cost, 1166 unsigned int *outside_cost) 1167 { 1168 gimple stmt = DR_STMT (dr); 1169 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1170 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); 1171 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1172 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 1173 int ncopies = vf / nunits; 1174 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true); 1175 1176 if (!supportable_dr_alignment) 1177 *inside_cost = VECT_MAX_COST; 1178 else 1179 { 1180 if (DR_IS_READ (dr)) 1181 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost); 1182 else 1183 vect_get_store_cost (dr, ncopies, inside_cost); 1184 } 1185 1186 if (vect_print_dump_info (REPORT_COST)) 1187 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, " 1188 "outside_cost = %d.", *inside_cost, *outside_cost); 1189 } 1190 1191 1192 static hashval_t 1193 vect_peeling_hash (const void *elem) 1194 { 1195 const struct _vect_peel_info *peel_info; 1196 1197 peel_info = (const struct _vect_peel_info *) elem; 1198 return (hashval_t) peel_info->npeel; 1199 } 1200 1201 1202 static int 1203 vect_peeling_hash_eq (const void *elem1, const void *elem2) 1204 { 1205 const struct _vect_peel_info *a, *b; 1206 1207 a = (const struct _vect_peel_info *) elem1; 1208 b = (const struct _vect_peel_info *) elem2; 1209 return (a->npeel == b->npeel); 1210 } 1211 1212 1213 /* Insert DR into peeling hash table with NPEEL as key. */ 1214 1215 static void 1216 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr, 1217 int npeel) 1218 { 1219 struct _vect_peel_info elem, *slot; 1220 void **new_slot; 1221 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true); 1222 1223 elem.npeel = npeel; 1224 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo), 1225 &elem); 1226 if (slot) 1227 slot->count++; 1228 else 1229 { 1230 slot = XNEW (struct _vect_peel_info); 1231 slot->npeel = npeel; 1232 slot->dr = dr; 1233 slot->count = 1; 1234 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot, 1235 INSERT); 1236 *new_slot = slot; 1237 } 1238 1239 if (!supportable_dr_alignment && !flag_vect_cost_model) 1240 slot->count += VECT_MAX_COST; 1241 } 1242 1243 1244 /* Traverse peeling hash table to find peeling option that aligns maximum 1245 number of data accesses. */ 1246 1247 static int 1248 vect_peeling_hash_get_most_frequent (void **slot, void *data) 1249 { 1250 vect_peel_info elem = (vect_peel_info) *slot; 1251 vect_peel_extended_info max = (vect_peel_extended_info) data; 1252 1253 if (elem->count > max->peel_info.count 1254 || (elem->count == max->peel_info.count 1255 && max->peel_info.npeel > elem->npeel)) 1256 { 1257 max->peel_info.npeel = elem->npeel; 1258 max->peel_info.count = elem->count; 1259 max->peel_info.dr = elem->dr; 1260 } 1261 1262 return 1; 1263 } 1264 1265 1266 /* Traverse peeling hash table and calculate cost for each peeling option. 1267 Find the one with the lowest cost. */ 1268 1269 static int 1270 vect_peeling_hash_get_lowest_cost (void **slot, void *data) 1271 { 1272 vect_peel_info elem = (vect_peel_info) *slot; 1273 vect_peel_extended_info min = (vect_peel_extended_info) data; 1274 int save_misalignment, dummy; 1275 unsigned int inside_cost = 0, outside_cost = 0, i; 1276 gimple stmt = DR_STMT (elem->dr); 1277 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1278 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1279 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 1280 struct data_reference *dr; 1281 1282 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr) 1283 { 1284 stmt = DR_STMT (dr); 1285 stmt_info = vinfo_for_stmt (stmt); 1286 /* For interleaving, only the alignment of the first access 1287 matters. */ 1288 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) 1289 && GROUP_FIRST_ELEMENT (stmt_info) != stmt) 1290 continue; 1291 1292 save_misalignment = DR_MISALIGNMENT (dr); 1293 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel); 1294 vect_get_data_access_cost (dr, &inside_cost, &outside_cost); 1295 SET_DR_MISALIGNMENT (dr, save_misalignment); 1296 } 1297 1298 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy, 1299 vect_get_single_scalar_iteration_cost (loop_vinfo)); 1300 1301 if (inside_cost < min->inside_cost 1302 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost)) 1303 { 1304 min->inside_cost = inside_cost; 1305 min->outside_cost = outside_cost; 1306 min->peel_info.dr = elem->dr; 1307 min->peel_info.npeel = elem->npeel; 1308 } 1309 1310 return 1; 1311 } 1312 1313 1314 /* Choose best peeling option by traversing peeling hash table and either 1315 choosing an option with the lowest cost (if cost model is enabled) or the 1316 option that aligns as many accesses as possible. */ 1317 1318 static struct data_reference * 1319 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo, 1320 unsigned int *npeel) 1321 { 1322 struct _vect_peel_extended_info res; 1323 1324 res.peel_info.dr = NULL; 1325 1326 if (flag_vect_cost_model) 1327 { 1328 res.inside_cost = INT_MAX; 1329 res.outside_cost = INT_MAX; 1330 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo), 1331 vect_peeling_hash_get_lowest_cost, &res); 1332 } 1333 else 1334 { 1335 res.peel_info.count = 0; 1336 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo), 1337 vect_peeling_hash_get_most_frequent, &res); 1338 } 1339 1340 *npeel = res.peel_info.npeel; 1341 return res.peel_info.dr; 1342 } 1343 1344 1345 /* Function vect_enhance_data_refs_alignment 1346 1347 This pass will use loop versioning and loop peeling in order to enhance 1348 the alignment of data references in the loop. 1349 1350 FOR NOW: we assume that whatever versioning/peeling takes place, only the 1351 original loop is to be vectorized. Any other loops that are created by 1352 the transformations performed in this pass - are not supposed to be 1353 vectorized. This restriction will be relaxed. 1354 1355 This pass will require a cost model to guide it whether to apply peeling 1356 or versioning or a combination of the two. For example, the scheme that 1357 intel uses when given a loop with several memory accesses, is as follows: 1358 choose one memory access ('p') which alignment you want to force by doing 1359 peeling. Then, either (1) generate a loop in which 'p' is aligned and all 1360 other accesses are not necessarily aligned, or (2) use loop versioning to 1361 generate one loop in which all accesses are aligned, and another loop in 1362 which only 'p' is necessarily aligned. 1363 1364 ("Automatic Intra-Register Vectorization for the Intel Architecture", 1365 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International 1366 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.) 1367 1368 Devising a cost model is the most critical aspect of this work. It will 1369 guide us on which access to peel for, whether to use loop versioning, how 1370 many versions to create, etc. The cost model will probably consist of 1371 generic considerations as well as target specific considerations (on 1372 powerpc for example, misaligned stores are more painful than misaligned 1373 loads). 1374 1375 Here are the general steps involved in alignment enhancements: 1376 1377 -- original loop, before alignment analysis: 1378 for (i=0; i<N; i++){ 1379 x = q[i]; # DR_MISALIGNMENT(q) = unknown 1380 p[i] = y; # DR_MISALIGNMENT(p) = unknown 1381 } 1382 1383 -- After vect_compute_data_refs_alignment: 1384 for (i=0; i<N; i++){ 1385 x = q[i]; # DR_MISALIGNMENT(q) = 3 1386 p[i] = y; # DR_MISALIGNMENT(p) = unknown 1387 } 1388 1389 -- Possibility 1: we do loop versioning: 1390 if (p is aligned) { 1391 for (i=0; i<N; i++){ # loop 1A 1392 x = q[i]; # DR_MISALIGNMENT(q) = 3 1393 p[i] = y; # DR_MISALIGNMENT(p) = 0 1394 } 1395 } 1396 else { 1397 for (i=0; i<N; i++){ # loop 1B 1398 x = q[i]; # DR_MISALIGNMENT(q) = 3 1399 p[i] = y; # DR_MISALIGNMENT(p) = unaligned 1400 } 1401 } 1402 1403 -- Possibility 2: we do loop peeling: 1404 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized). 1405 x = q[i]; 1406 p[i] = y; 1407 } 1408 for (i = 3; i < N; i++){ # loop 2A 1409 x = q[i]; # DR_MISALIGNMENT(q) = 0 1410 p[i] = y; # DR_MISALIGNMENT(p) = unknown 1411 } 1412 1413 -- Possibility 3: combination of loop peeling and versioning: 1414 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized). 1415 x = q[i]; 1416 p[i] = y; 1417 } 1418 if (p is aligned) { 1419 for (i = 3; i<N; i++){ # loop 3A 1420 x = q[i]; # DR_MISALIGNMENT(q) = 0 1421 p[i] = y; # DR_MISALIGNMENT(p) = 0 1422 } 1423 } 1424 else { 1425 for (i = 3; i<N; i++){ # loop 3B 1426 x = q[i]; # DR_MISALIGNMENT(q) = 0 1427 p[i] = y; # DR_MISALIGNMENT(p) = unaligned 1428 } 1429 } 1430 1431 These loops are later passed to loop_transform to be vectorized. The 1432 vectorizer will use the alignment information to guide the transformation 1433 (whether to generate regular loads/stores, or with special handling for 1434 misalignment). */ 1435 1436 bool 1437 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) 1438 { 1439 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 1440 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1441 enum dr_alignment_support supportable_dr_alignment; 1442 struct data_reference *dr0 = NULL, *first_store = NULL; 1443 struct data_reference *dr; 1444 unsigned int i, j; 1445 bool do_peeling = false; 1446 bool do_versioning = false; 1447 bool stat; 1448 gimple stmt; 1449 stmt_vec_info stmt_info; 1450 int vect_versioning_for_alias_required; 1451 unsigned int npeel = 0; 1452 bool all_misalignments_unknown = true; 1453 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 1454 unsigned possible_npeel_number = 1; 1455 tree vectype; 1456 unsigned int nelements, mis, same_align_drs_max = 0; 1457 1458 if (vect_print_dump_info (REPORT_DETAILS)) 1459 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ==="); 1460 1461 /* While cost model enhancements are expected in the future, the high level 1462 view of the code at this time is as follows: 1463 1464 A) If there is a misaligned access then see if peeling to align 1465 this access can make all data references satisfy 1466 vect_supportable_dr_alignment. If so, update data structures 1467 as needed and return true. 1468 1469 B) If peeling wasn't possible and there is a data reference with an 1470 unknown misalignment that does not satisfy vect_supportable_dr_alignment 1471 then see if loop versioning checks can be used to make all data 1472 references satisfy vect_supportable_dr_alignment. If so, update 1473 data structures as needed and return true. 1474 1475 C) If neither peeling nor versioning were successful then return false if 1476 any data reference does not satisfy vect_supportable_dr_alignment. 1477 1478 D) Return true (all data references satisfy vect_supportable_dr_alignment). 1479 1480 Note, Possibility 3 above (which is peeling and versioning together) is not 1481 being done at this time. */ 1482 1483 /* (1) Peeling to force alignment. */ 1484 1485 /* (1.1) Decide whether to perform peeling, and how many iterations to peel: 1486 Considerations: 1487 + How many accesses will become aligned due to the peeling 1488 - How many accesses will become unaligned due to the peeling, 1489 and the cost of misaligned accesses. 1490 - The cost of peeling (the extra runtime checks, the increase 1491 in code size). */ 1492 1493 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr) 1494 { 1495 stmt = DR_STMT (dr); 1496 stmt_info = vinfo_for_stmt (stmt); 1497 1498 if (!STMT_VINFO_RELEVANT (stmt_info)) 1499 continue; 1500 1501 /* For interleaving, only the alignment of the first access 1502 matters. */ 1503 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) 1504 && GROUP_FIRST_ELEMENT (stmt_info) != stmt) 1505 continue; 1506 1507 /* For invariant accesses there is nothing to enhance. */ 1508 if (integer_zerop (DR_STEP (dr))) 1509 continue; 1510 1511 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true); 1512 do_peeling = vector_alignment_reachable_p (dr); 1513 if (do_peeling) 1514 { 1515 if (known_alignment_for_access_p (dr)) 1516 { 1517 unsigned int npeel_tmp; 1518 bool negative = tree_int_cst_compare (DR_STEP (dr), 1519 size_zero_node) < 0; 1520 1521 /* Save info about DR in the hash table. */ 1522 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo)) 1523 LOOP_VINFO_PEELING_HTAB (loop_vinfo) = 1524 htab_create (1, vect_peeling_hash, 1525 vect_peeling_hash_eq, free); 1526 1527 vectype = STMT_VINFO_VECTYPE (stmt_info); 1528 nelements = TYPE_VECTOR_SUBPARTS (vectype); 1529 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE ( 1530 TREE_TYPE (DR_REF (dr)))); 1531 npeel_tmp = (negative 1532 ? (mis - nelements) : (nelements - mis)) 1533 & (nelements - 1); 1534 1535 /* For multiple types, it is possible that the bigger type access 1536 will have more than one peeling option. E.g., a loop with two 1537 types: one of size (vector size / 4), and the other one of 1538 size (vector size / 8). Vectorization factor will 8. If both 1539 access are misaligned by 3, the first one needs one scalar 1540 iteration to be aligned, and the second one needs 5. But the 1541 the first one will be aligned also by peeling 5 scalar 1542 iterations, and in that case both accesses will be aligned. 1543 Hence, except for the immediate peeling amount, we also want 1544 to try to add full vector size, while we don't exceed 1545 vectorization factor. 1546 We do this automtically for cost model, since we calculate cost 1547 for every peeling option. */ 1548 if (!flag_vect_cost_model) 1549 possible_npeel_number = vf /nelements; 1550 1551 /* Handle the aligned case. We may decide to align some other 1552 access, making DR unaligned. */ 1553 if (DR_MISALIGNMENT (dr) == 0) 1554 { 1555 npeel_tmp = 0; 1556 if (!flag_vect_cost_model) 1557 possible_npeel_number++; 1558 } 1559 1560 for (j = 0; j < possible_npeel_number; j++) 1561 { 1562 gcc_assert (npeel_tmp <= vf); 1563 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp); 1564 npeel_tmp += nelements; 1565 } 1566 1567 all_misalignments_unknown = false; 1568 /* Data-ref that was chosen for the case that all the 1569 misalignments are unknown is not relevant anymore, since we 1570 have a data-ref with known alignment. */ 1571 dr0 = NULL; 1572 } 1573 else 1574 { 1575 /* If we don't know all the misalignment values, we prefer 1576 peeling for data-ref that has maximum number of data-refs 1577 with the same alignment, unless the target prefers to align 1578 stores over load. */ 1579 if (all_misalignments_unknown) 1580 { 1581 if (same_align_drs_max < VEC_length (dr_p, 1582 STMT_VINFO_SAME_ALIGN_REFS (stmt_info)) 1583 || !dr0) 1584 { 1585 same_align_drs_max = VEC_length (dr_p, 1586 STMT_VINFO_SAME_ALIGN_REFS (stmt_info)); 1587 dr0 = dr; 1588 } 1589 1590 if (!first_store && DR_IS_WRITE (dr)) 1591 first_store = dr; 1592 } 1593 1594 /* If there are both known and unknown misaligned accesses in the 1595 loop, we choose peeling amount according to the known 1596 accesses. */ 1597 1598 1599 if (!supportable_dr_alignment) 1600 { 1601 dr0 = dr; 1602 if (!first_store && DR_IS_WRITE (dr)) 1603 first_store = dr; 1604 } 1605 } 1606 } 1607 else 1608 { 1609 if (!aligned_access_p (dr)) 1610 { 1611 if (vect_print_dump_info (REPORT_DETAILS)) 1612 fprintf (vect_dump, "vector alignment may not be reachable"); 1613 1614 break; 1615 } 1616 } 1617 } 1618 1619 vect_versioning_for_alias_required 1620 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo); 1621 1622 /* Temporarily, if versioning for alias is required, we disable peeling 1623 until we support peeling and versioning. Often peeling for alignment 1624 will require peeling for loop-bound, which in turn requires that we 1625 know how to adjust the loop ivs after the loop. */ 1626 if (vect_versioning_for_alias_required 1627 || !vect_can_advance_ivs_p (loop_vinfo) 1628 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop))) 1629 do_peeling = false; 1630 1631 if (do_peeling && all_misalignments_unknown 1632 && vect_supportable_dr_alignment (dr0, false)) 1633 { 1634 1635 /* Check if the target requires to prefer stores over loads, i.e., if 1636 misaligned stores are more expensive than misaligned loads (taking 1637 drs with same alignment into account). */ 1638 if (first_store && DR_IS_READ (dr0)) 1639 { 1640 unsigned int load_inside_cost = 0, load_outside_cost = 0; 1641 unsigned int store_inside_cost = 0, store_outside_cost = 0; 1642 unsigned int load_inside_penalty = 0, load_outside_penalty = 0; 1643 unsigned int store_inside_penalty = 0, store_outside_penalty = 0; 1644 1645 vect_get_data_access_cost (dr0, &load_inside_cost, 1646 &load_outside_cost); 1647 vect_get_data_access_cost (first_store, &store_inside_cost, 1648 &store_outside_cost); 1649 1650 /* Calculate the penalty for leaving FIRST_STORE unaligned (by 1651 aligning the load DR0). */ 1652 load_inside_penalty = store_inside_cost; 1653 load_outside_penalty = store_outside_cost; 1654 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS 1655 (vinfo_for_stmt (DR_STMT (first_store))), 1656 i, dr); 1657 i++) 1658 if (DR_IS_READ (dr)) 1659 { 1660 load_inside_penalty += load_inside_cost; 1661 load_outside_penalty += load_outside_cost; 1662 } 1663 else 1664 { 1665 load_inside_penalty += store_inside_cost; 1666 load_outside_penalty += store_outside_cost; 1667 } 1668 1669 /* Calculate the penalty for leaving DR0 unaligned (by 1670 aligning the FIRST_STORE). */ 1671 store_inside_penalty = load_inside_cost; 1672 store_outside_penalty = load_outside_cost; 1673 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS 1674 (vinfo_for_stmt (DR_STMT (dr0))), 1675 i, dr); 1676 i++) 1677 if (DR_IS_READ (dr)) 1678 { 1679 store_inside_penalty += load_inside_cost; 1680 store_outside_penalty += load_outside_cost; 1681 } 1682 else 1683 { 1684 store_inside_penalty += store_inside_cost; 1685 store_outside_penalty += store_outside_cost; 1686 } 1687 1688 if (load_inside_penalty > store_inside_penalty 1689 || (load_inside_penalty == store_inside_penalty 1690 && load_outside_penalty > store_outside_penalty)) 1691 dr0 = first_store; 1692 } 1693 1694 /* In case there are only loads with different unknown misalignments, use 1695 peeling only if it may help to align other accesses in the loop. */ 1696 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS 1697 (vinfo_for_stmt (DR_STMT (dr0)))) 1698 && vect_supportable_dr_alignment (dr0, false) 1699 != dr_unaligned_supported) 1700 do_peeling = false; 1701 } 1702 1703 if (do_peeling && !dr0) 1704 { 1705 /* Peeling is possible, but there is no data access that is not supported 1706 unless aligned. So we try to choose the best possible peeling. */ 1707 1708 /* We should get here only if there are drs with known misalignment. */ 1709 gcc_assert (!all_misalignments_unknown); 1710 1711 /* Choose the best peeling from the hash table. */ 1712 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel); 1713 if (!dr0 || !npeel) 1714 do_peeling = false; 1715 } 1716 1717 if (do_peeling) 1718 { 1719 stmt = DR_STMT (dr0); 1720 stmt_info = vinfo_for_stmt (stmt); 1721 vectype = STMT_VINFO_VECTYPE (stmt_info); 1722 nelements = TYPE_VECTOR_SUBPARTS (vectype); 1723 1724 if (known_alignment_for_access_p (dr0)) 1725 { 1726 bool negative = tree_int_cst_compare (DR_STEP (dr0), 1727 size_zero_node) < 0; 1728 if (!npeel) 1729 { 1730 /* Since it's known at compile time, compute the number of 1731 iterations in the peeled loop (the peeling factor) for use in 1732 updating DR_MISALIGNMENT values. The peeling factor is the 1733 vectorization factor minus the misalignment as an element 1734 count. */ 1735 mis = DR_MISALIGNMENT (dr0); 1736 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0)))); 1737 npeel = ((negative ? mis - nelements : nelements - mis) 1738 & (nelements - 1)); 1739 } 1740 1741 /* For interleaved data access every iteration accesses all the 1742 members of the group, therefore we divide the number of iterations 1743 by the group size. */ 1744 stmt_info = vinfo_for_stmt (DR_STMT (dr0)); 1745 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) 1746 npeel /= GROUP_SIZE (stmt_info); 1747 1748 if (vect_print_dump_info (REPORT_DETAILS)) 1749 fprintf (vect_dump, "Try peeling by %d", npeel); 1750 } 1751 1752 /* Ensure that all data refs can be vectorized after the peel. */ 1753 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr) 1754 { 1755 int save_misalignment; 1756 1757 if (dr == dr0) 1758 continue; 1759 1760 stmt = DR_STMT (dr); 1761 stmt_info = vinfo_for_stmt (stmt); 1762 /* For interleaving, only the alignment of the first access 1763 matters. */ 1764 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) 1765 && GROUP_FIRST_ELEMENT (stmt_info) != stmt) 1766 continue; 1767 1768 save_misalignment = DR_MISALIGNMENT (dr); 1769 vect_update_misalignment_for_peel (dr, dr0, npeel); 1770 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false); 1771 SET_DR_MISALIGNMENT (dr, save_misalignment); 1772 1773 if (!supportable_dr_alignment) 1774 { 1775 do_peeling = false; 1776 break; 1777 } 1778 } 1779 1780 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0) 1781 { 1782 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL); 1783 if (!stat) 1784 do_peeling = false; 1785 else 1786 return stat; 1787 } 1788 1789 if (do_peeling) 1790 { 1791 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i. 1792 If the misalignment of DR_i is identical to that of dr0 then set 1793 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and 1794 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i) 1795 by the peeling factor times the element size of DR_i (MOD the 1796 vectorization factor times the size). Otherwise, the 1797 misalignment of DR_i must be set to unknown. */ 1798 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr) 1799 if (dr != dr0) 1800 vect_update_misalignment_for_peel (dr, dr0, npeel); 1801 1802 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0; 1803 if (npeel) 1804 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel; 1805 else 1806 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0); 1807 SET_DR_MISALIGNMENT (dr0, 0); 1808 if (vect_print_dump_info (REPORT_ALIGNMENT)) 1809 fprintf (vect_dump, "Alignment of access forced using peeling."); 1810 1811 if (vect_print_dump_info (REPORT_DETAILS)) 1812 fprintf (vect_dump, "Peeling for alignment will be applied."); 1813 1814 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL); 1815 gcc_assert (stat); 1816 return stat; 1817 } 1818 } 1819 1820 1821 /* (2) Versioning to force alignment. */ 1822 1823 /* Try versioning if: 1824 1) flag_tree_vect_loop_version is TRUE 1825 2) optimize loop for speed 1826 3) there is at least one unsupported misaligned data ref with an unknown 1827 misalignment, and 1828 4) all misaligned data refs with a known misalignment are supported, and 1829 5) the number of runtime alignment checks is within reason. */ 1830 1831 do_versioning = 1832 flag_tree_vect_loop_version 1833 && optimize_loop_nest_for_speed_p (loop) 1834 && (!loop->inner); /* FORNOW */ 1835 1836 if (do_versioning) 1837 { 1838 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr) 1839 { 1840 stmt = DR_STMT (dr); 1841 stmt_info = vinfo_for_stmt (stmt); 1842 1843 /* For interleaving, only the alignment of the first access 1844 matters. */ 1845 if (aligned_access_p (dr) 1846 || (STMT_VINFO_STRIDED_ACCESS (stmt_info) 1847 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)) 1848 continue; 1849 1850 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false); 1851 1852 if (!supportable_dr_alignment) 1853 { 1854 gimple stmt; 1855 int mask; 1856 tree vectype; 1857 1858 if (known_alignment_for_access_p (dr) 1859 || VEC_length (gimple, 1860 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) 1861 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS)) 1862 { 1863 do_versioning = false; 1864 break; 1865 } 1866 1867 stmt = DR_STMT (dr); 1868 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); 1869 gcc_assert (vectype); 1870 1871 /* The rightmost bits of an aligned address must be zeros. 1872 Construct the mask needed for this test. For example, 1873 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the 1874 mask must be 15 = 0xf. */ 1875 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1; 1876 1877 /* FORNOW: use the same mask to test all potentially unaligned 1878 references in the loop. The vectorizer currently supports 1879 a single vector size, see the reference to 1880 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the 1881 vectorization factor is computed. */ 1882 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo) 1883 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask); 1884 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask; 1885 VEC_safe_push (gimple, heap, 1886 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 1887 DR_STMT (dr)); 1888 } 1889 } 1890 1891 /* Versioning requires at least one misaligned data reference. */ 1892 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)) 1893 do_versioning = false; 1894 else if (!do_versioning) 1895 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0); 1896 } 1897 1898 if (do_versioning) 1899 { 1900 VEC(gimple,heap) *may_misalign_stmts 1901 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo); 1902 gimple stmt; 1903 1904 /* It can now be assumed that the data references in the statements 1905 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version 1906 of the loop being vectorized. */ 1907 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt) 1908 { 1909 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1910 dr = STMT_VINFO_DATA_REF (stmt_info); 1911 SET_DR_MISALIGNMENT (dr, 0); 1912 if (vect_print_dump_info (REPORT_ALIGNMENT)) 1913 fprintf (vect_dump, "Alignment of access forced using versioning."); 1914 } 1915 1916 if (vect_print_dump_info (REPORT_DETAILS)) 1917 fprintf (vect_dump, "Versioning for alignment will be applied."); 1918 1919 /* Peeling and versioning can't be done together at this time. */ 1920 gcc_assert (! (do_peeling && do_versioning)); 1921 1922 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL); 1923 gcc_assert (stat); 1924 return stat; 1925 } 1926 1927 /* This point is reached if neither peeling nor versioning is being done. */ 1928 gcc_assert (! (do_peeling || do_versioning)); 1929 1930 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL); 1931 return stat; 1932 } 1933 1934 1935 /* Function vect_find_same_alignment_drs. 1936 1937 Update group and alignment relations according to the chosen 1938 vectorization factor. */ 1939 1940 static void 1941 vect_find_same_alignment_drs (struct data_dependence_relation *ddr, 1942 loop_vec_info loop_vinfo) 1943 { 1944 unsigned int i; 1945 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1946 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 1947 struct data_reference *dra = DDR_A (ddr); 1948 struct data_reference *drb = DDR_B (ddr); 1949 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra)); 1950 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb)); 1951 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra)))); 1952 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb)))); 1953 lambda_vector dist_v; 1954 unsigned int loop_depth; 1955 1956 if (DDR_ARE_DEPENDENT (ddr) == chrec_known) 1957 return; 1958 1959 if (dra == drb) 1960 return; 1961 1962 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) 1963 return; 1964 1965 /* Loop-based vectorization and known data dependence. */ 1966 if (DDR_NUM_DIST_VECTS (ddr) == 0) 1967 return; 1968 1969 /* Data-dependence analysis reports a distance vector of zero 1970 for data-references that overlap only in the first iteration 1971 but have different sign step (see PR45764). 1972 So as a sanity check require equal DR_STEP. */ 1973 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0)) 1974 return; 1975 1976 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr)); 1977 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v) 1978 { 1979 int dist = dist_v[loop_depth]; 1980 1981 if (vect_print_dump_info (REPORT_DR_DETAILS)) 1982 fprintf (vect_dump, "dependence distance = %d.", dist); 1983 1984 /* Same loop iteration. */ 1985 if (dist == 0 1986 || (dist % vectorization_factor == 0 && dra_size == drb_size)) 1987 { 1988 /* Two references with distance zero have the same alignment. */ 1989 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb); 1990 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra); 1991 if (vect_print_dump_info (REPORT_ALIGNMENT)) 1992 fprintf (vect_dump, "accesses have the same alignment."); 1993 if (vect_print_dump_info (REPORT_DR_DETAILS)) 1994 { 1995 fprintf (vect_dump, "dependence distance modulo vf == 0 between "); 1996 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); 1997 fprintf (vect_dump, " and "); 1998 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); 1999 } 2000 } 2001 } 2002 } 2003 2004 2005 /* Function vect_analyze_data_refs_alignment 2006 2007 Analyze the alignment of the data-references in the loop. 2008 Return FALSE if a data reference is found that cannot be vectorized. */ 2009 2010 bool 2011 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo, 2012 bb_vec_info bb_vinfo) 2013 { 2014 if (vect_print_dump_info (REPORT_DETAILS)) 2015 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ==="); 2016 2017 /* Mark groups of data references with same alignment using 2018 data dependence information. */ 2019 if (loop_vinfo) 2020 { 2021 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo); 2022 struct data_dependence_relation *ddr; 2023 unsigned int i; 2024 2025 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr) 2026 vect_find_same_alignment_drs (ddr, loop_vinfo); 2027 } 2028 2029 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo)) 2030 { 2031 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 2032 fprintf (vect_dump, 2033 "not vectorized: can't calculate alignment for data ref."); 2034 return false; 2035 } 2036 2037 return true; 2038 } 2039 2040 2041 /* Analyze groups of strided accesses: check that DR belongs to a group of 2042 strided accesses of legal size, step, etc. Detect gaps, single element 2043 interleaving, and other special cases. Set strided access info. 2044 Collect groups of strided stores for further use in SLP analysis. */ 2045 2046 static bool 2047 vect_analyze_group_access (struct data_reference *dr) 2048 { 2049 tree step = DR_STEP (dr); 2050 tree scalar_type = TREE_TYPE (DR_REF (dr)); 2051 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type)); 2052 gimple stmt = DR_STMT (dr); 2053 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2054 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 2055 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 2056 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step); 2057 HOST_WIDE_INT stride, last_accessed_element = 1; 2058 bool slp_impossible = false; 2059 struct loop *loop = NULL; 2060 2061 if (loop_vinfo) 2062 loop = LOOP_VINFO_LOOP (loop_vinfo); 2063 2064 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the 2065 interleaving group (including gaps). */ 2066 stride = dr_step / type_size; 2067 2068 /* Not consecutive access is possible only if it is a part of interleaving. */ 2069 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) 2070 { 2071 /* Check if it this DR is a part of interleaving, and is a single 2072 element of the group that is accessed in the loop. */ 2073 2074 /* Gaps are supported only for loads. STEP must be a multiple of the type 2075 size. The size of the group must be a power of 2. */ 2076 if (DR_IS_READ (dr) 2077 && (dr_step % type_size) == 0 2078 && stride > 0 2079 && exact_log2 (stride) != -1) 2080 { 2081 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt; 2082 GROUP_SIZE (vinfo_for_stmt (stmt)) = stride; 2083 if (vect_print_dump_info (REPORT_DR_DETAILS)) 2084 { 2085 fprintf (vect_dump, "Detected single element interleaving "); 2086 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM); 2087 fprintf (vect_dump, " step "); 2088 print_generic_expr (vect_dump, step, TDF_SLIM); 2089 } 2090 2091 if (loop_vinfo) 2092 { 2093 if (vect_print_dump_info (REPORT_DETAILS)) 2094 fprintf (vect_dump, "Data access with gaps requires scalar " 2095 "epilogue loop"); 2096 if (loop->inner) 2097 { 2098 if (vect_print_dump_info (REPORT_DETAILS)) 2099 fprintf (vect_dump, "Peeling for outer loop is not" 2100 " supported"); 2101 return false; 2102 } 2103 2104 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true; 2105 } 2106 2107 return true; 2108 } 2109 2110 if (vect_print_dump_info (REPORT_DETAILS)) 2111 { 2112 fprintf (vect_dump, "not consecutive access "); 2113 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 2114 } 2115 2116 if (bb_vinfo) 2117 { 2118 /* Mark the statement as unvectorizable. */ 2119 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false; 2120 return true; 2121 } 2122 2123 return false; 2124 } 2125 2126 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt) 2127 { 2128 /* First stmt in the interleaving chain. Check the chain. */ 2129 gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)); 2130 struct data_reference *data_ref = dr; 2131 unsigned int count = 1; 2132 tree next_step; 2133 tree prev_init = DR_INIT (data_ref); 2134 gimple prev = stmt; 2135 HOST_WIDE_INT diff, count_in_bytes, gaps = 0; 2136 2137 while (next) 2138 { 2139 /* Skip same data-refs. In case that two or more stmts share 2140 data-ref (supported only for loads), we vectorize only the first 2141 stmt, and the rest get their vectorized loads from the first 2142 one. */ 2143 if (!tree_int_cst_compare (DR_INIT (data_ref), 2144 DR_INIT (STMT_VINFO_DATA_REF ( 2145 vinfo_for_stmt (next))))) 2146 { 2147 if (DR_IS_WRITE (data_ref)) 2148 { 2149 if (vect_print_dump_info (REPORT_DETAILS)) 2150 fprintf (vect_dump, "Two store stmts share the same dr."); 2151 return false; 2152 } 2153 2154 /* Check that there is no load-store dependencies for this loads 2155 to prevent a case of load-store-load to the same location. */ 2156 if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next)) 2157 || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev))) 2158 { 2159 if (vect_print_dump_info (REPORT_DETAILS)) 2160 fprintf (vect_dump, 2161 "READ_WRITE dependence in interleaving."); 2162 return false; 2163 } 2164 2165 /* For load use the same data-ref load. */ 2166 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev; 2167 2168 prev = next; 2169 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next)); 2170 continue; 2171 } 2172 2173 prev = next; 2174 2175 /* Check that all the accesses have the same STEP. */ 2176 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next))); 2177 if (tree_int_cst_compare (step, next_step)) 2178 { 2179 if (vect_print_dump_info (REPORT_DETAILS)) 2180 fprintf (vect_dump, "not consecutive access in interleaving"); 2181 return false; 2182 } 2183 2184 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next)); 2185 /* Check that the distance between two accesses is equal to the type 2186 size. Otherwise, we have gaps. */ 2187 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref)) 2188 - TREE_INT_CST_LOW (prev_init)) / type_size; 2189 if (diff != 1) 2190 { 2191 /* FORNOW: SLP of accesses with gaps is not supported. */ 2192 slp_impossible = true; 2193 if (DR_IS_WRITE (data_ref)) 2194 { 2195 if (vect_print_dump_info (REPORT_DETAILS)) 2196 fprintf (vect_dump, "interleaved store with gaps"); 2197 return false; 2198 } 2199 2200 gaps += diff - 1; 2201 } 2202 2203 last_accessed_element += diff; 2204 2205 /* Store the gap from the previous member of the group. If there is no 2206 gap in the access, GROUP_GAP is always 1. */ 2207 GROUP_GAP (vinfo_for_stmt (next)) = diff; 2208 2209 prev_init = DR_INIT (data_ref); 2210 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next)); 2211 /* Count the number of data-refs in the chain. */ 2212 count++; 2213 } 2214 2215 /* COUNT is the number of accesses found, we multiply it by the size of 2216 the type to get COUNT_IN_BYTES. */ 2217 count_in_bytes = type_size * count; 2218 2219 /* Check that the size of the interleaving (including gaps) is not 2220 greater than STEP. */ 2221 if (dr_step && dr_step < count_in_bytes + gaps * type_size) 2222 { 2223 if (vect_print_dump_info (REPORT_DETAILS)) 2224 { 2225 fprintf (vect_dump, "interleaving size is greater than step for "); 2226 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM); 2227 } 2228 return false; 2229 } 2230 2231 /* Check that the size of the interleaving is equal to STEP for stores, 2232 i.e., that there are no gaps. */ 2233 if (dr_step && dr_step != count_in_bytes) 2234 { 2235 if (DR_IS_READ (dr)) 2236 { 2237 slp_impossible = true; 2238 /* There is a gap after the last load in the group. This gap is a 2239 difference between the stride and the number of elements. When 2240 there is no gap, this difference should be 0. */ 2241 GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count; 2242 } 2243 else 2244 { 2245 if (vect_print_dump_info (REPORT_DETAILS)) 2246 fprintf (vect_dump, "interleaved store with gaps"); 2247 return false; 2248 } 2249 } 2250 2251 /* Check that STEP is a multiple of type size. */ 2252 if (dr_step && (dr_step % type_size) != 0) 2253 { 2254 if (vect_print_dump_info (REPORT_DETAILS)) 2255 { 2256 fprintf (vect_dump, "step is not a multiple of type size: step "); 2257 print_generic_expr (vect_dump, step, TDF_SLIM); 2258 fprintf (vect_dump, " size "); 2259 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type), 2260 TDF_SLIM); 2261 } 2262 return false; 2263 } 2264 2265 if (stride == 0) 2266 stride = count; 2267 2268 GROUP_SIZE (vinfo_for_stmt (stmt)) = stride; 2269 if (vect_print_dump_info (REPORT_DETAILS)) 2270 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride); 2271 2272 /* SLP: create an SLP data structure for every interleaving group of 2273 stores for further analysis in vect_analyse_slp. */ 2274 if (DR_IS_WRITE (dr) && !slp_impossible) 2275 { 2276 if (loop_vinfo) 2277 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), 2278 stmt); 2279 if (bb_vinfo) 2280 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo), 2281 stmt); 2282 } 2283 2284 /* There is a gap in the end of the group. */ 2285 if (stride - last_accessed_element > 0 && loop_vinfo) 2286 { 2287 if (vect_print_dump_info (REPORT_DETAILS)) 2288 fprintf (vect_dump, "Data access with gaps requires scalar " 2289 "epilogue loop"); 2290 if (loop->inner) 2291 { 2292 if (vect_print_dump_info (REPORT_DETAILS)) 2293 fprintf (vect_dump, "Peeling for outer loop is not supported"); 2294 return false; 2295 } 2296 2297 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true; 2298 } 2299 } 2300 2301 return true; 2302 } 2303 2304 2305 /* Analyze the access pattern of the data-reference DR. 2306 In case of non-consecutive accesses call vect_analyze_group_access() to 2307 analyze groups of strided accesses. */ 2308 2309 static bool 2310 vect_analyze_data_ref_access (struct data_reference *dr) 2311 { 2312 tree step = DR_STEP (dr); 2313 tree scalar_type = TREE_TYPE (DR_REF (dr)); 2314 gimple stmt = DR_STMT (dr); 2315 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2316 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 2317 struct loop *loop = NULL; 2318 HOST_WIDE_INT dr_step; 2319 2320 if (loop_vinfo) 2321 loop = LOOP_VINFO_LOOP (loop_vinfo); 2322 2323 if (loop_vinfo && !step) 2324 { 2325 if (vect_print_dump_info (REPORT_DETAILS)) 2326 fprintf (vect_dump, "bad data-ref access in loop"); 2327 return false; 2328 } 2329 2330 /* Allow invariant loads in loops. */ 2331 dr_step = TREE_INT_CST_LOW (step); 2332 if (loop_vinfo && dr_step == 0) 2333 { 2334 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL; 2335 if (nested_in_vect_loop_p (loop, stmt)) 2336 { 2337 if (vect_print_dump_info (REPORT_DETAILS)) 2338 fprintf (vect_dump, "zero step in inner loop of nest"); 2339 return false; 2340 } 2341 return DR_IS_READ (dr); 2342 } 2343 2344 if (loop && nested_in_vect_loop_p (loop, stmt)) 2345 { 2346 /* Interleaved accesses are not yet supported within outer-loop 2347 vectorization for references in the inner-loop. */ 2348 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL; 2349 2350 /* For the rest of the analysis we use the outer-loop step. */ 2351 step = STMT_VINFO_DR_STEP (stmt_info); 2352 dr_step = TREE_INT_CST_LOW (step); 2353 2354 if (dr_step == 0) 2355 { 2356 if (vect_print_dump_info (REPORT_ALIGNMENT)) 2357 fprintf (vect_dump, "zero step in outer loop."); 2358 if (DR_IS_READ (dr)) 2359 return true; 2360 else 2361 return false; 2362 } 2363 } 2364 2365 /* Consecutive? */ 2366 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)) 2367 || (dr_step < 0 2368 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step))) 2369 { 2370 /* Mark that it is not interleaving. */ 2371 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL; 2372 return true; 2373 } 2374 2375 if (loop && nested_in_vect_loop_p (loop, stmt)) 2376 { 2377 if (vect_print_dump_info (REPORT_ALIGNMENT)) 2378 fprintf (vect_dump, "strided access in outer loop."); 2379 return false; 2380 } 2381 2382 /* Not consecutive access - check if it's a part of interleaving group. */ 2383 return vect_analyze_group_access (dr); 2384 } 2385 2386 2387 /* Function vect_analyze_data_ref_accesses. 2388 2389 Analyze the access pattern of all the data references in the loop. 2390 2391 FORNOW: the only access pattern that is considered vectorizable is a 2392 simple step 1 (consecutive) access. 2393 2394 FORNOW: handle only arrays and pointer accesses. */ 2395 2396 bool 2397 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) 2398 { 2399 unsigned int i; 2400 VEC (data_reference_p, heap) *datarefs; 2401 struct data_reference *dr; 2402 2403 if (vect_print_dump_info (REPORT_DETAILS)) 2404 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ==="); 2405 2406 if (loop_vinfo) 2407 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 2408 else 2409 datarefs = BB_VINFO_DATAREFS (bb_vinfo); 2410 2411 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr) 2412 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) 2413 && !vect_analyze_data_ref_access (dr)) 2414 { 2415 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 2416 fprintf (vect_dump, "not vectorized: complicated access pattern."); 2417 2418 if (bb_vinfo) 2419 { 2420 /* Mark the statement as not vectorizable. */ 2421 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false; 2422 continue; 2423 } 2424 else 2425 return false; 2426 } 2427 2428 return true; 2429 } 2430 2431 /* Function vect_prune_runtime_alias_test_list. 2432 2433 Prune a list of ddrs to be tested at run-time by versioning for alias. 2434 Return FALSE if resulting list of ddrs is longer then allowed by 2435 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */ 2436 2437 bool 2438 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) 2439 { 2440 VEC (ddr_p, heap) * ddrs = 2441 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo); 2442 unsigned i, j; 2443 2444 if (vect_print_dump_info (REPORT_DETAILS)) 2445 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ==="); 2446 2447 for (i = 0; i < VEC_length (ddr_p, ddrs); ) 2448 { 2449 bool found; 2450 ddr_p ddr_i; 2451 2452 ddr_i = VEC_index (ddr_p, ddrs, i); 2453 found = false; 2454 2455 for (j = 0; j < i; j++) 2456 { 2457 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j); 2458 2459 if (vect_vfa_range_equal (ddr_i, ddr_j)) 2460 { 2461 if (vect_print_dump_info (REPORT_DR_DETAILS)) 2462 { 2463 fprintf (vect_dump, "found equal ranges "); 2464 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM); 2465 fprintf (vect_dump, ", "); 2466 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM); 2467 fprintf (vect_dump, " and "); 2468 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM); 2469 fprintf (vect_dump, ", "); 2470 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM); 2471 } 2472 found = true; 2473 break; 2474 } 2475 } 2476 2477 if (found) 2478 { 2479 VEC_ordered_remove (ddr_p, ddrs, i); 2480 continue; 2481 } 2482 i++; 2483 } 2484 2485 if (VEC_length (ddr_p, ddrs) > 2486 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS)) 2487 { 2488 if (vect_print_dump_info (REPORT_DR_DETAILS)) 2489 { 2490 fprintf (vect_dump, 2491 "disable versioning for alias - max number of generated " 2492 "checks exceeded."); 2493 } 2494 2495 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0); 2496 2497 return false; 2498 } 2499 2500 return true; 2501 } 2502 2503 /* Check whether a non-affine read in stmt is suitable for gather load 2504 and if so, return a builtin decl for that operation. */ 2505 2506 tree 2507 vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep, 2508 tree *offp, int *scalep) 2509 { 2510 HOST_WIDE_INT scale = 1, pbitpos, pbitsize; 2511 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2512 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2513 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 2514 tree offtype = NULL_TREE; 2515 tree decl, base, off; 2516 enum machine_mode pmode; 2517 int punsignedp, pvolatilep; 2518 2519 /* The gather builtins need address of the form 2520 loop_invariant + vector * {1, 2, 4, 8} 2521 or 2522 loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }. 2523 Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture 2524 of loop invariants/SSA_NAMEs defined in the loop, with casts, 2525 multiplications and additions in it. To get a vector, we need 2526 a single SSA_NAME that will be defined in the loop and will 2527 contain everything that is not loop invariant and that can be 2528 vectorized. The following code attempts to find such a preexistng 2529 SSA_NAME OFF and put the loop invariants into a tree BASE 2530 that can be gimplified before the loop. */ 2531 base = get_inner_reference (DR_REF (dr), &pbitsize, &pbitpos, &off, 2532 &pmode, &punsignedp, &pvolatilep, false); 2533 gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0); 2534 2535 if (TREE_CODE (base) == MEM_REF) 2536 { 2537 if (!integer_zerop (TREE_OPERAND (base, 1))) 2538 { 2539 if (off == NULL_TREE) 2540 { 2541 double_int moff = mem_ref_offset (base); 2542 off = double_int_to_tree (sizetype, moff); 2543 } 2544 else 2545 off = size_binop (PLUS_EXPR, off, 2546 fold_convert (sizetype, TREE_OPERAND (base, 1))); 2547 } 2548 base = TREE_OPERAND (base, 0); 2549 } 2550 else 2551 base = build_fold_addr_expr (base); 2552 2553 if (off == NULL_TREE) 2554 off = size_zero_node; 2555 2556 /* If base is not loop invariant, either off is 0, then we start with just 2557 the constant offset in the loop invariant BASE and continue with base 2558 as OFF, otherwise give up. 2559 We could handle that case by gimplifying the addition of base + off 2560 into some SSA_NAME and use that as off, but for now punt. */ 2561 if (!expr_invariant_in_loop_p (loop, base)) 2562 { 2563 if (!integer_zerop (off)) 2564 return NULL_TREE; 2565 off = base; 2566 base = size_int (pbitpos / BITS_PER_UNIT); 2567 } 2568 /* Otherwise put base + constant offset into the loop invariant BASE 2569 and continue with OFF. */ 2570 else 2571 { 2572 base = fold_convert (sizetype, base); 2573 base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT)); 2574 } 2575 2576 /* OFF at this point may be either a SSA_NAME or some tree expression 2577 from get_inner_reference. Try to peel off loop invariants from it 2578 into BASE as long as possible. */ 2579 STRIP_NOPS (off); 2580 while (offtype == NULL_TREE) 2581 { 2582 enum tree_code code; 2583 tree op0, op1, add = NULL_TREE; 2584 2585 if (TREE_CODE (off) == SSA_NAME) 2586 { 2587 gimple def_stmt = SSA_NAME_DEF_STMT (off); 2588 2589 if (expr_invariant_in_loop_p (loop, off)) 2590 return NULL_TREE; 2591 2592 if (gimple_code (def_stmt) != GIMPLE_ASSIGN) 2593 break; 2594 2595 op0 = gimple_assign_rhs1 (def_stmt); 2596 code = gimple_assign_rhs_code (def_stmt); 2597 op1 = gimple_assign_rhs2 (def_stmt); 2598 } 2599 else 2600 { 2601 if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS) 2602 return NULL_TREE; 2603 code = TREE_CODE (off); 2604 extract_ops_from_tree (off, &code, &op0, &op1); 2605 } 2606 switch (code) 2607 { 2608 case POINTER_PLUS_EXPR: 2609 case PLUS_EXPR: 2610 if (expr_invariant_in_loop_p (loop, op0)) 2611 { 2612 add = op0; 2613 off = op1; 2614 do_add: 2615 add = fold_convert (sizetype, add); 2616 if (scale != 1) 2617 add = size_binop (MULT_EXPR, add, size_int (scale)); 2618 base = size_binop (PLUS_EXPR, base, add); 2619 continue; 2620 } 2621 if (expr_invariant_in_loop_p (loop, op1)) 2622 { 2623 add = op1; 2624 off = op0; 2625 goto do_add; 2626 } 2627 break; 2628 case MINUS_EXPR: 2629 if (expr_invariant_in_loop_p (loop, op1)) 2630 { 2631 add = fold_convert (sizetype, op1); 2632 add = size_binop (MINUS_EXPR, size_zero_node, add); 2633 off = op0; 2634 goto do_add; 2635 } 2636 break; 2637 case MULT_EXPR: 2638 if (scale == 1 && host_integerp (op1, 0)) 2639 { 2640 scale = tree_low_cst (op1, 0); 2641 off = op0; 2642 continue; 2643 } 2644 break; 2645 case SSA_NAME: 2646 off = op0; 2647 continue; 2648 CASE_CONVERT: 2649 if (!POINTER_TYPE_P (TREE_TYPE (op0)) 2650 && !INTEGRAL_TYPE_P (TREE_TYPE (op0))) 2651 break; 2652 if (TYPE_PRECISION (TREE_TYPE (op0)) 2653 == TYPE_PRECISION (TREE_TYPE (off))) 2654 { 2655 off = op0; 2656 continue; 2657 } 2658 if (TYPE_PRECISION (TREE_TYPE (op0)) 2659 < TYPE_PRECISION (TREE_TYPE (off))) 2660 { 2661 off = op0; 2662 offtype = TREE_TYPE (off); 2663 STRIP_NOPS (off); 2664 continue; 2665 } 2666 break; 2667 default: 2668 break; 2669 } 2670 break; 2671 } 2672 2673 /* If at the end OFF still isn't a SSA_NAME or isn't 2674 defined in the loop, punt. */ 2675 if (TREE_CODE (off) != SSA_NAME 2676 || expr_invariant_in_loop_p (loop, off)) 2677 return NULL_TREE; 2678 2679 if (offtype == NULL_TREE) 2680 offtype = TREE_TYPE (off); 2681 2682 decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info), 2683 offtype, scale); 2684 if (decl == NULL_TREE) 2685 return NULL_TREE; 2686 2687 if (basep) 2688 *basep = base; 2689 if (offp) 2690 *offp = off; 2691 if (scalep) 2692 *scalep = scale; 2693 return decl; 2694 } 2695 2696 2697 /* Function vect_analyze_data_refs. 2698 2699 Find all the data references in the loop or basic block. 2700 2701 The general structure of the analysis of data refs in the vectorizer is as 2702 follows: 2703 1- vect_analyze_data_refs(loop/bb): call 2704 compute_data_dependences_for_loop/bb to find and analyze all data-refs 2705 in the loop/bb and their dependences. 2706 2- vect_analyze_dependences(): apply dependence testing using ddrs. 2707 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok. 2708 4- vect_analyze_drs_access(): check that ref_stmt.step is ok. 2709 2710 */ 2711 2712 bool 2713 vect_analyze_data_refs (loop_vec_info loop_vinfo, 2714 bb_vec_info bb_vinfo, 2715 int *min_vf) 2716 { 2717 struct loop *loop = NULL; 2718 basic_block bb = NULL; 2719 unsigned int i; 2720 VEC (data_reference_p, heap) *datarefs; 2721 struct data_reference *dr; 2722 tree scalar_type; 2723 bool res, stop_bb_analysis = false; 2724 2725 if (vect_print_dump_info (REPORT_DETAILS)) 2726 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n"); 2727 2728 if (loop_vinfo) 2729 { 2730 loop = LOOP_VINFO_LOOP (loop_vinfo); 2731 res = compute_data_dependences_for_loop 2732 (loop, true, 2733 &LOOP_VINFO_LOOP_NEST (loop_vinfo), 2734 &LOOP_VINFO_DATAREFS (loop_vinfo), 2735 &LOOP_VINFO_DDRS (loop_vinfo)); 2736 2737 if (!res) 2738 { 2739 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 2740 fprintf (vect_dump, "not vectorized: loop contains function calls" 2741 " or data references that cannot be analyzed"); 2742 return false; 2743 } 2744 2745 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 2746 } 2747 else 2748 { 2749 bb = BB_VINFO_BB (bb_vinfo); 2750 res = compute_data_dependences_for_bb (bb, true, 2751 &BB_VINFO_DATAREFS (bb_vinfo), 2752 &BB_VINFO_DDRS (bb_vinfo)); 2753 if (!res) 2754 { 2755 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 2756 fprintf (vect_dump, "not vectorized: basic block contains function" 2757 " calls or data references that cannot be analyzed"); 2758 return false; 2759 } 2760 2761 datarefs = BB_VINFO_DATAREFS (bb_vinfo); 2762 } 2763 2764 /* Go through the data-refs, check that the analysis succeeded. Update 2765 pointer from stmt_vec_info struct to DR and vectype. */ 2766 2767 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr) 2768 { 2769 gimple stmt; 2770 stmt_vec_info stmt_info; 2771 tree base, offset, init; 2772 bool gather = false; 2773 int vf; 2774 2775 if (!dr || !DR_REF (dr)) 2776 { 2777 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 2778 fprintf (vect_dump, "not vectorized: unhandled data-ref "); 2779 2780 return false; 2781 } 2782 2783 stmt = DR_STMT (dr); 2784 stmt_info = vinfo_for_stmt (stmt); 2785 2786 if (stop_bb_analysis) 2787 { 2788 STMT_VINFO_VECTORIZABLE (stmt_info) = false; 2789 continue; 2790 } 2791 2792 /* Check that analysis of the data-ref succeeded. */ 2793 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr) 2794 || !DR_STEP (dr)) 2795 { 2796 /* If target supports vector gather loads, see if they can't 2797 be used. */ 2798 if (loop_vinfo 2799 && DR_IS_READ (dr) 2800 && !TREE_THIS_VOLATILE (DR_REF (dr)) 2801 && targetm.vectorize.builtin_gather != NULL 2802 && !nested_in_vect_loop_p (loop, stmt)) 2803 { 2804 struct data_reference *newdr 2805 = create_data_ref (NULL, loop_containing_stmt (stmt), 2806 DR_REF (dr), stmt, true); 2807 gcc_assert (newdr != NULL && DR_REF (newdr)); 2808 if (DR_BASE_ADDRESS (newdr) 2809 && DR_OFFSET (newdr) 2810 && DR_INIT (newdr) 2811 && DR_STEP (newdr) 2812 && integer_zerop (DR_STEP (newdr))) 2813 { 2814 dr = newdr; 2815 gather = true; 2816 } 2817 else 2818 free_data_ref (newdr); 2819 } 2820 2821 if (!gather) 2822 { 2823 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 2824 { 2825 fprintf (vect_dump, "not vectorized: data ref analysis " 2826 "failed "); 2827 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 2828 } 2829 2830 if (bb_vinfo) 2831 { 2832 STMT_VINFO_VECTORIZABLE (stmt_info) = false; 2833 stop_bb_analysis = true; 2834 continue; 2835 } 2836 2837 return false; 2838 } 2839 } 2840 2841 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST) 2842 { 2843 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 2844 fprintf (vect_dump, "not vectorized: base addr of dr is a " 2845 "constant"); 2846 2847 if (bb_vinfo) 2848 { 2849 STMT_VINFO_VECTORIZABLE (stmt_info) = false; 2850 stop_bb_analysis = true; 2851 continue; 2852 } 2853 2854 if (gather) 2855 free_data_ref (dr); 2856 return false; 2857 } 2858 2859 if (TREE_THIS_VOLATILE (DR_REF (dr))) 2860 { 2861 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 2862 { 2863 fprintf (vect_dump, "not vectorized: volatile type "); 2864 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 2865 } 2866 2867 if (bb_vinfo) 2868 { 2869 STMT_VINFO_VECTORIZABLE (stmt_info) = false; 2870 stop_bb_analysis = true; 2871 continue; 2872 } 2873 2874 return false; 2875 } 2876 2877 if (stmt_can_throw_internal (stmt)) 2878 { 2879 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 2880 { 2881 fprintf (vect_dump, "not vectorized: statement can throw an " 2882 "exception "); 2883 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 2884 } 2885 2886 if (bb_vinfo) 2887 { 2888 STMT_VINFO_VECTORIZABLE (stmt_info) = false; 2889 stop_bb_analysis = true; 2890 continue; 2891 } 2892 2893 if (gather) 2894 free_data_ref (dr); 2895 return false; 2896 } 2897 2898 if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF 2899 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1))) 2900 { 2901 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 2902 { 2903 fprintf (vect_dump, "not vectorized: statement is bitfield " 2904 "access "); 2905 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 2906 } 2907 2908 if (bb_vinfo) 2909 { 2910 STMT_VINFO_VECTORIZABLE (stmt_info) = false; 2911 stop_bb_analysis = true; 2912 continue; 2913 } 2914 2915 if (gather) 2916 free_data_ref (dr); 2917 return false; 2918 } 2919 2920 base = unshare_expr (DR_BASE_ADDRESS (dr)); 2921 offset = unshare_expr (DR_OFFSET (dr)); 2922 init = unshare_expr (DR_INIT (dr)); 2923 2924 if (is_gimple_call (stmt)) 2925 { 2926 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 2927 { 2928 fprintf (vect_dump, "not vectorized: dr in a call "); 2929 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 2930 } 2931 2932 if (bb_vinfo) 2933 { 2934 STMT_VINFO_VECTORIZABLE (stmt_info) = false; 2935 stop_bb_analysis = true; 2936 continue; 2937 } 2938 2939 if (gather) 2940 free_data_ref (dr); 2941 return false; 2942 } 2943 2944 /* Update DR field in stmt_vec_info struct. */ 2945 2946 /* If the dataref is in an inner-loop of the loop that is considered for 2947 for vectorization, we also want to analyze the access relative to 2948 the outer-loop (DR contains information only relative to the 2949 inner-most enclosing loop). We do that by building a reference to the 2950 first location accessed by the inner-loop, and analyze it relative to 2951 the outer-loop. */ 2952 if (loop && nested_in_vect_loop_p (loop, stmt)) 2953 { 2954 tree outer_step, outer_base, outer_init; 2955 HOST_WIDE_INT pbitsize, pbitpos; 2956 tree poffset; 2957 enum machine_mode pmode; 2958 int punsignedp, pvolatilep; 2959 affine_iv base_iv, offset_iv; 2960 tree dinit; 2961 2962 /* Build a reference to the first location accessed by the 2963 inner-loop: *(BASE+INIT). (The first location is actually 2964 BASE+INIT+OFFSET, but we add OFFSET separately later). */ 2965 tree inner_base = build_fold_indirect_ref 2966 (fold_build_pointer_plus (base, init)); 2967 2968 if (vect_print_dump_info (REPORT_DETAILS)) 2969 { 2970 fprintf (vect_dump, "analyze in outer-loop: "); 2971 print_generic_expr (vect_dump, inner_base, TDF_SLIM); 2972 } 2973 2974 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos, 2975 &poffset, &pmode, &punsignedp, &pvolatilep, false); 2976 gcc_assert (outer_base != NULL_TREE); 2977 2978 if (pbitpos % BITS_PER_UNIT != 0) 2979 { 2980 if (vect_print_dump_info (REPORT_DETAILS)) 2981 fprintf (vect_dump, "failed: bit offset alignment.\n"); 2982 return false; 2983 } 2984 2985 outer_base = build_fold_addr_expr (outer_base); 2986 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base, 2987 &base_iv, false)) 2988 { 2989 if (vect_print_dump_info (REPORT_DETAILS)) 2990 fprintf (vect_dump, "failed: evolution of base is not affine.\n"); 2991 return false; 2992 } 2993 2994 if (offset) 2995 { 2996 if (poffset) 2997 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, 2998 poffset); 2999 else 3000 poffset = offset; 3001 } 3002 3003 if (!poffset) 3004 { 3005 offset_iv.base = ssize_int (0); 3006 offset_iv.step = ssize_int (0); 3007 } 3008 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset, 3009 &offset_iv, false)) 3010 { 3011 if (vect_print_dump_info (REPORT_DETAILS)) 3012 fprintf (vect_dump, "evolution of offset is not affine.\n"); 3013 return false; 3014 } 3015 3016 outer_init = ssize_int (pbitpos / BITS_PER_UNIT); 3017 split_constant_offset (base_iv.base, &base_iv.base, &dinit); 3018 outer_init = size_binop (PLUS_EXPR, outer_init, dinit); 3019 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit); 3020 outer_init = size_binop (PLUS_EXPR, outer_init, dinit); 3021 3022 outer_step = size_binop (PLUS_EXPR, 3023 fold_convert (ssizetype, base_iv.step), 3024 fold_convert (ssizetype, offset_iv.step)); 3025 3026 STMT_VINFO_DR_STEP (stmt_info) = outer_step; 3027 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */ 3028 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base; 3029 STMT_VINFO_DR_INIT (stmt_info) = outer_init; 3030 STMT_VINFO_DR_OFFSET (stmt_info) = 3031 fold_convert (ssizetype, offset_iv.base); 3032 STMT_VINFO_DR_ALIGNED_TO (stmt_info) = 3033 size_int (highest_pow2_factor (offset_iv.base)); 3034 3035 if (vect_print_dump_info (REPORT_DETAILS)) 3036 { 3037 fprintf (vect_dump, "\touter base_address: "); 3038 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM); 3039 fprintf (vect_dump, "\n\touter offset from base address: "); 3040 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM); 3041 fprintf (vect_dump, "\n\touter constant offset from base address: "); 3042 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM); 3043 fprintf (vect_dump, "\n\touter step: "); 3044 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM); 3045 fprintf (vect_dump, "\n\touter aligned to: "); 3046 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM); 3047 } 3048 } 3049 3050 if (STMT_VINFO_DATA_REF (stmt_info)) 3051 { 3052 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 3053 { 3054 fprintf (vect_dump, 3055 "not vectorized: more than one data ref in stmt: "); 3056 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 3057 } 3058 3059 if (bb_vinfo) 3060 { 3061 STMT_VINFO_VECTORIZABLE (stmt_info) = false; 3062 stop_bb_analysis = true; 3063 continue; 3064 } 3065 3066 if (gather) 3067 free_data_ref (dr); 3068 return false; 3069 } 3070 3071 STMT_VINFO_DATA_REF (stmt_info) = dr; 3072 3073 /* Set vectype for STMT. */ 3074 scalar_type = TREE_TYPE (DR_REF (dr)); 3075 STMT_VINFO_VECTYPE (stmt_info) = 3076 get_vectype_for_scalar_type (scalar_type); 3077 if (!STMT_VINFO_VECTYPE (stmt_info)) 3078 { 3079 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 3080 { 3081 fprintf (vect_dump, 3082 "not vectorized: no vectype for stmt: "); 3083 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 3084 fprintf (vect_dump, " scalar_type: "); 3085 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS); 3086 } 3087 3088 if (bb_vinfo) 3089 { 3090 /* Mark the statement as not vectorizable. */ 3091 STMT_VINFO_VECTORIZABLE (stmt_info) = false; 3092 stop_bb_analysis = true; 3093 continue; 3094 } 3095 3096 if (gather) 3097 { 3098 STMT_VINFO_DATA_REF (stmt_info) = NULL; 3099 free_data_ref (dr); 3100 } 3101 return false; 3102 } 3103 3104 /* Adjust the minimal vectorization factor according to the 3105 vector type. */ 3106 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); 3107 if (vf > *min_vf) 3108 *min_vf = vf; 3109 3110 if (gather) 3111 { 3112 unsigned int j, k, n; 3113 struct data_reference *olddr 3114 = VEC_index (data_reference_p, datarefs, i); 3115 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo); 3116 struct data_dependence_relation *ddr, *newddr; 3117 bool bad = false; 3118 tree off; 3119 VEC (loop_p, heap) *nest = LOOP_VINFO_LOOP_NEST (loop_vinfo); 3120 3121 if (!vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL) 3122 || get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE) 3123 { 3124 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 3125 { 3126 fprintf (vect_dump, 3127 "not vectorized: not suitable for gather "); 3128 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 3129 } 3130 return false; 3131 } 3132 3133 n = VEC_length (data_reference_p, datarefs) - 1; 3134 for (j = 0, k = i - 1; j < i; j++) 3135 { 3136 ddr = VEC_index (ddr_p, ddrs, k); 3137 gcc_assert (DDR_B (ddr) == olddr); 3138 newddr = initialize_data_dependence_relation (DDR_A (ddr), dr, 3139 nest); 3140 VEC_replace (ddr_p, ddrs, k, newddr); 3141 free_dependence_relation (ddr); 3142 if (!bad 3143 && DR_IS_WRITE (DDR_A (newddr)) 3144 && DDR_ARE_DEPENDENT (newddr) != chrec_known) 3145 bad = true; 3146 k += --n; 3147 } 3148 3149 k++; 3150 n = k + VEC_length (data_reference_p, datarefs) - i - 1; 3151 for (; k < n; k++) 3152 { 3153 ddr = VEC_index (ddr_p, ddrs, k); 3154 gcc_assert (DDR_A (ddr) == olddr); 3155 newddr = initialize_data_dependence_relation (dr, DDR_B (ddr), 3156 nest); 3157 VEC_replace (ddr_p, ddrs, k, newddr); 3158 free_dependence_relation (ddr); 3159 if (!bad 3160 && DR_IS_WRITE (DDR_B (newddr)) 3161 && DDR_ARE_DEPENDENT (newddr) != chrec_known) 3162 bad = true; 3163 } 3164 3165 k = VEC_length (ddr_p, ddrs) 3166 - VEC_length (data_reference_p, datarefs) + i; 3167 ddr = VEC_index (ddr_p, ddrs, k); 3168 gcc_assert (DDR_A (ddr) == olddr && DDR_B (ddr) == olddr); 3169 newddr = initialize_data_dependence_relation (dr, dr, nest); 3170 VEC_replace (ddr_p, ddrs, k, newddr); 3171 free_dependence_relation (ddr); 3172 VEC_replace (data_reference_p, datarefs, i, dr); 3173 3174 if (bad) 3175 { 3176 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 3177 { 3178 fprintf (vect_dump, 3179 "not vectorized: data dependence conflict" 3180 " prevents gather"); 3181 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 3182 } 3183 return false; 3184 } 3185 3186 STMT_VINFO_GATHER_P (stmt_info) = true; 3187 } 3188 } 3189 3190 return true; 3191 } 3192 3193 3194 /* Function vect_get_new_vect_var. 3195 3196 Returns a name for a new variable. The current naming scheme appends the 3197 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to 3198 the name of vectorizer generated variables, and appends that to NAME if 3199 provided. */ 3200 3201 tree 3202 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name) 3203 { 3204 const char *prefix; 3205 tree new_vect_var; 3206 3207 switch (var_kind) 3208 { 3209 case vect_simple_var: 3210 prefix = "vect_"; 3211 break; 3212 case vect_scalar_var: 3213 prefix = "stmp_"; 3214 break; 3215 case vect_pointer_var: 3216 prefix = "vect_p"; 3217 break; 3218 default: 3219 gcc_unreachable (); 3220 } 3221 3222 if (name) 3223 { 3224 char* tmp = concat (prefix, name, NULL); 3225 new_vect_var = create_tmp_var (type, tmp); 3226 free (tmp); 3227 } 3228 else 3229 new_vect_var = create_tmp_var (type, prefix); 3230 3231 /* Mark vector typed variable as a gimple register variable. */ 3232 if (TREE_CODE (type) == VECTOR_TYPE) 3233 DECL_GIMPLE_REG_P (new_vect_var) = true; 3234 3235 return new_vect_var; 3236 } 3237 3238 3239 /* Function vect_create_addr_base_for_vector_ref. 3240 3241 Create an expression that computes the address of the first memory location 3242 that will be accessed for a data reference. 3243 3244 Input: 3245 STMT: The statement containing the data reference. 3246 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list. 3247 OFFSET: Optional. If supplied, it is be added to the initial address. 3248 LOOP: Specify relative to which loop-nest should the address be computed. 3249 For example, when the dataref is in an inner-loop nested in an 3250 outer-loop that is now being vectorized, LOOP can be either the 3251 outer-loop, or the inner-loop. The first memory location accessed 3252 by the following dataref ('in' points to short): 3253 3254 for (i=0; i<N; i++) 3255 for (j=0; j<M; j++) 3256 s += in[i+j] 3257 3258 is as follows: 3259 if LOOP=i_loop: &in (relative to i_loop) 3260 if LOOP=j_loop: &in+i*2B (relative to j_loop) 3261 3262 Output: 3263 1. Return an SSA_NAME whose value is the address of the memory location of 3264 the first vector of the data reference. 3265 2. If new_stmt_list is not NULL_TREE after return then the caller must insert 3266 these statement(s) which define the returned SSA_NAME. 3267 3268 FORNOW: We are only handling array accesses with step 1. */ 3269 3270 tree 3271 vect_create_addr_base_for_vector_ref (gimple stmt, 3272 gimple_seq *new_stmt_list, 3273 tree offset, 3274 struct loop *loop) 3275 { 3276 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 3277 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 3278 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr)); 3279 tree base_name; 3280 tree data_ref_base_var; 3281 tree vec_stmt; 3282 tree addr_base, addr_expr; 3283 tree dest; 3284 gimple_seq seq = NULL; 3285 tree base_offset = unshare_expr (DR_OFFSET (dr)); 3286 tree init = unshare_expr (DR_INIT (dr)); 3287 tree vect_ptr_type; 3288 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))); 3289 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 3290 tree base; 3291 3292 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father) 3293 { 3294 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo); 3295 3296 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt)); 3297 3298 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info)); 3299 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info)); 3300 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info)); 3301 } 3302 3303 if (loop_vinfo) 3304 base_name = build_fold_indirect_ref (data_ref_base); 3305 else 3306 { 3307 base_offset = ssize_int (0); 3308 init = ssize_int (0); 3309 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr))); 3310 } 3311 3312 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp"); 3313 add_referenced_var (data_ref_base_var); 3314 data_ref_base = force_gimple_operand (data_ref_base, &seq, true, 3315 data_ref_base_var); 3316 gimple_seq_add_seq (new_stmt_list, seq); 3317 3318 /* Create base_offset */ 3319 base_offset = size_binop (PLUS_EXPR, 3320 fold_convert (sizetype, base_offset), 3321 fold_convert (sizetype, init)); 3322 dest = create_tmp_var (sizetype, "base_off"); 3323 add_referenced_var (dest); 3324 base_offset = force_gimple_operand (base_offset, &seq, true, dest); 3325 gimple_seq_add_seq (new_stmt_list, seq); 3326 3327 if (offset) 3328 { 3329 tree tmp = create_tmp_var (sizetype, "offset"); 3330 3331 add_referenced_var (tmp); 3332 offset = fold_build2 (MULT_EXPR, sizetype, 3333 fold_convert (sizetype, offset), step); 3334 base_offset = fold_build2 (PLUS_EXPR, sizetype, 3335 base_offset, offset); 3336 base_offset = force_gimple_operand (base_offset, &seq, false, tmp); 3337 gimple_seq_add_seq (new_stmt_list, seq); 3338 } 3339 3340 /* base + base_offset */ 3341 if (loop_vinfo) 3342 addr_base = fold_build_pointer_plus (data_ref_base, base_offset); 3343 else 3344 { 3345 addr_base = build1 (ADDR_EXPR, 3346 build_pointer_type (TREE_TYPE (DR_REF (dr))), 3347 unshare_expr (DR_REF (dr))); 3348 } 3349 3350 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info)); 3351 base = get_base_address (DR_REF (dr)); 3352 if (base 3353 && TREE_CODE (base) == MEM_REF) 3354 vect_ptr_type 3355 = build_qualified_type (vect_ptr_type, 3356 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0)))); 3357 3358 vec_stmt = fold_convert (vect_ptr_type, addr_base); 3359 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, 3360 get_name (base_name)); 3361 add_referenced_var (addr_expr); 3362 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr); 3363 gimple_seq_add_seq (new_stmt_list, seq); 3364 3365 if (DR_PTR_INFO (dr) 3366 && TREE_CODE (vec_stmt) == SSA_NAME) 3367 { 3368 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr)); 3369 if (offset) 3370 { 3371 SSA_NAME_PTR_INFO (vec_stmt)->align = 1; 3372 SSA_NAME_PTR_INFO (vec_stmt)->misalign = 0; 3373 } 3374 } 3375 3376 if (vect_print_dump_info (REPORT_DETAILS)) 3377 { 3378 fprintf (vect_dump, "created "); 3379 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM); 3380 } 3381 3382 return vec_stmt; 3383 } 3384 3385 3386 /* Function vect_create_data_ref_ptr. 3387 3388 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first 3389 location accessed in the loop by STMT, along with the def-use update 3390 chain to appropriately advance the pointer through the loop iterations. 3391 Also set aliasing information for the pointer. This pointer is used by 3392 the callers to this function to create a memory reference expression for 3393 vector load/store access. 3394 3395 Input: 3396 1. STMT: a stmt that references memory. Expected to be of the form 3397 GIMPLE_ASSIGN <name, data-ref> or 3398 GIMPLE_ASSIGN <data-ref, name>. 3399 2. AGGR_TYPE: the type of the reference, which should be either a vector 3400 or an array. 3401 3. AT_LOOP: the loop where the vector memref is to be created. 3402 4. OFFSET (optional): an offset to be added to the initial address accessed 3403 by the data-ref in STMT. 3404 5. BSI: location where the new stmts are to be placed if there is no loop 3405 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain 3406 pointing to the initial address. 3407 3408 Output: 3409 1. Declare a new ptr to vector_type, and have it point to the base of the 3410 data reference (initial addressed accessed by the data reference). 3411 For example, for vector of type V8HI, the following code is generated: 3412 3413 v8hi *ap; 3414 ap = (v8hi *)initial_address; 3415 3416 if OFFSET is not supplied: 3417 initial_address = &a[init]; 3418 if OFFSET is supplied: 3419 initial_address = &a[init + OFFSET]; 3420 3421 Return the initial_address in INITIAL_ADDRESS. 3422 3423 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also 3424 update the pointer in each iteration of the loop. 3425 3426 Return the increment stmt that updates the pointer in PTR_INCR. 3427 3428 3. Set INV_P to true if the access pattern of the data reference in the 3429 vectorized loop is invariant. Set it to false otherwise. 3430 3431 4. Return the pointer. */ 3432 3433 tree 3434 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop, 3435 tree offset, tree *initial_address, 3436 gimple_stmt_iterator *gsi, gimple *ptr_incr, 3437 bool only_init, bool *inv_p) 3438 { 3439 tree base_name; 3440 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 3441 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 3442 struct loop *loop = NULL; 3443 bool nested_in_vect_loop = false; 3444 struct loop *containing_loop = NULL; 3445 tree aggr_ptr_type; 3446 tree aggr_ptr; 3447 tree new_temp; 3448 gimple vec_stmt; 3449 gimple_seq new_stmt_list = NULL; 3450 edge pe = NULL; 3451 basic_block new_bb; 3452 tree aggr_ptr_init; 3453 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 3454 tree aptr; 3455 gimple_stmt_iterator incr_gsi; 3456 bool insert_after; 3457 bool negative; 3458 tree indx_before_incr, indx_after_incr; 3459 gimple incr; 3460 tree step; 3461 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 3462 tree base; 3463 3464 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE 3465 || TREE_CODE (aggr_type) == VECTOR_TYPE); 3466 3467 if (loop_vinfo) 3468 { 3469 loop = LOOP_VINFO_LOOP (loop_vinfo); 3470 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt); 3471 containing_loop = (gimple_bb (stmt))->loop_father; 3472 pe = loop_preheader_edge (loop); 3473 } 3474 else 3475 { 3476 gcc_assert (bb_vinfo); 3477 only_init = true; 3478 *ptr_incr = NULL; 3479 } 3480 3481 /* Check the step (evolution) of the load in LOOP, and record 3482 whether it's invariant. */ 3483 if (nested_in_vect_loop) 3484 step = STMT_VINFO_DR_STEP (stmt_info); 3485 else 3486 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info)); 3487 3488 if (tree_int_cst_compare (step, size_zero_node) == 0) 3489 *inv_p = true; 3490 else 3491 *inv_p = false; 3492 negative = tree_int_cst_compare (step, size_zero_node) < 0; 3493 3494 /* Create an expression for the first address accessed by this load 3495 in LOOP. */ 3496 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr))); 3497 3498 if (vect_print_dump_info (REPORT_DETAILS)) 3499 { 3500 tree data_ref_base = base_name; 3501 fprintf (vect_dump, "create %s-pointer variable to type: ", 3502 tree_code_name[(int) TREE_CODE (aggr_type)]); 3503 print_generic_expr (vect_dump, aggr_type, TDF_SLIM); 3504 if (TREE_CODE (data_ref_base) == VAR_DECL 3505 || TREE_CODE (data_ref_base) == ARRAY_REF) 3506 fprintf (vect_dump, " vectorizing an array ref: "); 3507 else if (TREE_CODE (data_ref_base) == COMPONENT_REF) 3508 fprintf (vect_dump, " vectorizing a record based array ref: "); 3509 else if (TREE_CODE (data_ref_base) == SSA_NAME) 3510 fprintf (vect_dump, " vectorizing a pointer ref: "); 3511 print_generic_expr (vect_dump, base_name, TDF_SLIM); 3512 } 3513 3514 /* (1) Create the new aggregate-pointer variable. */ 3515 aggr_ptr_type = build_pointer_type (aggr_type); 3516 base = get_base_address (DR_REF (dr)); 3517 if (base 3518 && TREE_CODE (base) == MEM_REF) 3519 aggr_ptr_type 3520 = build_qualified_type (aggr_ptr_type, 3521 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0)))); 3522 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, 3523 get_name (base_name)); 3524 3525 /* Vector and array types inherit the alias set of their component 3526 type by default so we need to use a ref-all pointer if the data 3527 reference does not conflict with the created aggregated data 3528 reference because it is not addressable. */ 3529 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr), 3530 get_alias_set (DR_REF (dr)))) 3531 { 3532 aggr_ptr_type 3533 = build_pointer_type_for_mode (aggr_type, 3534 TYPE_MODE (aggr_ptr_type), true); 3535 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, 3536 get_name (base_name)); 3537 } 3538 3539 /* Likewise for any of the data references in the stmt group. */ 3540 else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1) 3541 { 3542 gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info); 3543 do 3544 { 3545 tree lhs = gimple_assign_lhs (orig_stmt); 3546 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr), 3547 get_alias_set (lhs))) 3548 { 3549 aggr_ptr_type 3550 = build_pointer_type_for_mode (aggr_type, 3551 TYPE_MODE (aggr_ptr_type), true); 3552 aggr_ptr 3553 = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, 3554 get_name (base_name)); 3555 break; 3556 } 3557 3558 orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt)); 3559 } 3560 while (orig_stmt); 3561 } 3562 3563 add_referenced_var (aggr_ptr); 3564 3565 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are 3566 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two 3567 def-use update cycles for the pointer: one relative to the outer-loop 3568 (LOOP), which is what steps (3) and (4) below do. The other is relative 3569 to the inner-loop (which is the inner-most loop containing the dataref), 3570 and this is done be step (5) below. 3571 3572 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the 3573 inner-most loop, and so steps (3),(4) work the same, and step (5) is 3574 redundant. Steps (3),(4) create the following: 3575 3576 vp0 = &base_addr; 3577 LOOP: vp1 = phi(vp0,vp2) 3578 ... 3579 ... 3580 vp2 = vp1 + step 3581 goto LOOP 3582 3583 If there is an inner-loop nested in loop, then step (5) will also be 3584 applied, and an additional update in the inner-loop will be created: 3585 3586 vp0 = &base_addr; 3587 LOOP: vp1 = phi(vp0,vp2) 3588 ... 3589 inner: vp3 = phi(vp1,vp4) 3590 vp4 = vp3 + inner_step 3591 if () goto inner 3592 ... 3593 vp2 = vp1 + step 3594 if () goto LOOP */ 3595 3596 /* (2) Calculate the initial address of the aggregate-pointer, and set 3597 the aggregate-pointer to point to it before the loop. */ 3598 3599 /* Create: (&(base[init_val+offset]) in the loop preheader. */ 3600 3601 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list, 3602 offset, loop); 3603 if (new_stmt_list) 3604 { 3605 if (pe) 3606 { 3607 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list); 3608 gcc_assert (!new_bb); 3609 } 3610 else 3611 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT); 3612 } 3613 3614 *initial_address = new_temp; 3615 3616 /* Create: p = (aggr_type *) initial_base */ 3617 if (TREE_CODE (new_temp) != SSA_NAME 3618 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp))) 3619 { 3620 vec_stmt = gimple_build_assign (aggr_ptr, 3621 fold_convert (aggr_ptr_type, new_temp)); 3622 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt); 3623 /* Copy the points-to information if it exists. */ 3624 if (DR_PTR_INFO (dr)) 3625 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr)); 3626 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init); 3627 if (pe) 3628 { 3629 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt); 3630 gcc_assert (!new_bb); 3631 } 3632 else 3633 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT); 3634 } 3635 else 3636 aggr_ptr_init = new_temp; 3637 3638 /* (3) Handle the updating of the aggregate-pointer inside the loop. 3639 This is needed when ONLY_INIT is false, and also when AT_LOOP is the 3640 inner-loop nested in LOOP (during outer-loop vectorization). */ 3641 3642 /* No update in loop is required. */ 3643 if (only_init && (!loop_vinfo || at_loop == loop)) 3644 aptr = aggr_ptr_init; 3645 else 3646 { 3647 /* The step of the aggregate pointer is the type size. */ 3648 tree step = TYPE_SIZE_UNIT (aggr_type); 3649 /* One exception to the above is when the scalar step of the load in 3650 LOOP is zero. In this case the step here is also zero. */ 3651 if (*inv_p) 3652 step = size_zero_node; 3653 else if (negative) 3654 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step); 3655 3656 standard_iv_increment_position (loop, &incr_gsi, &insert_after); 3657 3658 create_iv (aggr_ptr_init, 3659 fold_convert (aggr_ptr_type, step), 3660 aggr_ptr, loop, &incr_gsi, insert_after, 3661 &indx_before_incr, &indx_after_incr); 3662 incr = gsi_stmt (incr_gsi); 3663 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL)); 3664 3665 /* Copy the points-to information if it exists. */ 3666 if (DR_PTR_INFO (dr)) 3667 { 3668 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr)); 3669 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr)); 3670 } 3671 if (ptr_incr) 3672 *ptr_incr = incr; 3673 3674 aptr = indx_before_incr; 3675 } 3676 3677 if (!nested_in_vect_loop || only_init) 3678 return aptr; 3679 3680 3681 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop 3682 nested in LOOP, if exists. */ 3683 3684 gcc_assert (nested_in_vect_loop); 3685 if (!only_init) 3686 { 3687 standard_iv_increment_position (containing_loop, &incr_gsi, 3688 &insert_after); 3689 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr, 3690 containing_loop, &incr_gsi, insert_after, &indx_before_incr, 3691 &indx_after_incr); 3692 incr = gsi_stmt (incr_gsi); 3693 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL)); 3694 3695 /* Copy the points-to information if it exists. */ 3696 if (DR_PTR_INFO (dr)) 3697 { 3698 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr)); 3699 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr)); 3700 } 3701 if (ptr_incr) 3702 *ptr_incr = incr; 3703 3704 return indx_before_incr; 3705 } 3706 else 3707 gcc_unreachable (); 3708 } 3709 3710 3711 /* Function bump_vector_ptr 3712 3713 Increment a pointer (to a vector type) by vector-size. If requested, 3714 i.e. if PTR-INCR is given, then also connect the new increment stmt 3715 to the existing def-use update-chain of the pointer, by modifying 3716 the PTR_INCR as illustrated below: 3717 3718 The pointer def-use update-chain before this function: 3719 DATAREF_PTR = phi (p_0, p_2) 3720 .... 3721 PTR_INCR: p_2 = DATAREF_PTR + step 3722 3723 The pointer def-use update-chain after this function: 3724 DATAREF_PTR = phi (p_0, p_2) 3725 .... 3726 NEW_DATAREF_PTR = DATAREF_PTR + BUMP 3727 .... 3728 PTR_INCR: p_2 = NEW_DATAREF_PTR + step 3729 3730 Input: 3731 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated 3732 in the loop. 3733 PTR_INCR - optional. The stmt that updates the pointer in each iteration of 3734 the loop. The increment amount across iterations is expected 3735 to be vector_size. 3736 BSI - location where the new update stmt is to be placed. 3737 STMT - the original scalar memory-access stmt that is being vectorized. 3738 BUMP - optional. The offset by which to bump the pointer. If not given, 3739 the offset is assumed to be vector_size. 3740 3741 Output: Return NEW_DATAREF_PTR as illustrated above. 3742 3743 */ 3744 3745 tree 3746 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi, 3747 gimple stmt, tree bump) 3748 { 3749 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 3750 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 3751 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 3752 tree ptr_var = SSA_NAME_VAR (dataref_ptr); 3753 tree update = TYPE_SIZE_UNIT (vectype); 3754 gimple incr_stmt; 3755 ssa_op_iter iter; 3756 use_operand_p use_p; 3757 tree new_dataref_ptr; 3758 3759 if (bump) 3760 update = bump; 3761 3762 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var, 3763 dataref_ptr, update); 3764 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt); 3765 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr); 3766 vect_finish_stmt_generation (stmt, incr_stmt, gsi); 3767 3768 /* Copy the points-to information if it exists. */ 3769 if (DR_PTR_INFO (dr)) 3770 { 3771 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr)); 3772 SSA_NAME_PTR_INFO (new_dataref_ptr)->align = 1; 3773 SSA_NAME_PTR_INFO (new_dataref_ptr)->misalign = 0; 3774 } 3775 3776 if (!ptr_incr) 3777 return new_dataref_ptr; 3778 3779 /* Update the vector-pointer's cross-iteration increment. */ 3780 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE) 3781 { 3782 tree use = USE_FROM_PTR (use_p); 3783 3784 if (use == dataref_ptr) 3785 SET_USE (use_p, new_dataref_ptr); 3786 else 3787 gcc_assert (tree_int_cst_compare (use, update) == 0); 3788 } 3789 3790 return new_dataref_ptr; 3791 } 3792 3793 3794 /* Function vect_create_destination_var. 3795 3796 Create a new temporary of type VECTYPE. */ 3797 3798 tree 3799 vect_create_destination_var (tree scalar_dest, tree vectype) 3800 { 3801 tree vec_dest; 3802 const char *new_name; 3803 tree type; 3804 enum vect_var_kind kind; 3805 3806 kind = vectype ? vect_simple_var : vect_scalar_var; 3807 type = vectype ? vectype : TREE_TYPE (scalar_dest); 3808 3809 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME); 3810 3811 new_name = get_name (scalar_dest); 3812 if (!new_name) 3813 new_name = "var_"; 3814 vec_dest = vect_get_new_vect_var (type, kind, new_name); 3815 add_referenced_var (vec_dest); 3816 3817 return vec_dest; 3818 } 3819 3820 /* Function vect_strided_store_supported. 3821 3822 Returns TRUE if interleave high and interleave low permutations 3823 are supported, and FALSE otherwise. */ 3824 3825 bool 3826 vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count) 3827 { 3828 enum machine_mode mode = TYPE_MODE (vectype); 3829 3830 /* vect_permute_store_chain requires the group size to be a power of two. */ 3831 if (exact_log2 (count) == -1) 3832 { 3833 if (vect_print_dump_info (REPORT_DETAILS)) 3834 fprintf (vect_dump, "the size of the group of strided accesses" 3835 " is not a power of 2"); 3836 return false; 3837 } 3838 3839 /* Check that the permutation is supported. */ 3840 if (VECTOR_MODE_P (mode)) 3841 { 3842 unsigned int i, nelt = GET_MODE_NUNITS (mode); 3843 unsigned char *sel = XALLOCAVEC (unsigned char, nelt); 3844 for (i = 0; i < nelt / 2; i++) 3845 { 3846 sel[i * 2] = i; 3847 sel[i * 2 + 1] = i + nelt; 3848 } 3849 if (can_vec_perm_p (mode, false, sel)) 3850 { 3851 for (i = 0; i < nelt; i++) 3852 sel[i] += nelt / 2; 3853 if (can_vec_perm_p (mode, false, sel)) 3854 return true; 3855 } 3856 } 3857 3858 if (vect_print_dump_info (REPORT_DETAILS)) 3859 fprintf (vect_dump, "interleave op not supported by target."); 3860 return false; 3861 } 3862 3863 3864 /* Return TRUE if vec_store_lanes is available for COUNT vectors of 3865 type VECTYPE. */ 3866 3867 bool 3868 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count) 3869 { 3870 return vect_lanes_optab_supported_p ("vec_store_lanes", 3871 vec_store_lanes_optab, 3872 vectype, count); 3873 } 3874 3875 3876 /* Function vect_permute_store_chain. 3877 3878 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be 3879 a power of 2, generate interleave_high/low stmts to reorder the data 3880 correctly for the stores. Return the final references for stores in 3881 RESULT_CHAIN. 3882 3883 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8. 3884 The input is 4 vectors each containing 8 elements. We assign a number to 3885 each element, the input sequence is: 3886 3887 1st vec: 0 1 2 3 4 5 6 7 3888 2nd vec: 8 9 10 11 12 13 14 15 3889 3rd vec: 16 17 18 19 20 21 22 23 3890 4th vec: 24 25 26 27 28 29 30 31 3891 3892 The output sequence should be: 3893 3894 1st vec: 0 8 16 24 1 9 17 25 3895 2nd vec: 2 10 18 26 3 11 19 27 3896 3rd vec: 4 12 20 28 5 13 21 30 3897 4th vec: 6 14 22 30 7 15 23 31 3898 3899 i.e., we interleave the contents of the four vectors in their order. 3900 3901 We use interleave_high/low instructions to create such output. The input of 3902 each interleave_high/low operation is two vectors: 3903 1st vec 2nd vec 3904 0 1 2 3 4 5 6 7 3905 the even elements of the result vector are obtained left-to-right from the 3906 high/low elements of the first vector. The odd elements of the result are 3907 obtained left-to-right from the high/low elements of the second vector. 3908 The output of interleave_high will be: 0 4 1 5 3909 and of interleave_low: 2 6 3 7 3910 3911 3912 The permutation is done in log LENGTH stages. In each stage interleave_high 3913 and interleave_low stmts are created for each pair of vectors in DR_CHAIN, 3914 where the first argument is taken from the first half of DR_CHAIN and the 3915 second argument from it's second half. 3916 In our example, 3917 3918 I1: interleave_high (1st vec, 3rd vec) 3919 I2: interleave_low (1st vec, 3rd vec) 3920 I3: interleave_high (2nd vec, 4th vec) 3921 I4: interleave_low (2nd vec, 4th vec) 3922 3923 The output for the first stage is: 3924 3925 I1: 0 16 1 17 2 18 3 19 3926 I2: 4 20 5 21 6 22 7 23 3927 I3: 8 24 9 25 10 26 11 27 3928 I4: 12 28 13 29 14 30 15 31 3929 3930 The output of the second stage, i.e. the final result is: 3931 3932 I1: 0 8 16 24 1 9 17 25 3933 I2: 2 10 18 26 3 11 19 27 3934 I3: 4 12 20 28 5 13 21 30 3935 I4: 6 14 22 30 7 15 23 31. */ 3936 3937 void 3938 vect_permute_store_chain (VEC(tree,heap) *dr_chain, 3939 unsigned int length, 3940 gimple stmt, 3941 gimple_stmt_iterator *gsi, 3942 VEC(tree,heap) **result_chain) 3943 { 3944 tree perm_dest, vect1, vect2, high, low; 3945 gimple perm_stmt; 3946 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); 3947 tree perm_mask_low, perm_mask_high; 3948 unsigned int i, n; 3949 unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype); 3950 unsigned char *sel = XALLOCAVEC (unsigned char, nelt); 3951 3952 *result_chain = VEC_copy (tree, heap, dr_chain); 3953 3954 for (i = 0, n = nelt / 2; i < n; i++) 3955 { 3956 sel[i * 2] = i; 3957 sel[i * 2 + 1] = i + nelt; 3958 } 3959 perm_mask_high = vect_gen_perm_mask (vectype, sel); 3960 gcc_assert (perm_mask_high != NULL); 3961 3962 for (i = 0; i < nelt; i++) 3963 sel[i] += nelt / 2; 3964 perm_mask_low = vect_gen_perm_mask (vectype, sel); 3965 gcc_assert (perm_mask_low != NULL); 3966 3967 for (i = 0, n = exact_log2 (length); i < n; i++) 3968 { 3969 for (j = 0; j < length/2; j++) 3970 { 3971 vect1 = VEC_index (tree, dr_chain, j); 3972 vect2 = VEC_index (tree, dr_chain, j+length/2); 3973 3974 /* Create interleaving stmt: 3975 high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */ 3976 perm_dest = create_tmp_var (vectype, "vect_inter_high"); 3977 DECL_GIMPLE_REG_P (perm_dest) = 1; 3978 add_referenced_var (perm_dest); 3979 high = make_ssa_name (perm_dest, NULL); 3980 perm_stmt 3981 = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, high, 3982 vect1, vect2, perm_mask_high); 3983 vect_finish_stmt_generation (stmt, perm_stmt, gsi); 3984 VEC_replace (tree, *result_chain, 2*j, high); 3985 3986 /* Create interleaving stmt: 3987 low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1, 3988 nelt*3/2+1, ...}> */ 3989 perm_dest = create_tmp_var (vectype, "vect_inter_low"); 3990 DECL_GIMPLE_REG_P (perm_dest) = 1; 3991 add_referenced_var (perm_dest); 3992 low = make_ssa_name (perm_dest, NULL); 3993 perm_stmt 3994 = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, low, 3995 vect1, vect2, perm_mask_low); 3996 vect_finish_stmt_generation (stmt, perm_stmt, gsi); 3997 VEC_replace (tree, *result_chain, 2*j+1, low); 3998 } 3999 dr_chain = VEC_copy (tree, heap, *result_chain); 4000 } 4001 } 4002 4003 /* Function vect_setup_realignment 4004 4005 This function is called when vectorizing an unaligned load using 4006 the dr_explicit_realign[_optimized] scheme. 4007 This function generates the following code at the loop prolog: 4008 4009 p = initial_addr; 4010 x msq_init = *(floor(p)); # prolog load 4011 realignment_token = call target_builtin; 4012 loop: 4013 x msq = phi (msq_init, ---) 4014 4015 The stmts marked with x are generated only for the case of 4016 dr_explicit_realign_optimized. 4017 4018 The code above sets up a new (vector) pointer, pointing to the first 4019 location accessed by STMT, and a "floor-aligned" load using that pointer. 4020 It also generates code to compute the "realignment-token" (if the relevant 4021 target hook was defined), and creates a phi-node at the loop-header bb 4022 whose arguments are the result of the prolog-load (created by this 4023 function) and the result of a load that takes place in the loop (to be 4024 created by the caller to this function). 4025 4026 For the case of dr_explicit_realign_optimized: 4027 The caller to this function uses the phi-result (msq) to create the 4028 realignment code inside the loop, and sets up the missing phi argument, 4029 as follows: 4030 loop: 4031 msq = phi (msq_init, lsq) 4032 lsq = *(floor(p')); # load in loop 4033 result = realign_load (msq, lsq, realignment_token); 4034 4035 For the case of dr_explicit_realign: 4036 loop: 4037 msq = *(floor(p)); # load in loop 4038 p' = p + (VS-1); 4039 lsq = *(floor(p')); # load in loop 4040 result = realign_load (msq, lsq, realignment_token); 4041 4042 Input: 4043 STMT - (scalar) load stmt to be vectorized. This load accesses 4044 a memory location that may be unaligned. 4045 BSI - place where new code is to be inserted. 4046 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes 4047 is used. 4048 4049 Output: 4050 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load 4051 target hook, if defined. 4052 Return value - the result of the loop-header phi node. */ 4053 4054 tree 4055 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi, 4056 tree *realignment_token, 4057 enum dr_alignment_support alignment_support_scheme, 4058 tree init_addr, 4059 struct loop **at_loop) 4060 { 4061 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 4062 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 4063 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 4064 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 4065 struct loop *loop = NULL; 4066 edge pe = NULL; 4067 tree scalar_dest = gimple_assign_lhs (stmt); 4068 tree vec_dest; 4069 gimple inc; 4070 tree ptr; 4071 tree data_ref; 4072 gimple new_stmt; 4073 basic_block new_bb; 4074 tree msq_init = NULL_TREE; 4075 tree new_temp; 4076 gimple phi_stmt; 4077 tree msq = NULL_TREE; 4078 gimple_seq stmts = NULL; 4079 bool inv_p; 4080 bool compute_in_loop = false; 4081 bool nested_in_vect_loop = false; 4082 struct loop *containing_loop = (gimple_bb (stmt))->loop_father; 4083 struct loop *loop_for_initial_load = NULL; 4084 4085 if (loop_vinfo) 4086 { 4087 loop = LOOP_VINFO_LOOP (loop_vinfo); 4088 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt); 4089 } 4090 4091 gcc_assert (alignment_support_scheme == dr_explicit_realign 4092 || alignment_support_scheme == dr_explicit_realign_optimized); 4093 4094 /* We need to generate three things: 4095 1. the misalignment computation 4096 2. the extra vector load (for the optimized realignment scheme). 4097 3. the phi node for the two vectors from which the realignment is 4098 done (for the optimized realignment scheme). */ 4099 4100 /* 1. Determine where to generate the misalignment computation. 4101 4102 If INIT_ADDR is NULL_TREE, this indicates that the misalignment 4103 calculation will be generated by this function, outside the loop (in the 4104 preheader). Otherwise, INIT_ADDR had already been computed for us by the 4105 caller, inside the loop. 4106 4107 Background: If the misalignment remains fixed throughout the iterations of 4108 the loop, then both realignment schemes are applicable, and also the 4109 misalignment computation can be done outside LOOP. This is because we are 4110 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that 4111 are a multiple of VS (the Vector Size), and therefore the misalignment in 4112 different vectorized LOOP iterations is always the same. 4113 The problem arises only if the memory access is in an inner-loop nested 4114 inside LOOP, which is now being vectorized using outer-loop vectorization. 4115 This is the only case when the misalignment of the memory access may not 4116 remain fixed throughout the iterations of the inner-loop (as explained in 4117 detail in vect_supportable_dr_alignment). In this case, not only is the 4118 optimized realignment scheme not applicable, but also the misalignment 4119 computation (and generation of the realignment token that is passed to 4120 REALIGN_LOAD) have to be done inside the loop. 4121 4122 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode 4123 or not, which in turn determines if the misalignment is computed inside 4124 the inner-loop, or outside LOOP. */ 4125 4126 if (init_addr != NULL_TREE || !loop_vinfo) 4127 { 4128 compute_in_loop = true; 4129 gcc_assert (alignment_support_scheme == dr_explicit_realign); 4130 } 4131 4132 4133 /* 2. Determine where to generate the extra vector load. 4134 4135 For the optimized realignment scheme, instead of generating two vector 4136 loads in each iteration, we generate a single extra vector load in the 4137 preheader of the loop, and in each iteration reuse the result of the 4138 vector load from the previous iteration. In case the memory access is in 4139 an inner-loop nested inside LOOP, which is now being vectorized using 4140 outer-loop vectorization, we need to determine whether this initial vector 4141 load should be generated at the preheader of the inner-loop, or can be 4142 generated at the preheader of LOOP. If the memory access has no evolution 4143 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has 4144 to be generated inside LOOP (in the preheader of the inner-loop). */ 4145 4146 if (nested_in_vect_loop) 4147 { 4148 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info); 4149 bool invariant_in_outerloop = 4150 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0); 4151 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner); 4152 } 4153 else 4154 loop_for_initial_load = loop; 4155 if (at_loop) 4156 *at_loop = loop_for_initial_load; 4157 4158 if (loop_for_initial_load) 4159 pe = loop_preheader_edge (loop_for_initial_load); 4160 4161 /* 3. For the case of the optimized realignment, create the first vector 4162 load at the loop preheader. */ 4163 4164 if (alignment_support_scheme == dr_explicit_realign_optimized) 4165 { 4166 /* Create msq_init = *(floor(p1)) in the loop preheader */ 4167 4168 gcc_assert (!compute_in_loop); 4169 vec_dest = vect_create_destination_var (scalar_dest, vectype); 4170 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load, 4171 NULL_TREE, &init_addr, NULL, &inc, 4172 true, &inv_p); 4173 new_stmt = gimple_build_assign_with_ops 4174 (BIT_AND_EXPR, NULL_TREE, ptr, 4175 build_int_cst (TREE_TYPE (ptr), 4176 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); 4177 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt); 4178 gimple_assign_set_lhs (new_stmt, new_temp); 4179 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt); 4180 gcc_assert (!new_bb); 4181 data_ref 4182 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp, 4183 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0)); 4184 new_stmt = gimple_build_assign (vec_dest, data_ref); 4185 new_temp = make_ssa_name (vec_dest, new_stmt); 4186 gimple_assign_set_lhs (new_stmt, new_temp); 4187 mark_symbols_for_renaming (new_stmt); 4188 if (pe) 4189 { 4190 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt); 4191 gcc_assert (!new_bb); 4192 } 4193 else 4194 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); 4195 4196 msq_init = gimple_assign_lhs (new_stmt); 4197 } 4198 4199 /* 4. Create realignment token using a target builtin, if available. 4200 It is done either inside the containing loop, or before LOOP (as 4201 determined above). */ 4202 4203 if (targetm.vectorize.builtin_mask_for_load) 4204 { 4205 tree builtin_decl; 4206 4207 /* Compute INIT_ADDR - the initial addressed accessed by this memref. */ 4208 if (!init_addr) 4209 { 4210 /* Generate the INIT_ADDR computation outside LOOP. */ 4211 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts, 4212 NULL_TREE, loop); 4213 if (loop) 4214 { 4215 pe = loop_preheader_edge (loop); 4216 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); 4217 gcc_assert (!new_bb); 4218 } 4219 else 4220 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); 4221 } 4222 4223 builtin_decl = targetm.vectorize.builtin_mask_for_load (); 4224 new_stmt = gimple_build_call (builtin_decl, 1, init_addr); 4225 vec_dest = 4226 vect_create_destination_var (scalar_dest, 4227 gimple_call_return_type (new_stmt)); 4228 new_temp = make_ssa_name (vec_dest, new_stmt); 4229 gimple_call_set_lhs (new_stmt, new_temp); 4230 4231 if (compute_in_loop) 4232 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); 4233 else 4234 { 4235 /* Generate the misalignment computation outside LOOP. */ 4236 pe = loop_preheader_edge (loop); 4237 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt); 4238 gcc_assert (!new_bb); 4239 } 4240 4241 *realignment_token = gimple_call_lhs (new_stmt); 4242 4243 /* The result of the CALL_EXPR to this builtin is determined from 4244 the value of the parameter and no global variables are touched 4245 which makes the builtin a "const" function. Requiring the 4246 builtin to have the "const" attribute makes it unnecessary 4247 to call mark_call_clobbered. */ 4248 gcc_assert (TREE_READONLY (builtin_decl)); 4249 } 4250 4251 if (alignment_support_scheme == dr_explicit_realign) 4252 return msq; 4253 4254 gcc_assert (!compute_in_loop); 4255 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized); 4256 4257 4258 /* 5. Create msq = phi <msq_init, lsq> in loop */ 4259 4260 pe = loop_preheader_edge (containing_loop); 4261 vec_dest = vect_create_destination_var (scalar_dest, vectype); 4262 msq = make_ssa_name (vec_dest, NULL); 4263 phi_stmt = create_phi_node (msq, containing_loop->header); 4264 SSA_NAME_DEF_STMT (msq) = phi_stmt; 4265 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION); 4266 4267 return msq; 4268 } 4269 4270 4271 /* Function vect_strided_load_supported. 4272 4273 Returns TRUE if even and odd permutations are supported, 4274 and FALSE otherwise. */ 4275 4276 bool 4277 vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count) 4278 { 4279 enum machine_mode mode = TYPE_MODE (vectype); 4280 4281 /* vect_permute_load_chain requires the group size to be a power of two. */ 4282 if (exact_log2 (count) == -1) 4283 { 4284 if (vect_print_dump_info (REPORT_DETAILS)) 4285 fprintf (vect_dump, "the size of the group of strided accesses" 4286 " is not a power of 2"); 4287 return false; 4288 } 4289 4290 /* Check that the permutation is supported. */ 4291 if (VECTOR_MODE_P (mode)) 4292 { 4293 unsigned int i, nelt = GET_MODE_NUNITS (mode); 4294 unsigned char *sel = XALLOCAVEC (unsigned char, nelt); 4295 4296 for (i = 0; i < nelt; i++) 4297 sel[i] = i * 2; 4298 if (can_vec_perm_p (mode, false, sel)) 4299 { 4300 for (i = 0; i < nelt; i++) 4301 sel[i] = i * 2 + 1; 4302 if (can_vec_perm_p (mode, false, sel)) 4303 return true; 4304 } 4305 } 4306 4307 if (vect_print_dump_info (REPORT_DETAILS)) 4308 fprintf (vect_dump, "extract even/odd not supported by target"); 4309 return false; 4310 } 4311 4312 /* Return TRUE if vec_load_lanes is available for COUNT vectors of 4313 type VECTYPE. */ 4314 4315 bool 4316 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count) 4317 { 4318 return vect_lanes_optab_supported_p ("vec_load_lanes", 4319 vec_load_lanes_optab, 4320 vectype, count); 4321 } 4322 4323 /* Function vect_permute_load_chain. 4324 4325 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be 4326 a power of 2, generate extract_even/odd stmts to reorder the input data 4327 correctly. Return the final references for loads in RESULT_CHAIN. 4328 4329 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8. 4330 The input is 4 vectors each containing 8 elements. We assign a number to each 4331 element, the input sequence is: 4332 4333 1st vec: 0 1 2 3 4 5 6 7 4334 2nd vec: 8 9 10 11 12 13 14 15 4335 3rd vec: 16 17 18 19 20 21 22 23 4336 4th vec: 24 25 26 27 28 29 30 31 4337 4338 The output sequence should be: 4339 4340 1st vec: 0 4 8 12 16 20 24 28 4341 2nd vec: 1 5 9 13 17 21 25 29 4342 3rd vec: 2 6 10 14 18 22 26 30 4343 4th vec: 3 7 11 15 19 23 27 31 4344 4345 i.e., the first output vector should contain the first elements of each 4346 interleaving group, etc. 4347 4348 We use extract_even/odd instructions to create such output. The input of 4349 each extract_even/odd operation is two vectors 4350 1st vec 2nd vec 4351 0 1 2 3 4 5 6 7 4352 4353 and the output is the vector of extracted even/odd elements. The output of 4354 extract_even will be: 0 2 4 6 4355 and of extract_odd: 1 3 5 7 4356 4357 4358 The permutation is done in log LENGTH stages. In each stage extract_even 4359 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in 4360 their order. In our example, 4361 4362 E1: extract_even (1st vec, 2nd vec) 4363 E2: extract_odd (1st vec, 2nd vec) 4364 E3: extract_even (3rd vec, 4th vec) 4365 E4: extract_odd (3rd vec, 4th vec) 4366 4367 The output for the first stage will be: 4368 4369 E1: 0 2 4 6 8 10 12 14 4370 E2: 1 3 5 7 9 11 13 15 4371 E3: 16 18 20 22 24 26 28 30 4372 E4: 17 19 21 23 25 27 29 31 4373 4374 In order to proceed and create the correct sequence for the next stage (or 4375 for the correct output, if the second stage is the last one, as in our 4376 example), we first put the output of extract_even operation and then the 4377 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN). 4378 The input for the second stage is: 4379 4380 1st vec (E1): 0 2 4 6 8 10 12 14 4381 2nd vec (E3): 16 18 20 22 24 26 28 30 4382 3rd vec (E2): 1 3 5 7 9 11 13 15 4383 4th vec (E4): 17 19 21 23 25 27 29 31 4384 4385 The output of the second stage: 4386 4387 E1: 0 4 8 12 16 20 24 28 4388 E2: 2 6 10 14 18 22 26 30 4389 E3: 1 5 9 13 17 21 25 29 4390 E4: 3 7 11 15 19 23 27 31 4391 4392 And RESULT_CHAIN after reordering: 4393 4394 1st vec (E1): 0 4 8 12 16 20 24 28 4395 2nd vec (E3): 1 5 9 13 17 21 25 29 4396 3rd vec (E2): 2 6 10 14 18 22 26 30 4397 4th vec (E4): 3 7 11 15 19 23 27 31. */ 4398 4399 static void 4400 vect_permute_load_chain (VEC(tree,heap) *dr_chain, 4401 unsigned int length, 4402 gimple stmt, 4403 gimple_stmt_iterator *gsi, 4404 VEC(tree,heap) **result_chain) 4405 { 4406 tree perm_dest, data_ref, first_vect, second_vect; 4407 tree perm_mask_even, perm_mask_odd; 4408 gimple perm_stmt; 4409 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); 4410 unsigned int i, j, log_length = exact_log2 (length); 4411 unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype); 4412 unsigned char *sel = XALLOCAVEC (unsigned char, nelt); 4413 4414 *result_chain = VEC_copy (tree, heap, dr_chain); 4415 4416 for (i = 0; i < nelt; ++i) 4417 sel[i] = i * 2; 4418 perm_mask_even = vect_gen_perm_mask (vectype, sel); 4419 gcc_assert (perm_mask_even != NULL); 4420 4421 for (i = 0; i < nelt; ++i) 4422 sel[i] = i * 2 + 1; 4423 perm_mask_odd = vect_gen_perm_mask (vectype, sel); 4424 gcc_assert (perm_mask_odd != NULL); 4425 4426 for (i = 0; i < log_length; i++) 4427 { 4428 for (j = 0; j < length; j += 2) 4429 { 4430 first_vect = VEC_index (tree, dr_chain, j); 4431 second_vect = VEC_index (tree, dr_chain, j+1); 4432 4433 /* data_ref = permute_even (first_data_ref, second_data_ref); */ 4434 perm_dest = create_tmp_var (vectype, "vect_perm_even"); 4435 DECL_GIMPLE_REG_P (perm_dest) = 1; 4436 add_referenced_var (perm_dest); 4437 4438 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest, 4439 first_vect, second_vect, 4440 perm_mask_even); 4441 4442 data_ref = make_ssa_name (perm_dest, perm_stmt); 4443 gimple_assign_set_lhs (perm_stmt, data_ref); 4444 vect_finish_stmt_generation (stmt, perm_stmt, gsi); 4445 mark_symbols_for_renaming (perm_stmt); 4446 4447 VEC_replace (tree, *result_chain, j/2, data_ref); 4448 4449 /* data_ref = permute_odd (first_data_ref, second_data_ref); */ 4450 perm_dest = create_tmp_var (vectype, "vect_perm_odd"); 4451 DECL_GIMPLE_REG_P (perm_dest) = 1; 4452 add_referenced_var (perm_dest); 4453 4454 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest, 4455 first_vect, second_vect, 4456 perm_mask_odd); 4457 4458 data_ref = make_ssa_name (perm_dest, perm_stmt); 4459 gimple_assign_set_lhs (perm_stmt, data_ref); 4460 vect_finish_stmt_generation (stmt, perm_stmt, gsi); 4461 mark_symbols_for_renaming (perm_stmt); 4462 4463 VEC_replace (tree, *result_chain, j/2+length/2, data_ref); 4464 } 4465 dr_chain = VEC_copy (tree, heap, *result_chain); 4466 } 4467 } 4468 4469 4470 /* Function vect_transform_strided_load. 4471 4472 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements 4473 to perform their permutation and ascribe the result vectorized statements to 4474 the scalar statements. 4475 */ 4476 4477 void 4478 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size, 4479 gimple_stmt_iterator *gsi) 4480 { 4481 VEC(tree,heap) *result_chain = NULL; 4482 4483 /* DR_CHAIN contains input data-refs that are a part of the interleaving. 4484 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted 4485 vectors, that are ready for vector computation. */ 4486 result_chain = VEC_alloc (tree, heap, size); 4487 vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain); 4488 vect_record_strided_load_vectors (stmt, result_chain); 4489 VEC_free (tree, heap, result_chain); 4490 } 4491 4492 /* RESULT_CHAIN contains the output of a group of strided loads that were 4493 generated as part of the vectorization of STMT. Assign the statement 4494 for each vector to the associated scalar statement. */ 4495 4496 void 4497 vect_record_strided_load_vectors (gimple stmt, VEC(tree,heap) *result_chain) 4498 { 4499 gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)); 4500 gimple next_stmt, new_stmt; 4501 unsigned int i, gap_count; 4502 tree tmp_data_ref; 4503 4504 /* Put a permuted data-ref in the VECTORIZED_STMT field. 4505 Since we scan the chain starting from it's first node, their order 4506 corresponds the order of data-refs in RESULT_CHAIN. */ 4507 next_stmt = first_stmt; 4508 gap_count = 1; 4509 FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref) 4510 { 4511 if (!next_stmt) 4512 break; 4513 4514 /* Skip the gaps. Loads created for the gaps will be removed by dead 4515 code elimination pass later. No need to check for the first stmt in 4516 the group, since it always exists. 4517 GROUP_GAP is the number of steps in elements from the previous 4518 access (if there is no gap GROUP_GAP is 1). We skip loads that 4519 correspond to the gaps. */ 4520 if (next_stmt != first_stmt 4521 && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt))) 4522 { 4523 gap_count++; 4524 continue; 4525 } 4526 4527 while (next_stmt) 4528 { 4529 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref); 4530 /* We assume that if VEC_STMT is not NULL, this is a case of multiple 4531 copies, and we put the new vector statement in the first available 4532 RELATED_STMT. */ 4533 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt))) 4534 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt; 4535 else 4536 { 4537 if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt))) 4538 { 4539 gimple prev_stmt = 4540 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)); 4541 gimple rel_stmt = 4542 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)); 4543 while (rel_stmt) 4544 { 4545 prev_stmt = rel_stmt; 4546 rel_stmt = 4547 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt)); 4548 } 4549 4550 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) = 4551 new_stmt; 4552 } 4553 } 4554 4555 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt)); 4556 gap_count = 1; 4557 /* If NEXT_STMT accesses the same DR as the previous statement, 4558 put the same TMP_DATA_REF as its vectorized statement; otherwise 4559 get the next data-ref from RESULT_CHAIN. */ 4560 if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt))) 4561 break; 4562 } 4563 } 4564 } 4565 4566 /* Function vect_force_dr_alignment_p. 4567 4568 Returns whether the alignment of a DECL can be forced to be aligned 4569 on ALIGNMENT bit boundary. */ 4570 4571 bool 4572 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment) 4573 { 4574 if (TREE_CODE (decl) != VAR_DECL) 4575 return false; 4576 4577 if (DECL_EXTERNAL (decl)) 4578 return false; 4579 4580 if (TREE_ASM_WRITTEN (decl)) 4581 return false; 4582 4583 /* Do not override explicit alignment set by the user when an explicit 4584 section name is also used. This is a common idiom used by many 4585 software projects. */ 4586 if (DECL_SECTION_NAME (decl) != NULL_TREE 4587 && !DECL_HAS_IMPLICIT_SECTION_NAME_P (decl)) 4588 return false; 4589 4590 if (TREE_STATIC (decl)) 4591 return (alignment <= MAX_OFILE_ALIGNMENT); 4592 else 4593 return (alignment <= MAX_STACK_ALIGNMENT); 4594 } 4595 4596 4597 /* Return whether the data reference DR is supported with respect to its 4598 alignment. 4599 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even 4600 it is aligned, i.e., check if it is possible to vectorize it with different 4601 alignment. */ 4602 4603 enum dr_alignment_support 4604 vect_supportable_dr_alignment (struct data_reference *dr, 4605 bool check_aligned_accesses) 4606 { 4607 gimple stmt = DR_STMT (dr); 4608 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 4609 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 4610 enum machine_mode mode = TYPE_MODE (vectype); 4611 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 4612 struct loop *vect_loop = NULL; 4613 bool nested_in_vect_loop = false; 4614 4615 if (aligned_access_p (dr) && !check_aligned_accesses) 4616 return dr_aligned; 4617 4618 if (loop_vinfo) 4619 { 4620 vect_loop = LOOP_VINFO_LOOP (loop_vinfo); 4621 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt); 4622 } 4623 4624 /* Possibly unaligned access. */ 4625 4626 /* We can choose between using the implicit realignment scheme (generating 4627 a misaligned_move stmt) and the explicit realignment scheme (generating 4628 aligned loads with a REALIGN_LOAD). There are two variants to the 4629 explicit realignment scheme: optimized, and unoptimized. 4630 We can optimize the realignment only if the step between consecutive 4631 vector loads is equal to the vector size. Since the vector memory 4632 accesses advance in steps of VS (Vector Size) in the vectorized loop, it 4633 is guaranteed that the misalignment amount remains the same throughout the 4634 execution of the vectorized loop. Therefore, we can create the 4635 "realignment token" (the permutation mask that is passed to REALIGN_LOAD) 4636 at the loop preheader. 4637 4638 However, in the case of outer-loop vectorization, when vectorizing a 4639 memory access in the inner-loop nested within the LOOP that is now being 4640 vectorized, while it is guaranteed that the misalignment of the 4641 vectorized memory access will remain the same in different outer-loop 4642 iterations, it is *not* guaranteed that is will remain the same throughout 4643 the execution of the inner-loop. This is because the inner-loop advances 4644 with the original scalar step (and not in steps of VS). If the inner-loop 4645 step happens to be a multiple of VS, then the misalignment remains fixed 4646 and we can use the optimized realignment scheme. For example: 4647 4648 for (i=0; i<N; i++) 4649 for (j=0; j<M; j++) 4650 s += a[i+j]; 4651 4652 When vectorizing the i-loop in the above example, the step between 4653 consecutive vector loads is 1, and so the misalignment does not remain 4654 fixed across the execution of the inner-loop, and the realignment cannot 4655 be optimized (as illustrated in the following pseudo vectorized loop): 4656 4657 for (i=0; i<N; i+=4) 4658 for (j=0; j<M; j++){ 4659 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...} 4660 // when j is {0,1,2,3,4,5,6,7,...} respectively. 4661 // (assuming that we start from an aligned address). 4662 } 4663 4664 We therefore have to use the unoptimized realignment scheme: 4665 4666 for (i=0; i<N; i+=4) 4667 for (j=k; j<M; j+=4) 4668 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming 4669 // that the misalignment of the initial address is 4670 // 0). 4671 4672 The loop can then be vectorized as follows: 4673 4674 for (k=0; k<4; k++){ 4675 rt = get_realignment_token (&vp[k]); 4676 for (i=0; i<N; i+=4){ 4677 v1 = vp[i+k]; 4678 for (j=k; j<M; j+=4){ 4679 v2 = vp[i+j+VS-1]; 4680 va = REALIGN_LOAD <v1,v2,rt>; 4681 vs += va; 4682 v1 = v2; 4683 } 4684 } 4685 } */ 4686 4687 if (DR_IS_READ (dr)) 4688 { 4689 bool is_packed = false; 4690 tree type = (TREE_TYPE (DR_REF (dr))); 4691 4692 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing 4693 && (!targetm.vectorize.builtin_mask_for_load 4694 || targetm.vectorize.builtin_mask_for_load ())) 4695 { 4696 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 4697 if ((nested_in_vect_loop 4698 && (TREE_INT_CST_LOW (DR_STEP (dr)) 4699 != GET_MODE_SIZE (TYPE_MODE (vectype)))) 4700 || !loop_vinfo) 4701 return dr_explicit_realign; 4702 else 4703 return dr_explicit_realign_optimized; 4704 } 4705 if (!known_alignment_for_access_p (dr)) 4706 is_packed = contains_packed_reference (DR_REF (dr)); 4707 4708 if (targetm.vectorize. 4709 support_vector_misalignment (mode, type, 4710 DR_MISALIGNMENT (dr), is_packed)) 4711 /* Can't software pipeline the loads, but can at least do them. */ 4712 return dr_unaligned_supported; 4713 } 4714 else 4715 { 4716 bool is_packed = false; 4717 tree type = (TREE_TYPE (DR_REF (dr))); 4718 4719 if (!known_alignment_for_access_p (dr)) 4720 is_packed = contains_packed_reference (DR_REF (dr)); 4721 4722 if (targetm.vectorize. 4723 support_vector_misalignment (mode, type, 4724 DR_MISALIGNMENT (dr), is_packed)) 4725 return dr_unaligned_supported; 4726 } 4727 4728 /* Unsupported. */ 4729 return dr_unaligned_unsupported; 4730 } 4731