1 /* Statement Analysis and Transformation for Vectorization 2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 3 Free Software Foundation, Inc. 4 Contributed by Dorit Naishlos <dorit@il.ibm.com> 5 and Ira Rosen <irar@il.ibm.com> 6 7 This file is part of GCC. 8 9 GCC is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free 11 Software Foundation; either version 3, or (at your option) any later 12 version. 13 14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 15 WARRANTY; without even the implied warranty of MERCHANTABILITY or 16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 17 for more details. 18 19 You should have received a copy of the GNU General Public License 20 along with GCC; see the file COPYING3. If not see 21 <http://www.gnu.org/licenses/>. */ 22 23 #include "config.h" 24 #include "system.h" 25 #include "coretypes.h" 26 #include "tm.h" 27 #include "ggc.h" 28 #include "tree.h" 29 #include "target.h" 30 #include "basic-block.h" 31 #include "tree-pretty-print.h" 32 #include "gimple-pretty-print.h" 33 #include "tree-flow.h" 34 #include "tree-dump.h" 35 #include "cfgloop.h" 36 #include "cfglayout.h" 37 #include "expr.h" 38 #include "recog.h" 39 #include "optabs.h" 40 #include "diagnostic-core.h" 41 #include "tree-vectorizer.h" 42 #include "langhooks.h" 43 44 45 /* Return a variable of type ELEM_TYPE[NELEMS]. */ 46 47 static tree 48 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems) 49 { 50 return create_tmp_var (build_array_type_nelts (elem_type, nelems), 51 "vect_array"); 52 } 53 54 /* ARRAY is an array of vectors created by create_vector_array. 55 Return an SSA_NAME for the vector in index N. The reference 56 is part of the vectorization of STMT and the vector is associated 57 with scalar destination SCALAR_DEST. */ 58 59 static tree 60 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest, 61 tree array, unsigned HOST_WIDE_INT n) 62 { 63 tree vect_type, vect, vect_name, array_ref; 64 gimple new_stmt; 65 66 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE); 67 vect_type = TREE_TYPE (TREE_TYPE (array)); 68 vect = vect_create_destination_var (scalar_dest, vect_type); 69 array_ref = build4 (ARRAY_REF, vect_type, array, 70 build_int_cst (size_type_node, n), 71 NULL_TREE, NULL_TREE); 72 73 new_stmt = gimple_build_assign (vect, array_ref); 74 vect_name = make_ssa_name (vect, new_stmt); 75 gimple_assign_set_lhs (new_stmt, vect_name); 76 vect_finish_stmt_generation (stmt, new_stmt, gsi); 77 mark_symbols_for_renaming (new_stmt); 78 79 return vect_name; 80 } 81 82 /* ARRAY is an array of vectors created by create_vector_array. 83 Emit code to store SSA_NAME VECT in index N of the array. 84 The store is part of the vectorization of STMT. */ 85 86 static void 87 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect, 88 tree array, unsigned HOST_WIDE_INT n) 89 { 90 tree array_ref; 91 gimple new_stmt; 92 93 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array, 94 build_int_cst (size_type_node, n), 95 NULL_TREE, NULL_TREE); 96 97 new_stmt = gimple_build_assign (array_ref, vect); 98 vect_finish_stmt_generation (stmt, new_stmt, gsi); 99 mark_symbols_for_renaming (new_stmt); 100 } 101 102 /* PTR is a pointer to an array of type TYPE. Return a representation 103 of *PTR. The memory reference replaces those in FIRST_DR 104 (and its group). 
*/ 105 106 static tree 107 create_array_ref (tree type, tree ptr, struct data_reference *first_dr) 108 { 109 struct ptr_info_def *pi; 110 tree mem_ref, alias_ptr_type; 111 112 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr)); 113 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0)); 114 /* Arrays have the same alignment as their type. */ 115 pi = get_ptr_info (ptr); 116 pi->align = TYPE_ALIGN_UNIT (type); 117 pi->misalign = 0; 118 return mem_ref; 119 } 120 121 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */ 122 123 /* Function vect_mark_relevant. 124 125 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */ 126 127 static void 128 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt, 129 enum vect_relevant relevant, bool live_p, 130 bool used_in_pattern) 131 { 132 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 133 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info); 134 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info); 135 gimple pattern_stmt; 136 137 if (vect_print_dump_info (REPORT_DETAILS)) 138 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p); 139 140 /* If this stmt is an original stmt in a pattern, we might need to mark its 141 related pattern stmt instead of the original stmt. However, such stmts 142 may have their own uses that are not in any pattern, in such cases the 143 stmt itself should be marked. */ 144 if (STMT_VINFO_IN_PATTERN_P (stmt_info)) 145 { 146 bool found = false; 147 if (!used_in_pattern) 148 { 149 imm_use_iterator imm_iter; 150 use_operand_p use_p; 151 gimple use_stmt; 152 tree lhs; 153 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 154 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 155 156 if (is_gimple_assign (stmt)) 157 lhs = gimple_assign_lhs (stmt); 158 else 159 lhs = gimple_call_lhs (stmt); 160 161 /* This use is out of pattern use, if LHS has other uses that are 162 pattern uses, we should mark the stmt itself, and not the pattern 163 stmt. */ 164 if (TREE_CODE (lhs) == SSA_NAME) 165 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) 166 { 167 if (is_gimple_debug (USE_STMT (use_p))) 168 continue; 169 use_stmt = USE_STMT (use_p); 170 171 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) 172 continue; 173 174 if (vinfo_for_stmt (use_stmt) 175 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt))) 176 { 177 found = true; 178 break; 179 } 180 } 181 } 182 183 if (!found) 184 { 185 /* This is the last stmt in a sequence that was detected as a 186 pattern that can potentially be vectorized. Don't mark the stmt 187 as relevant/live because it's not going to be vectorized. 188 Instead mark the pattern-stmt that replaces it. */ 189 190 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); 191 192 if (vect_print_dump_info (REPORT_DETAILS)) 193 fprintf (vect_dump, "last stmt in pattern. 
don't mark"
                     " relevant/live.");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (gimple, heap, *worklist, stmt);
}


/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}


/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
     STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
     STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, VEC(gimple,heap) **worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.
*/ 400 dstmt_vinfo = vinfo_for_stmt (def_stmt); 401 bb = gimple_bb (stmt); 402 if (gimple_code (stmt) == GIMPLE_PHI 403 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def 404 && gimple_code (def_stmt) != GIMPLE_PHI 405 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def 406 && bb->loop_father == def_bb->loop_father) 407 { 408 if (vect_print_dump_info (REPORT_DETAILS)) 409 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest."); 410 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo)) 411 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo)); 412 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction); 413 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo) 414 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope); 415 return true; 416 } 417 418 /* case 3a: outer-loop stmt defining an inner-loop stmt: 419 outer-loop-header-bb: 420 d = def_stmt 421 inner-loop: 422 stmt # use (d) 423 outer-loop-tail-bb: 424 ... */ 425 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father)) 426 { 427 if (vect_print_dump_info (REPORT_DETAILS)) 428 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt."); 429 430 switch (relevant) 431 { 432 case vect_unused_in_scope: 433 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ? 434 vect_used_in_scope : vect_unused_in_scope; 435 break; 436 437 case vect_used_in_outer_by_reduction: 438 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def); 439 relevant = vect_used_by_reduction; 440 break; 441 442 case vect_used_in_outer: 443 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def); 444 relevant = vect_used_in_scope; 445 break; 446 447 case vect_used_in_scope: 448 break; 449 450 default: 451 gcc_unreachable (); 452 } 453 } 454 455 /* case 3b: inner-loop stmt defining an outer-loop stmt: 456 outer-loop-header-bb: 457 ... 458 inner-loop: 459 d = def_stmt 460 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction): 461 stmt # use (d) */ 462 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father)) 463 { 464 if (vect_print_dump_info (REPORT_DETAILS)) 465 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt."); 466 467 switch (relevant) 468 { 469 case vect_unused_in_scope: 470 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def 471 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ? 472 vect_used_in_outer_by_reduction : vect_unused_in_scope; 473 break; 474 475 case vect_used_by_reduction: 476 relevant = vect_used_in_outer_by_reduction; 477 break; 478 479 case vect_used_in_scope: 480 relevant = vect_used_in_outer; 481 break; 482 483 default: 484 gcc_unreachable (); 485 } 486 } 487 488 vect_mark_relevant (worklist, def_stmt, relevant, live_p, 489 is_pattern_stmt_p (stmt_vinfo)); 490 return true; 491 } 492 493 494 /* Function vect_mark_stmts_to_be_vectorized. 495 496 Not all stmts in the loop need to be vectorized. For example: 497 498 for i... 499 for j... 500 1. T0 = i + j 501 2. T1 = a[T0] 502 503 3. j = j + 1 504 505 Stmt 1 and 3 do not need to be vectorized, because loop control and 506 addressing of vectorized data-refs are handled differently. 507 508 This pass detects such stmts. 
*/ 509 510 bool 511 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo) 512 { 513 VEC(gimple,heap) *worklist; 514 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 515 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); 516 unsigned int nbbs = loop->num_nodes; 517 gimple_stmt_iterator si; 518 gimple stmt; 519 unsigned int i; 520 stmt_vec_info stmt_vinfo; 521 basic_block bb; 522 gimple phi; 523 bool live_p; 524 enum vect_relevant relevant, tmp_relevant; 525 enum vect_def_type def_type; 526 527 if (vect_print_dump_info (REPORT_DETAILS)) 528 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ==="); 529 530 worklist = VEC_alloc (gimple, heap, 64); 531 532 /* 1. Init worklist. */ 533 for (i = 0; i < nbbs; i++) 534 { 535 bb = bbs[i]; 536 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si)) 537 { 538 phi = gsi_stmt (si); 539 if (vect_print_dump_info (REPORT_DETAILS)) 540 { 541 fprintf (vect_dump, "init: phi relevant? "); 542 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM); 543 } 544 545 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p)) 546 vect_mark_relevant (&worklist, phi, relevant, live_p, false); 547 } 548 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) 549 { 550 stmt = gsi_stmt (si); 551 if (vect_print_dump_info (REPORT_DETAILS)) 552 { 553 fprintf (vect_dump, "init: stmt relevant? "); 554 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 555 } 556 557 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p)) 558 vect_mark_relevant (&worklist, stmt, relevant, live_p, false); 559 } 560 } 561 562 /* 2. Process_worklist */ 563 while (VEC_length (gimple, worklist) > 0) 564 { 565 use_operand_p use_p; 566 ssa_op_iter iter; 567 568 stmt = VEC_pop (gimple, worklist); 569 if (vect_print_dump_info (REPORT_DETAILS)) 570 { 571 fprintf (vect_dump, "worklist: examine stmt: "); 572 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 573 } 574 575 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it 576 (DEF_STMT) as relevant/irrelevant and live/dead according to the 577 liveness and relevance properties of STMT. */ 578 stmt_vinfo = vinfo_for_stmt (stmt); 579 relevant = STMT_VINFO_RELEVANT (stmt_vinfo); 580 live_p = STMT_VINFO_LIVE_P (stmt_vinfo); 581 582 /* Generally, the liveness and relevance properties of STMT are 583 propagated as is to the DEF_STMTs of its USEs: 584 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO) 585 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO) 586 587 One exception is when STMT has been identified as defining a reduction 588 variable; in this case we set the liveness/relevance as follows: 589 live_p = false 590 relevant = vect_used_by_reduction 591 This is because we distinguish between two kinds of relevant stmts - 592 those that are used by a reduction computation, and those that are 593 (also) used by a regular computation. This allows us later on to 594 identify stmts that are used solely by a reduction, and therefore the 595 order of the results that they produce does not have to be kept. 
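         For example (an illustrative note added here, not part of the original
         comment): in a loop computing 'sum += a[i] * b[i]', the multiplication
         feeds only the reduction, so the lanes of its vector results may be
         combined in any order.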
*/ 596 597 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo); 598 tmp_relevant = relevant; 599 switch (def_type) 600 { 601 case vect_reduction_def: 602 switch (tmp_relevant) 603 { 604 case vect_unused_in_scope: 605 relevant = vect_used_by_reduction; 606 break; 607 608 case vect_used_by_reduction: 609 if (gimple_code (stmt) == GIMPLE_PHI) 610 break; 611 /* fall through */ 612 613 default: 614 if (vect_print_dump_info (REPORT_DETAILS)) 615 fprintf (vect_dump, "unsupported use of reduction."); 616 617 VEC_free (gimple, heap, worklist); 618 return false; 619 } 620 621 live_p = false; 622 break; 623 624 case vect_nested_cycle: 625 if (tmp_relevant != vect_unused_in_scope 626 && tmp_relevant != vect_used_in_outer_by_reduction 627 && tmp_relevant != vect_used_in_outer) 628 { 629 if (vect_print_dump_info (REPORT_DETAILS)) 630 fprintf (vect_dump, "unsupported use of nested cycle."); 631 632 VEC_free (gimple, heap, worklist); 633 return false; 634 } 635 636 live_p = false; 637 break; 638 639 case vect_double_reduction_def: 640 if (tmp_relevant != vect_unused_in_scope 641 && tmp_relevant != vect_used_by_reduction) 642 { 643 if (vect_print_dump_info (REPORT_DETAILS)) 644 fprintf (vect_dump, "unsupported use of double reduction."); 645 646 VEC_free (gimple, heap, worklist); 647 return false; 648 } 649 650 live_p = false; 651 break; 652 653 default: 654 break; 655 } 656 657 if (is_pattern_stmt_p (stmt_vinfo)) 658 { 659 /* Pattern statements are not inserted into the code, so 660 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we 661 have to scan the RHS or function arguments instead. */ 662 if (is_gimple_assign (stmt)) 663 { 664 enum tree_code rhs_code = gimple_assign_rhs_code (stmt); 665 tree op = gimple_assign_rhs1 (stmt); 666 667 i = 1; 668 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op)) 669 { 670 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo, 671 live_p, relevant, &worklist, false) 672 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo, 673 live_p, relevant, &worklist, false)) 674 { 675 VEC_free (gimple, heap, worklist); 676 return false; 677 } 678 i = 2; 679 } 680 for (; i < gimple_num_ops (stmt); i++) 681 { 682 op = gimple_op (stmt, i); 683 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, 684 &worklist, false)) 685 { 686 VEC_free (gimple, heap, worklist); 687 return false; 688 } 689 } 690 } 691 else if (is_gimple_call (stmt)) 692 { 693 for (i = 0; i < gimple_call_num_args (stmt); i++) 694 { 695 tree arg = gimple_call_arg (stmt, i); 696 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant, 697 &worklist, false)) 698 { 699 VEC_free (gimple, heap, worklist); 700 return false; 701 } 702 } 703 } 704 } 705 else 706 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) 707 { 708 tree op = USE_FROM_PTR (use_p); 709 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, 710 &worklist, false)) 711 { 712 VEC_free (gimple, heap, worklist); 713 return false; 714 } 715 } 716 717 if (STMT_VINFO_GATHER_P (stmt_vinfo)) 718 { 719 tree off; 720 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL); 721 gcc_assert (decl); 722 if (!process_use (stmt, off, loop_vinfo, live_p, relevant, 723 &worklist, true)) 724 { 725 VEC_free (gimple, heap, worklist); 726 return false; 727 } 728 } 729 } /* while worklist */ 730 731 VEC_free (gimple, heap, worklist); 732 return true; 733 } 734 735 736 /* Get cost by calling cost target builtin. 
*/

static inline
int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
{
  tree dummy_type = NULL;
  int dummy = 0;

  return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
                                                       dummy_type, dummy);
}


/* Get cost for STMT.  */

int
cost_for_stmt (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case load_vec_info_type:
      return vect_get_stmt_cost (scalar_load);
    case store_vec_info_type:
      return vect_get_stmt_cost (scalar_store);
    case op_vec_info_type:
    case condition_vec_info_type:
    case assignment_vec_info_type:
    case reduc_vec_info_type:
    case induc_vec_info_type:
    case type_promotion_vec_info_type:
    case type_demotion_vec_info_type:
    case type_conversion_vec_info_type:
    case call_vec_info_type:
      return vect_get_stmt_cost (scalar_stmt);
    case undef_vec_info_type:
    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt, slp_tree slp_node)
{
  int i;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    {
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        outside_cost += vect_get_stmt_cost (vector_stmt);
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required; for example, a two-step
   promotion (PWR = 1) accumulates 2 + 4 = 6 vec_promote_demote operations,
   while the corresponding two-step demotion accumulates 1 + 2 = 3.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, outside_cost = 0, single_stmt_cost;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  single_stmt_cost = vect_get_stmt_cost (vec_promote_demote);
  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
            (i + 1) : i;
      inside_cost += vect_pow2 (tmp) * single_stmt_cost;
    }

  /* FORNOW: Assuming maximum 2 args per stmt.
*/ 839 for (i = 0; i < 2; i++) 840 { 841 if (dt[i] == vect_constant_def || dt[i] == vect_external_def) 842 outside_cost += vect_get_stmt_cost (vector_stmt); 843 } 844 845 if (vect_print_dump_info (REPORT_COST)) 846 fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, " 847 "outside_cost = %d .", inside_cost, outside_cost); 848 849 /* Set the costs in STMT_INFO. */ 850 stmt_vinfo_set_inside_of_loop_cost (stmt_info, NULL, inside_cost); 851 stmt_vinfo_set_outside_of_loop_cost (stmt_info, NULL, outside_cost); 852 } 853 854 /* Function vect_cost_strided_group_size 855 856 For strided load or store, return the group_size only if it is the first 857 load or store of a group, else return 1. This ensures that group size is 858 only returned once per group. */ 859 860 static int 861 vect_cost_strided_group_size (stmt_vec_info stmt_info) 862 { 863 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info); 864 865 if (first_stmt == STMT_VINFO_STMT (stmt_info)) 866 return GROUP_SIZE (stmt_info); 867 868 return 1; 869 } 870 871 872 /* Function vect_model_store_cost 873 874 Models cost for stores. In the case of strided accesses, one access 875 has the overhead of the strided access attributed to it. */ 876 877 void 878 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, 879 bool store_lanes_p, enum vect_def_type dt, 880 slp_tree slp_node) 881 { 882 int group_size; 883 unsigned int inside_cost = 0, outside_cost = 0; 884 struct data_reference *first_dr; 885 gimple first_stmt; 886 887 /* The SLP costs were already calculated during SLP tree build. */ 888 if (PURE_SLP_STMT (stmt_info)) 889 return; 890 891 if (dt == vect_constant_def || dt == vect_external_def) 892 outside_cost = vect_get_stmt_cost (scalar_to_vec); 893 894 /* Strided access? */ 895 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) 896 { 897 if (slp_node) 898 { 899 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0); 900 group_size = 1; 901 } 902 else 903 { 904 first_stmt = GROUP_FIRST_ELEMENT (stmt_info); 905 group_size = vect_cost_strided_group_size (stmt_info); 906 } 907 908 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); 909 } 910 /* Not a strided access. */ 911 else 912 { 913 group_size = 1; 914 first_dr = STMT_VINFO_DATA_REF (stmt_info); 915 } 916 917 /* We assume that the cost of a single store-lanes instruction is 918 equivalent to the cost of GROUP_SIZE separate stores. If a strided 919 access is instead being provided by a permute-and-store operation, 920 include the cost of the permutes. */ 921 if (!store_lanes_p && group_size > 1) 922 { 923 /* Uses a high and low interleave operation for each needed permute. */ 924 inside_cost = ncopies * exact_log2(group_size) * group_size 925 * vect_get_stmt_cost (vec_perm); 926 927 if (vect_print_dump_info (REPORT_COST)) 928 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .", 929 group_size); 930 } 931 932 /* Costs of the stores. */ 933 vect_get_store_cost (first_dr, ncopies, &inside_cost); 934 935 if (vect_print_dump_info (REPORT_COST)) 936 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, " 937 "outside_cost = %d .", inside_cost, outside_cost); 938 939 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */ 940 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost); 941 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost); 942 } 943 944 945 /* Calculate cost of DR's memory access. 
*/
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_store);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: aligned.");

        break;
      }

    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
                                                          vectype,
                                                          DR_MISALIGNMENT (dr));

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
                   "hardware.");

        break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of strided accesses, the last access
   has the overhead of the strided access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
                      slp_tree slp_node)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Strided accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_strided_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a strided
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      inside_cost = ncopies * exact_log2(group_size) * group_size
        * vect_get_stmt_cost (vec_perm);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
                 group_size);
    }

  /* The loads themselves.  */
  vect_get_load_cost (first_dr, ncopies,
                      ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1
                       || slp_node),
                      &inside_cost, &outside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}


/* Calculate cost of DR's memory access.
*/ 1054 void 1055 vect_get_load_cost (struct data_reference *dr, int ncopies, 1056 bool add_realign_cost, unsigned int *inside_cost, 1057 unsigned int *outside_cost) 1058 { 1059 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false); 1060 1061 switch (alignment_support_scheme) 1062 { 1063 case dr_aligned: 1064 { 1065 *inside_cost += ncopies * vect_get_stmt_cost (vector_load); 1066 1067 if (vect_print_dump_info (REPORT_COST)) 1068 fprintf (vect_dump, "vect_model_load_cost: aligned."); 1069 1070 break; 1071 } 1072 case dr_unaligned_supported: 1073 { 1074 gimple stmt = DR_STMT (dr); 1075 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1076 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1077 1078 /* Here, we assign an additional cost for the unaligned load. */ 1079 *inside_cost += ncopies 1080 * targetm.vectorize.builtin_vectorization_cost (unaligned_load, 1081 vectype, DR_MISALIGNMENT (dr)); 1082 if (vect_print_dump_info (REPORT_COST)) 1083 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by " 1084 "hardware."); 1085 1086 break; 1087 } 1088 case dr_explicit_realign: 1089 { 1090 *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load) 1091 + vect_get_stmt_cost (vec_perm)); 1092 1093 /* FIXME: If the misalignment remains fixed across the iterations of 1094 the containing loop, the following cost should be added to the 1095 outside costs. */ 1096 if (targetm.vectorize.builtin_mask_for_load) 1097 *inside_cost += vect_get_stmt_cost (vector_stmt); 1098 1099 if (vect_print_dump_info (REPORT_COST)) 1100 fprintf (vect_dump, "vect_model_load_cost: explicit realign"); 1101 1102 break; 1103 } 1104 case dr_explicit_realign_optimized: 1105 { 1106 if (vect_print_dump_info (REPORT_COST)) 1107 fprintf (vect_dump, "vect_model_load_cost: unaligned software " 1108 "pipelined."); 1109 1110 /* Unaligned software pipeline has a load of an address, an initial 1111 load, and possibly a mask operation to "prime" the loop. However, 1112 if this is an access in a group of loads, which provide strided 1113 access, then the above cost should only be considered for one 1114 access in the group. Inside the loop, there is a load op 1115 and a realignment op. */ 1116 1117 if (add_realign_cost) 1118 { 1119 *outside_cost = 2 * vect_get_stmt_cost (vector_stmt); 1120 if (targetm.vectorize.builtin_mask_for_load) 1121 *outside_cost += vect_get_stmt_cost (vector_stmt); 1122 } 1123 1124 *inside_cost += ncopies * (vect_get_stmt_cost (vector_load) 1125 + vect_get_stmt_cost (vec_perm)); 1126 1127 if (vect_print_dump_info (REPORT_COST)) 1128 fprintf (vect_dump, 1129 "vect_model_load_cost: explicit realign optimized"); 1130 1131 break; 1132 } 1133 1134 default: 1135 gcc_unreachable (); 1136 } 1137 } 1138 1139 1140 /* Function vect_init_vector. 1141 1142 Insert a new stmt (INIT_STMT) that initializes a new vector variable with 1143 the vector elements of VECTOR_VAR. Place the initialization at BSI if it 1144 is not NULL. Otherwise, place the initialization at the loop preheader. 1145 Return the DEF of INIT_STMT. 1146 It will be used in the vectorization of STMT. 
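   For example (an illustrative sketch, using the 'vect_cst_' naming from the
   comments further below): to vectorize a use of a loop-invariant value 'c',
   the caller first builds the vector value {c, c, c, c}; this function then
   emits 'vect_cst_ = {c, c, c, c}' in the loop preheader (or at BSI, when it
   is given) and returns the SSA name of that new definition.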
*/ 1147 1148 tree 1149 vect_init_vector (gimple stmt, tree vector_var, tree vector_type, 1150 gimple_stmt_iterator *gsi) 1151 { 1152 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); 1153 tree new_var; 1154 gimple init_stmt; 1155 tree vec_oprnd; 1156 edge pe; 1157 tree new_temp; 1158 basic_block new_bb; 1159 1160 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_"); 1161 add_referenced_var (new_var); 1162 init_stmt = gimple_build_assign (new_var, vector_var); 1163 new_temp = make_ssa_name (new_var, init_stmt); 1164 gimple_assign_set_lhs (init_stmt, new_temp); 1165 1166 if (gsi) 1167 vect_finish_stmt_generation (stmt, init_stmt, gsi); 1168 else 1169 { 1170 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 1171 1172 if (loop_vinfo) 1173 { 1174 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1175 1176 if (nested_in_vect_loop_p (loop, stmt)) 1177 loop = loop->inner; 1178 1179 pe = loop_preheader_edge (loop); 1180 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt); 1181 gcc_assert (!new_bb); 1182 } 1183 else 1184 { 1185 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); 1186 basic_block bb; 1187 gimple_stmt_iterator gsi_bb_start; 1188 1189 gcc_assert (bb_vinfo); 1190 bb = BB_VINFO_BB (bb_vinfo); 1191 gsi_bb_start = gsi_after_labels (bb); 1192 gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT); 1193 } 1194 } 1195 1196 if (vect_print_dump_info (REPORT_DETAILS)) 1197 { 1198 fprintf (vect_dump, "created new init_stmt: "); 1199 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM); 1200 } 1201 1202 vec_oprnd = gimple_assign_lhs (init_stmt); 1203 return vec_oprnd; 1204 } 1205 1206 1207 /* Function vect_get_vec_def_for_operand. 1208 1209 OP is an operand in STMT. This function returns a (vector) def that will be 1210 used in the vectorized stmt for STMT. 1211 1212 In the case that OP is an SSA_NAME which is defined in the loop, then 1213 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def. 1214 1215 In case OP is an invariant or constant, a new stmt that creates a vector def 1216 needs to be introduced. */ 1217 1218 tree 1219 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def) 1220 { 1221 tree vec_oprnd; 1222 gimple vec_stmt; 1223 gimple def_stmt; 1224 stmt_vec_info def_stmt_info = NULL; 1225 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); 1226 unsigned int nunits; 1227 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 1228 tree vec_inv; 1229 tree vec_cst; 1230 tree t = NULL_TREE; 1231 tree def; 1232 int i; 1233 enum vect_def_type dt; 1234 bool is_simple_use; 1235 tree vector_type; 1236 1237 if (vect_print_dump_info (REPORT_DETAILS)) 1238 { 1239 fprintf (vect_dump, "vect_get_vec_def_for_operand: "); 1240 print_generic_expr (vect_dump, op, TDF_SLIM); 1241 } 1242 1243 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL, 1244 &def_stmt, &def, &dt); 1245 gcc_assert (is_simple_use); 1246 if (vect_print_dump_info (REPORT_DETAILS)) 1247 { 1248 if (def) 1249 { 1250 fprintf (vect_dump, "def = "); 1251 print_generic_expr (vect_dump, def, TDF_SLIM); 1252 } 1253 if (def_stmt) 1254 { 1255 fprintf (vect_dump, " def_stmt = "); 1256 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM); 1257 } 1258 } 1259 1260 switch (dt) 1261 { 1262 /* Case 1: operand is a constant. 
*/ 1263 case vect_constant_def: 1264 { 1265 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op)); 1266 gcc_assert (vector_type); 1267 nunits = TYPE_VECTOR_SUBPARTS (vector_type); 1268 1269 if (scalar_def) 1270 *scalar_def = op; 1271 1272 /* Create 'vect_cst_ = {cst,cst,...,cst}' */ 1273 if (vect_print_dump_info (REPORT_DETAILS)) 1274 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits); 1275 1276 vec_cst = build_vector_from_val (vector_type, 1277 fold_convert (TREE_TYPE (vector_type), 1278 op)); 1279 return vect_init_vector (stmt, vec_cst, vector_type, NULL); 1280 } 1281 1282 /* Case 2: operand is defined outside the loop - loop invariant. */ 1283 case vect_external_def: 1284 { 1285 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def)); 1286 gcc_assert (vector_type); 1287 nunits = TYPE_VECTOR_SUBPARTS (vector_type); 1288 1289 if (scalar_def) 1290 *scalar_def = def; 1291 1292 /* Create 'vec_inv = {inv,inv,..,inv}' */ 1293 if (vect_print_dump_info (REPORT_DETAILS)) 1294 fprintf (vect_dump, "Create vector_inv."); 1295 1296 for (i = nunits - 1; i >= 0; --i) 1297 { 1298 t = tree_cons (NULL_TREE, def, t); 1299 } 1300 1301 /* FIXME: use build_constructor directly. */ 1302 vec_inv = build_constructor_from_list (vector_type, t); 1303 return vect_init_vector (stmt, vec_inv, vector_type, NULL); 1304 } 1305 1306 /* Case 3: operand is defined inside the loop. */ 1307 case vect_internal_def: 1308 { 1309 if (scalar_def) 1310 *scalar_def = NULL/* FIXME tuples: def_stmt*/; 1311 1312 /* Get the def from the vectorized stmt. */ 1313 def_stmt_info = vinfo_for_stmt (def_stmt); 1314 1315 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); 1316 /* Get vectorized pattern statement. */ 1317 if (!vec_stmt 1318 && STMT_VINFO_IN_PATTERN_P (def_stmt_info) 1319 && !STMT_VINFO_RELEVANT (def_stmt_info)) 1320 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt ( 1321 STMT_VINFO_RELATED_STMT (def_stmt_info))); 1322 gcc_assert (vec_stmt); 1323 if (gimple_code (vec_stmt) == GIMPLE_PHI) 1324 vec_oprnd = PHI_RESULT (vec_stmt); 1325 else if (is_gimple_call (vec_stmt)) 1326 vec_oprnd = gimple_call_lhs (vec_stmt); 1327 else 1328 vec_oprnd = gimple_assign_lhs (vec_stmt); 1329 return vec_oprnd; 1330 } 1331 1332 /* Case 4: operand is defined by a loop header phi - reduction */ 1333 case vect_reduction_def: 1334 case vect_double_reduction_def: 1335 case vect_nested_cycle: 1336 { 1337 struct loop *loop; 1338 1339 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI); 1340 loop = (gimple_bb (def_stmt))->loop_father; 1341 1342 /* Get the def before the loop */ 1343 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop)); 1344 return get_initial_def_for_reduction (stmt, op, scalar_def); 1345 } 1346 1347 /* Case 5: operand is defined by loop-header phi - induction. */ 1348 case vect_induction_def: 1349 { 1350 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI); 1351 1352 /* Get the def from the vectorized stmt. */ 1353 def_stmt_info = vinfo_for_stmt (def_stmt); 1354 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); 1355 if (gimple_code (vec_stmt) == GIMPLE_PHI) 1356 vec_oprnd = PHI_RESULT (vec_stmt); 1357 else 1358 vec_oprnd = gimple_get_lhs (vec_stmt); 1359 return vec_oprnd; 1360 } 1361 1362 default: 1363 gcc_unreachable (); 1364 } 1365 } 1366 1367 1368 /* Function vect_get_vec_def_for_stmt_copy 1369 1370 Return a vector-def for an operand. 
This function is used when the 1371 vectorized stmt to be created (by the caller to this function) is a "copy" 1372 created in case the vectorized result cannot fit in one vector, and several 1373 copies of the vector-stmt are required. In this case the vector-def is 1374 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field 1375 of the stmt that defines VEC_OPRND. 1376 DT is the type of the vector def VEC_OPRND. 1377 1378 Context: 1379 In case the vectorization factor (VF) is bigger than the number 1380 of elements that can fit in a vectype (nunits), we have to generate 1381 more than one vector stmt to vectorize the scalar stmt. This situation 1382 arises when there are multiple data-types operated upon in the loop; the 1383 smallest data-type determines the VF, and as a result, when vectorizing 1384 stmts operating on wider types we need to create 'VF/nunits' "copies" of the 1385 vector stmt (each computing a vector of 'nunits' results, and together 1386 computing 'VF' results in each iteration). This function is called when 1387 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in 1388 which VF=16 and nunits=4, so the number of copies required is 4): 1389 1390 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT 1391 1392 S1: x = load VS1.0: vx.0 = memref0 VS1.1 1393 VS1.1: vx.1 = memref1 VS1.2 1394 VS1.2: vx.2 = memref2 VS1.3 1395 VS1.3: vx.3 = memref3 1396 1397 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1 1398 VSnew.1: vz1 = vx.1 + ... VSnew.2 1399 VSnew.2: vz2 = vx.2 + ... VSnew.3 1400 VSnew.3: vz3 = vx.3 + ... 1401 1402 The vectorization of S1 is explained in vectorizable_load. 1403 The vectorization of S2: 1404 To create the first vector-stmt out of the 4 copies - VSnew.0 - 1405 the function 'vect_get_vec_def_for_operand' is called to 1406 get the relevant vector-def for each operand of S2. For operand x it 1407 returns the vector-def 'vx.0'. 1408 1409 To create the remaining copies of the vector-stmt (VSnew.j), this 1410 function is called to get the relevant vector-def for each operand. It is 1411 obtained from the respective VS1.j stmt, which is recorded in the 1412 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND. 1413 1414 For example, to obtain the vector-def 'vx.1' in order to create the 1415 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'. 1416 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the 1417 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1', 1418 and return its def ('vx.1'). 1419 Overall, to create the above sequence this function will be called 3 times: 1420 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0); 1421 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1); 1422 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */ 1423 1424 tree 1425 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd) 1426 { 1427 gimple vec_stmt_for_operand; 1428 stmt_vec_info def_stmt_info; 1429 1430 /* Do nothing; can reuse same def. 
*/ 1431 if (dt == vect_external_def || dt == vect_constant_def ) 1432 return vec_oprnd; 1433 1434 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd); 1435 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand); 1436 gcc_assert (def_stmt_info); 1437 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info); 1438 gcc_assert (vec_stmt_for_operand); 1439 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand); 1440 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI) 1441 vec_oprnd = PHI_RESULT (vec_stmt_for_operand); 1442 else 1443 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand); 1444 return vec_oprnd; 1445 } 1446 1447 1448 /* Get vectorized definitions for the operands to create a copy of an original 1449 stmt. See vect_get_vec_def_for_stmt_copy () for details. */ 1450 1451 static void 1452 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt, 1453 VEC(tree,heap) **vec_oprnds0, 1454 VEC(tree,heap) **vec_oprnds1) 1455 { 1456 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0); 1457 1458 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd); 1459 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd); 1460 1461 if (vec_oprnds1 && *vec_oprnds1) 1462 { 1463 vec_oprnd = VEC_pop (tree, *vec_oprnds1); 1464 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd); 1465 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd); 1466 } 1467 } 1468 1469 1470 /* Get vectorized definitions for OP0 and OP1. 1471 REDUC_INDEX is the index of reduction operand in case of reduction, 1472 and -1 otherwise. */ 1473 1474 void 1475 vect_get_vec_defs (tree op0, tree op1, gimple stmt, 1476 VEC (tree, heap) **vec_oprnds0, 1477 VEC (tree, heap) **vec_oprnds1, 1478 slp_tree slp_node, int reduc_index) 1479 { 1480 if (slp_node) 1481 { 1482 int nops = (op1 == NULL_TREE) ? 1 : 2; 1483 VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops); 1484 VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops); 1485 1486 VEC_quick_push (tree, ops, op0); 1487 if (op1) 1488 VEC_quick_push (tree, ops, op1); 1489 1490 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index); 1491 1492 *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0); 1493 if (op1) 1494 *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1); 1495 1496 VEC_free (tree, heap, ops); 1497 VEC_free (slp_void_p, heap, vec_defs); 1498 } 1499 else 1500 { 1501 tree vec_oprnd; 1502 1503 *vec_oprnds0 = VEC_alloc (tree, heap, 1); 1504 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL); 1505 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd); 1506 1507 if (op1) 1508 { 1509 *vec_oprnds1 = VEC_alloc (tree, heap, 1); 1510 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL); 1511 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd); 1512 } 1513 } 1514 } 1515 1516 1517 /* Function vect_finish_stmt_generation. 1518 1519 Insert a new stmt. 
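   Specifically, VEC_STMT is inserted immediately before the statement at GSI,
   is given a fresh stmt_vec_info that refers to the same loop/basic-block
   vectorization info as STMT's, and inherits STMT's source location.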
*/ 1520 1521 void 1522 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt, 1523 gimple_stmt_iterator *gsi) 1524 { 1525 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1526 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1527 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 1528 1529 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL); 1530 1531 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT); 1532 1533 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo, 1534 bb_vinfo)); 1535 1536 if (vect_print_dump_info (REPORT_DETAILS)) 1537 { 1538 fprintf (vect_dump, "add new stmt: "); 1539 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM); 1540 } 1541 1542 gimple_set_location (vec_stmt, gimple_location (stmt)); 1543 } 1544 1545 /* Checks if CALL can be vectorized in type VECTYPE. Returns 1546 a function declaration if the target has a vectorized version 1547 of the function, or NULL_TREE if the function cannot be vectorized. */ 1548 1549 tree 1550 vectorizable_function (gimple call, tree vectype_out, tree vectype_in) 1551 { 1552 tree fndecl = gimple_call_fndecl (call); 1553 1554 /* We only handle functions that do not read or clobber memory -- i.e. 1555 const or novops ones. */ 1556 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS))) 1557 return NULL_TREE; 1558 1559 if (!fndecl 1560 || TREE_CODE (fndecl) != FUNCTION_DECL 1561 || !DECL_BUILT_IN (fndecl)) 1562 return NULL_TREE; 1563 1564 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out, 1565 vectype_in); 1566 } 1567 1568 /* Function vectorizable_call. 1569 1570 Check if STMT performs a function call that can be vectorized. 1571 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 1572 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 1573 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 1574 1575 static bool 1576 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, 1577 slp_tree slp_node) 1578 { 1579 tree vec_dest; 1580 tree scalar_dest; 1581 tree op, type; 1582 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE; 1583 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info; 1584 tree vectype_out, vectype_in; 1585 int nunits_in; 1586 int nunits_out; 1587 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1588 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 1589 tree fndecl, new_temp, def, rhs_type; 1590 gimple def_stmt; 1591 enum vect_def_type dt[3] 1592 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type}; 1593 gimple new_stmt = NULL; 1594 int ncopies, j; 1595 VEC(tree, heap) *vargs = NULL; 1596 enum { NARROW, NONE, WIDEN } modifier; 1597 size_t i, nargs; 1598 tree lhs; 1599 1600 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 1601 return false; 1602 1603 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 1604 return false; 1605 1606 /* Is STMT a vectorizable call? */ 1607 if (!is_gimple_call (stmt)) 1608 return false; 1609 1610 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME) 1611 return false; 1612 1613 if (stmt_can_throw_internal (stmt)) 1614 return false; 1615 1616 vectype_out = STMT_VINFO_VECTYPE (stmt_info); 1617 1618 /* Process function arguments. */ 1619 rhs_type = NULL_TREE; 1620 vectype_in = NULL_TREE; 1621 nargs = gimple_call_num_args (stmt); 1622 1623 /* Bail out if the function has more than three arguments, we do not have 1624 interesting builtin functions to vectorize with more than two arguments 1625 except for fma. 
No arguments is also not good. */ 1626 if (nargs == 0 || nargs > 3) 1627 return false; 1628 1629 for (i = 0; i < nargs; i++) 1630 { 1631 tree opvectype; 1632 1633 op = gimple_call_arg (stmt, i); 1634 1635 /* We can only handle calls with arguments of the same type. */ 1636 if (rhs_type 1637 && !types_compatible_p (rhs_type, TREE_TYPE (op))) 1638 { 1639 if (vect_print_dump_info (REPORT_DETAILS)) 1640 fprintf (vect_dump, "argument types differ."); 1641 return false; 1642 } 1643 if (!rhs_type) 1644 rhs_type = TREE_TYPE (op); 1645 1646 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo, 1647 &def_stmt, &def, &dt[i], &opvectype)) 1648 { 1649 if (vect_print_dump_info (REPORT_DETAILS)) 1650 fprintf (vect_dump, "use not simple."); 1651 return false; 1652 } 1653 1654 if (!vectype_in) 1655 vectype_in = opvectype; 1656 else if (opvectype 1657 && opvectype != vectype_in) 1658 { 1659 if (vect_print_dump_info (REPORT_DETAILS)) 1660 fprintf (vect_dump, "argument vector types differ."); 1661 return false; 1662 } 1663 } 1664 /* If all arguments are external or constant defs use a vector type with 1665 the same size as the output vector type. */ 1666 if (!vectype_in) 1667 vectype_in = get_same_sized_vectype (rhs_type, vectype_out); 1668 if (vec_stmt) 1669 gcc_assert (vectype_in); 1670 if (!vectype_in) 1671 { 1672 if (vect_print_dump_info (REPORT_DETAILS)) 1673 { 1674 fprintf (vect_dump, "no vectype for scalar type "); 1675 print_generic_expr (vect_dump, rhs_type, TDF_SLIM); 1676 } 1677 1678 return false; 1679 } 1680 1681 /* FORNOW */ 1682 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in); 1683 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 1684 if (nunits_in == nunits_out / 2) 1685 modifier = NARROW; 1686 else if (nunits_out == nunits_in) 1687 modifier = NONE; 1688 else if (nunits_out == nunits_in / 2) 1689 modifier = WIDEN; 1690 else 1691 return false; 1692 1693 /* For now, we only vectorize functions if a target specific builtin 1694 is available. TODO -- in some cases, it might be profitable to 1695 insert the calls for pieces of the vector, in order to be able 1696 to vectorize other operations in the loop. */ 1697 fndecl = vectorizable_function (stmt, vectype_out, vectype_in); 1698 if (fndecl == NULL_TREE) 1699 { 1700 if (vect_print_dump_info (REPORT_DETAILS)) 1701 fprintf (vect_dump, "function is not vectorizable."); 1702 1703 return false; 1704 } 1705 1706 gcc_assert (!gimple_vuse (stmt)); 1707 1708 if (slp_node || PURE_SLP_STMT (stmt_info)) 1709 ncopies = 1; 1710 else if (modifier == NARROW) 1711 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out; 1712 else 1713 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; 1714 1715 /* Sanity check: make sure that at least one copy of the vectorized stmt 1716 needs to be generated. */ 1717 gcc_assert (ncopies >= 1); 1718 1719 if (!vec_stmt) /* transformation not required. */ 1720 { 1721 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; 1722 if (vect_print_dump_info (REPORT_DETAILS)) 1723 fprintf (vect_dump, "=== vectorizable_call ==="); 1724 vect_model_simple_cost (stmt_info, ncopies, dt, NULL); 1725 return true; 1726 } 1727 1728 /** Transform. **/ 1729 1730 if (vect_print_dump_info (REPORT_DETAILS)) 1731 fprintf (vect_dump, "transform call."); 1732 1733 /* Handle def. 
*/ 1734 scalar_dest = gimple_call_lhs (stmt); 1735 vec_dest = vect_create_destination_var (scalar_dest, vectype_out); 1736 1737 prev_stmt_info = NULL; 1738 switch (modifier) 1739 { 1740 case NONE: 1741 for (j = 0; j < ncopies; ++j) 1742 { 1743 /* Build argument list for the vectorized call. */ 1744 if (j == 0) 1745 vargs = VEC_alloc (tree, heap, nargs); 1746 else 1747 VEC_truncate (tree, vargs, 0); 1748 1749 if (slp_node) 1750 { 1751 VEC (slp_void_p, heap) *vec_defs 1752 = VEC_alloc (slp_void_p, heap, nargs); 1753 VEC (tree, heap) *vec_oprnds0; 1754 1755 for (i = 0; i < nargs; i++) 1756 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i)); 1757 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1); 1758 vec_oprnds0 1759 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0); 1760 1761 /* Arguments are ready. Create the new vector stmt. */ 1762 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0) 1763 { 1764 size_t k; 1765 for (k = 0; k < nargs; k++) 1766 { 1767 VEC (tree, heap) *vec_oprndsk 1768 = (VEC (tree, heap) *) 1769 VEC_index (slp_void_p, vec_defs, k); 1770 VEC_replace (tree, vargs, k, 1771 VEC_index (tree, vec_oprndsk, i)); 1772 } 1773 new_stmt = gimple_build_call_vec (fndecl, vargs); 1774 new_temp = make_ssa_name (vec_dest, new_stmt); 1775 gimple_call_set_lhs (new_stmt, new_temp); 1776 vect_finish_stmt_generation (stmt, new_stmt, gsi); 1777 mark_symbols_for_renaming (new_stmt); 1778 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), 1779 new_stmt); 1780 } 1781 1782 for (i = 0; i < nargs; i++) 1783 { 1784 VEC (tree, heap) *vec_oprndsi 1785 = (VEC (tree, heap) *) 1786 VEC_index (slp_void_p, vec_defs, i); 1787 VEC_free (tree, heap, vec_oprndsi); 1788 } 1789 VEC_free (slp_void_p, heap, vec_defs); 1790 continue; 1791 } 1792 1793 for (i = 0; i < nargs; i++) 1794 { 1795 op = gimple_call_arg (stmt, i); 1796 if (j == 0) 1797 vec_oprnd0 1798 = vect_get_vec_def_for_operand (op, stmt, NULL); 1799 else 1800 { 1801 vec_oprnd0 = gimple_call_arg (new_stmt, i); 1802 vec_oprnd0 1803 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0); 1804 } 1805 1806 VEC_quick_push (tree, vargs, vec_oprnd0); 1807 } 1808 1809 new_stmt = gimple_build_call_vec (fndecl, vargs); 1810 new_temp = make_ssa_name (vec_dest, new_stmt); 1811 gimple_call_set_lhs (new_stmt, new_temp); 1812 1813 vect_finish_stmt_generation (stmt, new_stmt, gsi); 1814 mark_symbols_for_renaming (new_stmt); 1815 1816 if (j == 0) 1817 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 1818 else 1819 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 1820 1821 prev_stmt_info = vinfo_for_stmt (new_stmt); 1822 } 1823 1824 break; 1825 1826 case NARROW: 1827 for (j = 0; j < ncopies; ++j) 1828 { 1829 /* Build argument list for the vectorized call. */ 1830 if (j == 0) 1831 vargs = VEC_alloc (tree, heap, nargs * 2); 1832 else 1833 VEC_truncate (tree, vargs, 0); 1834 1835 if (slp_node) 1836 { 1837 VEC (slp_void_p, heap) *vec_defs 1838 = VEC_alloc (slp_void_p, heap, nargs); 1839 VEC (tree, heap) *vec_oprnds0; 1840 1841 for (i = 0; i < nargs; i++) 1842 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i)); 1843 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1); 1844 vec_oprnds0 1845 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0); 1846 1847 /* Arguments are ready. Create the new vector stmt. 
*/ 1848 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0); 1849 i += 2) 1850 { 1851 size_t k; 1852 VEC_truncate (tree, vargs, 0); 1853 for (k = 0; k < nargs; k++) 1854 { 1855 VEC (tree, heap) *vec_oprndsk 1856 = (VEC (tree, heap) *) 1857 VEC_index (slp_void_p, vec_defs, k); 1858 VEC_quick_push (tree, vargs, 1859 VEC_index (tree, vec_oprndsk, i)); 1860 VEC_quick_push (tree, vargs, 1861 VEC_index (tree, vec_oprndsk, i + 1)); 1862 } 1863 new_stmt = gimple_build_call_vec (fndecl, vargs); 1864 new_temp = make_ssa_name (vec_dest, new_stmt); 1865 gimple_call_set_lhs (new_stmt, new_temp); 1866 vect_finish_stmt_generation (stmt, new_stmt, gsi); 1867 mark_symbols_for_renaming (new_stmt); 1868 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), 1869 new_stmt); 1870 } 1871 1872 for (i = 0; i < nargs; i++) 1873 { 1874 VEC (tree, heap) *vec_oprndsi 1875 = (VEC (tree, heap) *) 1876 VEC_index (slp_void_p, vec_defs, i); 1877 VEC_free (tree, heap, vec_oprndsi); 1878 } 1879 VEC_free (slp_void_p, heap, vec_defs); 1880 continue; 1881 } 1882 1883 for (i = 0; i < nargs; i++) 1884 { 1885 op = gimple_call_arg (stmt, i); 1886 if (j == 0) 1887 { 1888 vec_oprnd0 1889 = vect_get_vec_def_for_operand (op, stmt, NULL); 1890 vec_oprnd1 1891 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0); 1892 } 1893 else 1894 { 1895 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1); 1896 vec_oprnd0 1897 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1); 1898 vec_oprnd1 1899 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0); 1900 } 1901 1902 VEC_quick_push (tree, vargs, vec_oprnd0); 1903 VEC_quick_push (tree, vargs, vec_oprnd1); 1904 } 1905 1906 new_stmt = gimple_build_call_vec (fndecl, vargs); 1907 new_temp = make_ssa_name (vec_dest, new_stmt); 1908 gimple_call_set_lhs (new_stmt, new_temp); 1909 1910 vect_finish_stmt_generation (stmt, new_stmt, gsi); 1911 mark_symbols_for_renaming (new_stmt); 1912 1913 if (j == 0) 1914 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt; 1915 else 1916 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 1917 1918 prev_stmt_info = vinfo_for_stmt (new_stmt); 1919 } 1920 1921 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 1922 1923 break; 1924 1925 case WIDEN: 1926 /* No current target implements this case. */ 1927 return false; 1928 } 1929 1930 VEC_free (tree, heap, vargs); 1931 1932 /* Update the exception handling table with the vector stmt if necessary. */ 1933 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt)) 1934 gimple_purge_dead_eh_edges (gimple_bb (stmt)); 1935 1936 /* The call in STMT might prevent it from being removed in dce. 1937 We however cannot remove it here, due to the way the ssa name 1938 it defines is mapped to the new definition. So just replace 1939 rhs of the statement with something harmless. 
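     Concretely, for an original statement like  x_1 = foo (y)  the code below
     rewrites it to  x_1 = 0  (a zero constant of the scalar result type) and
     keeps x_1's stmt_vec_info attached to the replacement statement.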
*/ 1940 1941 if (slp_node) 1942 return true; 1943 1944 type = TREE_TYPE (scalar_dest); 1945 if (is_pattern_stmt_p (stmt_info)) 1946 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info)); 1947 else 1948 lhs = gimple_call_lhs (stmt); 1949 new_stmt = gimple_build_assign (lhs, build_zero_cst (type)); 1950 set_vinfo_for_stmt (new_stmt, stmt_info); 1951 set_vinfo_for_stmt (stmt, NULL); 1952 STMT_VINFO_STMT (stmt_info) = new_stmt; 1953 gsi_replace (gsi, new_stmt, false); 1954 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt; 1955 1956 return true; 1957 } 1958 1959 1960 /* Function vect_gen_widened_results_half 1961 1962 Create a vector stmt whose code, type, number of arguments, and result 1963 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are 1964 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI. 1965 In the case that CODE is a CALL_EXPR, this means that a call to DECL 1966 needs to be created (DECL is a function-decl of a target-builtin). 1967 STMT is the original scalar stmt that we are vectorizing. */ 1968 1969 static gimple 1970 vect_gen_widened_results_half (enum tree_code code, 1971 tree decl, 1972 tree vec_oprnd0, tree vec_oprnd1, int op_type, 1973 tree vec_dest, gimple_stmt_iterator *gsi, 1974 gimple stmt) 1975 { 1976 gimple new_stmt; 1977 tree new_temp; 1978 1979 /* Generate half of the widened result: */ 1980 if (code == CALL_EXPR) 1981 { 1982 /* Target specific support */ 1983 if (op_type == binary_op) 1984 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1); 1985 else 1986 new_stmt = gimple_build_call (decl, 1, vec_oprnd0); 1987 new_temp = make_ssa_name (vec_dest, new_stmt); 1988 gimple_call_set_lhs (new_stmt, new_temp); 1989 } 1990 else 1991 { 1992 /* Generic support */ 1993 gcc_assert (op_type == TREE_CODE_LENGTH (code)); 1994 if (op_type != binary_op) 1995 vec_oprnd1 = NULL; 1996 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0, 1997 vec_oprnd1); 1998 new_temp = make_ssa_name (vec_dest, new_stmt); 1999 gimple_assign_set_lhs (new_stmt, new_temp); 2000 } 2001 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2002 2003 return new_stmt; 2004 } 2005 2006 2007 /* Get vectorized definitions for loop-based vectorization. For the first 2008 operand we call vect_get_vec_def_for_operand() (with OPRND containing 2009 scalar operand), and for the rest we get a copy with 2010 vect_get_vec_def_for_stmt_copy() using the previous vector definition 2011 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details. 2012 The vectors are collected into VEC_OPRNDS. */ 2013 2014 static void 2015 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt, 2016 VEC (tree, heap) **vec_oprnds, int multi_step_cvt) 2017 { 2018 tree vec_oprnd; 2019 2020 /* Get first vector operand. */ 2021 /* All the vector operands except the very first one (that is scalar oprnd) 2022 are stmt copies. */ 2023 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE) 2024 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL); 2025 else 2026 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd); 2027 2028 VEC_quick_push (tree, *vec_oprnds, vec_oprnd); 2029 2030 /* Get second vector operand. */ 2031 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd); 2032 VEC_quick_push (tree, *vec_oprnds, vec_oprnd); 2033 2034 *oprnd = vec_oprnd; 2035 2036 /* For conversion in multiple steps, continue to get operands 2037 recursively. 
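   Each invocation pushes two vector defs, so a call with MULTI_STEP_CVT
   levels of recursion collects 2 * (MULTI_STEP_CVT + 1) operands in total.
   For example, narrowing V4SI to V16QI through an intermediate V8HI type
   needs four V4SI input vectors per final V16QI result, and the NARROW
   caller passes vect_pow2 (multi_step_cvt) - 1 here to obtain exactly that
   many.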
*/ 2038 if (multi_step_cvt) 2039 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1); 2040 } 2041 2042 2043 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS. 2044 For multi-step conversions store the resulting vectors and call the function 2045 recursively. */ 2046 2047 static void 2048 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds, 2049 int multi_step_cvt, gimple stmt, 2050 VEC (tree, heap) *vec_dsts, 2051 gimple_stmt_iterator *gsi, 2052 slp_tree slp_node, enum tree_code code, 2053 stmt_vec_info *prev_stmt_info) 2054 { 2055 unsigned int i; 2056 tree vop0, vop1, new_tmp, vec_dest; 2057 gimple new_stmt; 2058 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2059 2060 vec_dest = VEC_pop (tree, vec_dsts); 2061 2062 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2) 2063 { 2064 /* Create demotion operation. */ 2065 vop0 = VEC_index (tree, *vec_oprnds, i); 2066 vop1 = VEC_index (tree, *vec_oprnds, i + 1); 2067 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1); 2068 new_tmp = make_ssa_name (vec_dest, new_stmt); 2069 gimple_assign_set_lhs (new_stmt, new_tmp); 2070 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2071 2072 if (multi_step_cvt) 2073 /* Store the resulting vector for next recursive call. */ 2074 VEC_replace (tree, *vec_oprnds, i/2, new_tmp); 2075 else 2076 { 2077 /* This is the last step of the conversion sequence. Store the 2078 vectors in SLP_NODE or in vector info of the scalar statement 2079 (or in STMT_VINFO_RELATED_STMT chain). */ 2080 if (slp_node) 2081 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt); 2082 else 2083 { 2084 if (!*prev_stmt_info) 2085 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt; 2086 else 2087 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt; 2088 2089 *prev_stmt_info = vinfo_for_stmt (new_stmt); 2090 } 2091 } 2092 } 2093 2094 /* For multi-step demotion operations we first generate demotion operations 2095 from the source type to the intermediate types, and then combine the 2096 results (stored in VEC_OPRNDS) in demotion operation to the destination 2097 type. */ 2098 if (multi_step_cvt) 2099 { 2100 /* At each level of recursion we have half of the operands we had at the 2101 previous level. */ 2102 VEC_truncate (tree, *vec_oprnds, (i+1)/2); 2103 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1, 2104 stmt, vec_dsts, gsi, slp_node, 2105 VEC_PACK_TRUNC_EXPR, 2106 prev_stmt_info); 2107 } 2108 2109 VEC_quick_push (tree, vec_dsts, vec_dest); 2110 } 2111 2112 2113 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0 2114 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store 2115 the resulting vectors and call the function recursively. */ 2116 2117 static void 2118 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0, 2119 VEC (tree, heap) **vec_oprnds1, 2120 gimple stmt, tree vec_dest, 2121 gimple_stmt_iterator *gsi, 2122 enum tree_code code1, 2123 enum tree_code code2, tree decl1, 2124 tree decl2, int op_type) 2125 { 2126 int i; 2127 tree vop0, vop1, new_tmp1, new_tmp2; 2128 gimple new_stmt1, new_stmt2; 2129 VEC (tree, heap) *vec_tmp = NULL; 2130 2131 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2); 2132 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0) 2133 { 2134 if (op_type == binary_op) 2135 vop1 = VEC_index (tree, *vec_oprnds1, i); 2136 else 2137 vop1 = NULL_TREE; 2138 2139 /* Generate the two halves of promotion operation. 
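	 For instance (illustrative), promoting a V8HI operand to V4SI results
	 typically uses a lo/hi pair such as
	   vect_lo = VEC_UNPACK_LO_EXPR <vect_op>;
	   vect_hi = VEC_UNPACK_HI_EXPR <vect_op>;
	 so every input vector contributes two entries to VEC_TMP below.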
*/ 2140 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1, 2141 op_type, vec_dest, gsi, stmt); 2142 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1, 2143 op_type, vec_dest, gsi, stmt); 2144 if (is_gimple_call (new_stmt1)) 2145 { 2146 new_tmp1 = gimple_call_lhs (new_stmt1); 2147 new_tmp2 = gimple_call_lhs (new_stmt2); 2148 } 2149 else 2150 { 2151 new_tmp1 = gimple_assign_lhs (new_stmt1); 2152 new_tmp2 = gimple_assign_lhs (new_stmt2); 2153 } 2154 2155 /* Store the results for the next step. */ 2156 VEC_quick_push (tree, vec_tmp, new_tmp1); 2157 VEC_quick_push (tree, vec_tmp, new_tmp2); 2158 } 2159 2160 VEC_free (tree, heap, *vec_oprnds0); 2161 *vec_oprnds0 = vec_tmp; 2162 } 2163 2164 2165 /* Check if STMT performs a conversion operation, that can be vectorized. 2166 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 2167 stmt to replace it, put it in VEC_STMT, and insert it at GSI. 2168 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 2169 2170 static bool 2171 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi, 2172 gimple *vec_stmt, slp_tree slp_node) 2173 { 2174 tree vec_dest; 2175 tree scalar_dest; 2176 tree op0, op1 = NULL_TREE; 2177 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE; 2178 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2179 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 2180 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK; 2181 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK; 2182 tree decl1 = NULL_TREE, decl2 = NULL_TREE; 2183 tree new_temp; 2184 tree def; 2185 gimple def_stmt; 2186 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; 2187 gimple new_stmt = NULL; 2188 stmt_vec_info prev_stmt_info; 2189 int nunits_in; 2190 int nunits_out; 2191 tree vectype_out, vectype_in; 2192 int ncopies, i, j; 2193 tree lhs_type, rhs_type; 2194 enum { NARROW, NONE, WIDEN } modifier; 2195 VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; 2196 tree vop0; 2197 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 2198 int multi_step_cvt = 0; 2199 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL; 2200 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE; 2201 int op_type; 2202 enum machine_mode rhs_mode; 2203 unsigned short fltsz; 2204 2205 /* Is STMT a vectorizable conversion? */ 2206 2207 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 2208 return false; 2209 2210 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 2211 return false; 2212 2213 if (!is_gimple_assign (stmt)) 2214 return false; 2215 2216 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 2217 return false; 2218 2219 code = gimple_assign_rhs_code (stmt); 2220 if (!CONVERT_EXPR_CODE_P (code) 2221 && code != FIX_TRUNC_EXPR 2222 && code != FLOAT_EXPR 2223 && code != WIDEN_MULT_EXPR 2224 && code != WIDEN_LSHIFT_EXPR) 2225 return false; 2226 2227 op_type = TREE_CODE_LENGTH (code); 2228 2229 /* Check types of lhs and rhs. 
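     For example, for a widening conversion  int_x = (int) short_y  LHS_TYPE
     is int and RHS_TYPE is short; NUNITS_OUT then ends up being half of
     NUNITS_IN, and MODIFIER is set to WIDEN further below.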
*/ 2230 scalar_dest = gimple_assign_lhs (stmt); 2231 lhs_type = TREE_TYPE (scalar_dest); 2232 vectype_out = STMT_VINFO_VECTYPE (stmt_info); 2233 2234 op0 = gimple_assign_rhs1 (stmt); 2235 rhs_type = TREE_TYPE (op0); 2236 2237 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR) 2238 && !((INTEGRAL_TYPE_P (lhs_type) 2239 && INTEGRAL_TYPE_P (rhs_type)) 2240 || (SCALAR_FLOAT_TYPE_P (lhs_type) 2241 && SCALAR_FLOAT_TYPE_P (rhs_type)))) 2242 return false; 2243 2244 if ((INTEGRAL_TYPE_P (lhs_type) 2245 && (TYPE_PRECISION (lhs_type) 2246 != GET_MODE_PRECISION (TYPE_MODE (lhs_type)))) 2247 || (INTEGRAL_TYPE_P (rhs_type) 2248 && (TYPE_PRECISION (rhs_type) 2249 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))) 2250 { 2251 if (vect_print_dump_info (REPORT_DETAILS)) 2252 fprintf (vect_dump, 2253 "type conversion to/from bit-precision unsupported."); 2254 return false; 2255 } 2256 2257 /* Check the operands of the operation. */ 2258 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo, 2259 &def_stmt, &def, &dt[0], &vectype_in)) 2260 { 2261 if (vect_print_dump_info (REPORT_DETAILS)) 2262 fprintf (vect_dump, "use not simple."); 2263 return false; 2264 } 2265 if (op_type == binary_op) 2266 { 2267 bool ok; 2268 2269 op1 = gimple_assign_rhs2 (stmt); 2270 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR); 2271 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of 2272 OP1. */ 2273 if (CONSTANT_CLASS_P (op0)) 2274 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, NULL, 2275 &def_stmt, &def, &dt[1], &vectype_in); 2276 else 2277 ok = vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &def_stmt, 2278 &def, &dt[1]); 2279 2280 if (!ok) 2281 { 2282 if (vect_print_dump_info (REPORT_DETAILS)) 2283 fprintf (vect_dump, "use not simple."); 2284 return false; 2285 } 2286 } 2287 2288 /* If op0 is an external or constant defs use a vector type of 2289 the same size as the output vector type. */ 2290 if (!vectype_in) 2291 vectype_in = get_same_sized_vectype (rhs_type, vectype_out); 2292 if (vec_stmt) 2293 gcc_assert (vectype_in); 2294 if (!vectype_in) 2295 { 2296 if (vect_print_dump_info (REPORT_DETAILS)) 2297 { 2298 fprintf (vect_dump, "no vectype for scalar type "); 2299 print_generic_expr (vect_dump, rhs_type, TDF_SLIM); 2300 } 2301 2302 return false; 2303 } 2304 2305 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in); 2306 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 2307 if (nunits_in < nunits_out) 2308 modifier = NARROW; 2309 else if (nunits_out == nunits_in) 2310 modifier = NONE; 2311 else 2312 modifier = WIDEN; 2313 2314 /* Multiple types in SLP are handled by creating the appropriate number of 2315 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 2316 case of SLP. */ 2317 if (slp_node || PURE_SLP_STMT (stmt_info)) 2318 ncopies = 1; 2319 else if (modifier == NARROW) 2320 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out; 2321 else 2322 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; 2323 2324 /* Sanity check: make sure that at least one copy of the vectorized stmt 2325 needs to be generated. */ 2326 gcc_assert (ncopies >= 1); 2327 2328 /* Supportable by target? 
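     As an illustration, a char -> int widening is typically carried out in
     two steps through an intermediate short vector type; in that case
     supportable_widening_operation sets MULTI_STEP_CVT to 1 and records the
     intermediate vector type in INTERM_TYPES.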
*/ 2329 switch (modifier) 2330 { 2331 case NONE: 2332 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR) 2333 return false; 2334 if (supportable_convert_operation (code, vectype_out, vectype_in, 2335 &decl1, &code1)) 2336 break; 2337 /* FALLTHRU */ 2338 unsupported: 2339 if (vect_print_dump_info (REPORT_DETAILS)) 2340 fprintf (vect_dump, "conversion not supported by target."); 2341 return false; 2342 2343 case WIDEN: 2344 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in, 2345 &decl1, &decl2, &code1, &code2, 2346 &multi_step_cvt, &interm_types)) 2347 { 2348 /* Binary widening operation can only be supported directly by the 2349 architecture. */ 2350 gcc_assert (!(multi_step_cvt && op_type == binary_op)); 2351 break; 2352 } 2353 2354 if (code != FLOAT_EXPR 2355 || (GET_MODE_SIZE (TYPE_MODE (lhs_type)) 2356 <= GET_MODE_SIZE (TYPE_MODE (rhs_type)))) 2357 goto unsupported; 2358 2359 rhs_mode = TYPE_MODE (rhs_type); 2360 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type)); 2361 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type)); 2362 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz; 2363 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode)) 2364 { 2365 cvt_type 2366 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0); 2367 cvt_type = get_same_sized_vectype (cvt_type, vectype_in); 2368 if (cvt_type == NULL_TREE) 2369 goto unsupported; 2370 2371 if (GET_MODE_SIZE (rhs_mode) == fltsz) 2372 { 2373 if (!supportable_convert_operation (code, vectype_out, 2374 cvt_type, &decl1, &codecvt1)) 2375 goto unsupported; 2376 } 2377 else if (!supportable_widening_operation (code, stmt, vectype_out, 2378 cvt_type, &decl1, &decl2, 2379 &codecvt1, &codecvt2, 2380 &multi_step_cvt, 2381 &interm_types)) 2382 continue; 2383 else 2384 gcc_assert (multi_step_cvt == 0); 2385 2386 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type, 2387 vectype_in, NULL, NULL, &code1, 2388 &code2, &multi_step_cvt, 2389 &interm_types)) 2390 break; 2391 } 2392 2393 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz) 2394 goto unsupported; 2395 2396 if (GET_MODE_SIZE (rhs_mode) == fltsz) 2397 codecvt2 = ERROR_MARK; 2398 else 2399 { 2400 multi_step_cvt++; 2401 VEC_safe_push (tree, heap, interm_types, cvt_type); 2402 cvt_type = NULL_TREE; 2403 } 2404 break; 2405 2406 case NARROW: 2407 gcc_assert (op_type == unary_op); 2408 if (supportable_narrowing_operation (code, vectype_out, vectype_in, 2409 &code1, &multi_step_cvt, 2410 &interm_types)) 2411 break; 2412 2413 if (code != FIX_TRUNC_EXPR 2414 || (GET_MODE_SIZE (TYPE_MODE (lhs_type)) 2415 >= GET_MODE_SIZE (TYPE_MODE (rhs_type)))) 2416 goto unsupported; 2417 2418 rhs_mode = TYPE_MODE (rhs_type); 2419 cvt_type 2420 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0); 2421 cvt_type = get_same_sized_vectype (cvt_type, vectype_in); 2422 if (cvt_type == NULL_TREE) 2423 goto unsupported; 2424 if (!supportable_convert_operation (code, cvt_type, vectype_in, 2425 &decl1, &codecvt1)) 2426 goto unsupported; 2427 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type, 2428 &code1, &multi_step_cvt, 2429 &interm_types)) 2430 break; 2431 goto unsupported; 2432 2433 default: 2434 gcc_unreachable (); 2435 } 2436 2437 if (!vec_stmt) /* transformation not required. 
*/ 2438 { 2439 if (vect_print_dump_info (REPORT_DETAILS)) 2440 fprintf (vect_dump, "=== vectorizable_conversion ==="); 2441 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR) 2442 { 2443 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type; 2444 vect_model_simple_cost (stmt_info, ncopies, dt, NULL); 2445 } 2446 else if (modifier == NARROW) 2447 { 2448 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type; 2449 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt); 2450 } 2451 else 2452 { 2453 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type; 2454 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt); 2455 } 2456 VEC_free (tree, heap, interm_types); 2457 return true; 2458 } 2459 2460 /** Transform. **/ 2461 if (vect_print_dump_info (REPORT_DETAILS)) 2462 fprintf (vect_dump, "transform conversion. ncopies = %d.", ncopies); 2463 2464 if (op_type == binary_op) 2465 { 2466 if (CONSTANT_CLASS_P (op0)) 2467 op0 = fold_convert (TREE_TYPE (op1), op0); 2468 else if (CONSTANT_CLASS_P (op1)) 2469 op1 = fold_convert (TREE_TYPE (op0), op1); 2470 } 2471 2472 /* In case of multi-step conversion, we first generate conversion operations 2473 to the intermediate types, and then from that types to the final one. 2474 We create vector destinations for the intermediate type (TYPES) received 2475 from supportable_*_operation, and store them in the correct order 2476 for future use in vect_create_vectorized_*_stmts (). */ 2477 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1); 2478 vec_dest = vect_create_destination_var (scalar_dest, 2479 (cvt_type && modifier == WIDEN) 2480 ? cvt_type : vectype_out); 2481 VEC_quick_push (tree, vec_dsts, vec_dest); 2482 2483 if (multi_step_cvt) 2484 { 2485 for (i = VEC_length (tree, interm_types) - 1; 2486 VEC_iterate (tree, interm_types, i, intermediate_type); i--) 2487 { 2488 vec_dest = vect_create_destination_var (scalar_dest, 2489 intermediate_type); 2490 VEC_quick_push (tree, vec_dsts, vec_dest); 2491 } 2492 } 2493 2494 if (cvt_type) 2495 vec_dest = vect_create_destination_var (scalar_dest, 2496 modifier == WIDEN 2497 ? vectype_out : cvt_type); 2498 2499 if (!slp_node) 2500 { 2501 if (modifier == NONE) 2502 vec_oprnds0 = VEC_alloc (tree, heap, 1); 2503 else if (modifier == WIDEN) 2504 { 2505 vec_oprnds0 = VEC_alloc (tree, heap, 2506 (multi_step_cvt 2507 ? vect_pow2 (multi_step_cvt) : 1)); 2508 if (op_type == binary_op) 2509 vec_oprnds1 = VEC_alloc (tree, heap, 1); 2510 } 2511 else 2512 vec_oprnds0 = VEC_alloc (tree, heap, 2513 2 * (multi_step_cvt 2514 ? vect_pow2 (multi_step_cvt) : 1)); 2515 } 2516 else if (code == WIDEN_LSHIFT_EXPR) 2517 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size); 2518 2519 last_oprnd = op0; 2520 prev_stmt_info = NULL; 2521 switch (modifier) 2522 { 2523 case NONE: 2524 for (j = 0; j < ncopies; j++) 2525 { 2526 if (j == 0) 2527 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node, 2528 -1); 2529 else 2530 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL); 2531 2532 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0) 2533 { 2534 /* Arguments are ready, create the new vector stmt. 
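	      In the NONE case this is a single statement per operand, either
	      a call to a target builtin (CODE1 == CALL_EXPR), e.g.
		vect_x = __builtin_target_cvt (vect_y);   (hypothetical name)
	      or a plain unary conversion such as
		vect_x = (vector(4) int) vect_y;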
*/ 2535 if (code1 == CALL_EXPR) 2536 { 2537 new_stmt = gimple_build_call (decl1, 1, vop0); 2538 new_temp = make_ssa_name (vec_dest, new_stmt); 2539 gimple_call_set_lhs (new_stmt, new_temp); 2540 } 2541 else 2542 { 2543 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op); 2544 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, 2545 vop0, NULL); 2546 new_temp = make_ssa_name (vec_dest, new_stmt); 2547 gimple_assign_set_lhs (new_stmt, new_temp); 2548 } 2549 2550 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2551 if (slp_node) 2552 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), 2553 new_stmt); 2554 } 2555 2556 if (j == 0) 2557 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 2558 else 2559 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 2560 prev_stmt_info = vinfo_for_stmt (new_stmt); 2561 } 2562 break; 2563 2564 case WIDEN: 2565 /* In case the vectorization factor (VF) is bigger than the number 2566 of elements that we can fit in a vectype (nunits), we have to 2567 generate more than one vector stmt - i.e - we need to "unroll" 2568 the vector stmt by a factor VF/nunits. */ 2569 for (j = 0; j < ncopies; j++) 2570 { 2571 /* Handle uses. */ 2572 if (j == 0) 2573 { 2574 if (slp_node) 2575 { 2576 if (code == WIDEN_LSHIFT_EXPR) 2577 { 2578 unsigned int k; 2579 2580 vec_oprnd1 = op1; 2581 /* Store vec_oprnd1 for every vector stmt to be created 2582 for SLP_NODE. We check during the analysis that all 2583 the shift arguments are the same. */ 2584 for (k = 0; k < slp_node->vec_stmts_size - 1; k++) 2585 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); 2586 2587 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, 2588 slp_node, -1); 2589 } 2590 else 2591 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, 2592 &vec_oprnds1, slp_node, -1); 2593 } 2594 else 2595 { 2596 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); 2597 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0); 2598 if (op_type == binary_op) 2599 { 2600 if (code == WIDEN_LSHIFT_EXPR) 2601 vec_oprnd1 = op1; 2602 else 2603 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, 2604 NULL); 2605 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); 2606 } 2607 } 2608 } 2609 else 2610 { 2611 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0); 2612 VEC_truncate (tree, vec_oprnds0, 0); 2613 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0); 2614 if (op_type == binary_op) 2615 { 2616 if (code == WIDEN_LSHIFT_EXPR) 2617 vec_oprnd1 = op1; 2618 else 2619 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], 2620 vec_oprnd1); 2621 VEC_truncate (tree, vec_oprnds1, 0); 2622 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); 2623 } 2624 } 2625 2626 /* Arguments are ready. Create the new vector stmts. 
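	     Sketch of a short -> float promotion done in two steps on a
	     target that supports it (illustrative): the widening loop below
	     first produces
	       vect_i_lo = VEC_UNPACK_LO_EXPR <vect_s>;
	       vect_i_hi = VEC_UNPACK_HI_EXPR <vect_s>;
	     and the CVT_TYPE pass that follows converts each half:
	       vect_f_0 = (vector(4) float) vect_i_lo;
	       vect_f_1 = (vector(4) float) vect_i_hi;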
*/ 2627 for (i = multi_step_cvt; i >= 0; i--) 2628 { 2629 tree this_dest = VEC_index (tree, vec_dsts, i); 2630 enum tree_code c1 = code1, c2 = code2; 2631 if (i == 0 && codecvt2 != ERROR_MARK) 2632 { 2633 c1 = codecvt1; 2634 c2 = codecvt2; 2635 } 2636 vect_create_vectorized_promotion_stmts (&vec_oprnds0, 2637 &vec_oprnds1, 2638 stmt, this_dest, gsi, 2639 c1, c2, decl1, decl2, 2640 op_type); 2641 } 2642 2643 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0) 2644 { 2645 if (cvt_type) 2646 { 2647 if (codecvt1 == CALL_EXPR) 2648 { 2649 new_stmt = gimple_build_call (decl1, 1, vop0); 2650 new_temp = make_ssa_name (vec_dest, new_stmt); 2651 gimple_call_set_lhs (new_stmt, new_temp); 2652 } 2653 else 2654 { 2655 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); 2656 new_temp = make_ssa_name (vec_dest, NULL); 2657 new_stmt = gimple_build_assign_with_ops (codecvt1, 2658 new_temp, 2659 vop0, NULL); 2660 } 2661 2662 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2663 } 2664 else 2665 new_stmt = SSA_NAME_DEF_STMT (vop0); 2666 2667 if (slp_node) 2668 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), 2669 new_stmt); 2670 else 2671 { 2672 if (!prev_stmt_info) 2673 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt; 2674 else 2675 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 2676 prev_stmt_info = vinfo_for_stmt (new_stmt); 2677 } 2678 } 2679 } 2680 2681 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 2682 break; 2683 2684 case NARROW: 2685 /* In case the vectorization factor (VF) is bigger than the number 2686 of elements that we can fit in a vectype (nunits), we have to 2687 generate more than one vector stmt - i.e - we need to "unroll" 2688 the vector stmt by a factor VF/nunits. */ 2689 for (j = 0; j < ncopies; j++) 2690 { 2691 /* Handle uses. */ 2692 if (slp_node) 2693 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, 2694 slp_node, -1); 2695 else 2696 { 2697 VEC_truncate (tree, vec_oprnds0, 0); 2698 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0, 2699 vect_pow2 (multi_step_cvt) - 1); 2700 } 2701 2702 /* Arguments are ready. Create the new vector stmts. */ 2703 if (cvt_type) 2704 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0) 2705 { 2706 if (codecvt1 == CALL_EXPR) 2707 { 2708 new_stmt = gimple_build_call (decl1, 1, vop0); 2709 new_temp = make_ssa_name (vec_dest, new_stmt); 2710 gimple_call_set_lhs (new_stmt, new_temp); 2711 } 2712 else 2713 { 2714 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); 2715 new_temp = make_ssa_name (vec_dest, NULL); 2716 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp, 2717 vop0, NULL); 2718 } 2719 2720 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2721 VEC_replace (tree, vec_oprnds0, i, new_temp); 2722 } 2723 2724 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt, 2725 stmt, vec_dsts, gsi, 2726 slp_node, code1, 2727 &prev_stmt_info); 2728 } 2729 2730 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 2731 break; 2732 } 2733 2734 VEC_free (tree, heap, vec_oprnds0); 2735 VEC_free (tree, heap, vec_oprnds1); 2736 VEC_free (tree, heap, vec_dsts); 2737 VEC_free (tree, heap, interm_types); 2738 2739 return true; 2740 } 2741 2742 2743 /* Function vectorizable_assignment. 2744 2745 Check if STMT performs an assignment (copy) that can be vectorized. 2746 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 2747 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 2748 Return FALSE if not a vectorizable STMT, TRUE otherwise. 
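   Typical candidates are plain copies and conversions that change neither
   the number of elements nor the vector size, e.g.
     x_1 = y_2;
     x_1 = (T) y_2;
   the latter being vectorized below as a VIEW_CONVERT_EXPR on the whole
   vector.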
*/ 2749 2750 static bool 2751 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi, 2752 gimple *vec_stmt, slp_tree slp_node) 2753 { 2754 tree vec_dest; 2755 tree scalar_dest; 2756 tree op; 2757 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2758 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 2759 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 2760 tree new_temp; 2761 tree def; 2762 gimple def_stmt; 2763 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; 2764 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype); 2765 int ncopies; 2766 int i, j; 2767 VEC(tree,heap) *vec_oprnds = NULL; 2768 tree vop; 2769 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 2770 gimple new_stmt = NULL; 2771 stmt_vec_info prev_stmt_info = NULL; 2772 enum tree_code code; 2773 tree vectype_in; 2774 2775 /* Multiple types in SLP are handled by creating the appropriate number of 2776 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 2777 case of SLP. */ 2778 if (slp_node || PURE_SLP_STMT (stmt_info)) 2779 ncopies = 1; 2780 else 2781 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; 2782 2783 gcc_assert (ncopies >= 1); 2784 2785 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 2786 return false; 2787 2788 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 2789 return false; 2790 2791 /* Is vectorizable assignment? */ 2792 if (!is_gimple_assign (stmt)) 2793 return false; 2794 2795 scalar_dest = gimple_assign_lhs (stmt); 2796 if (TREE_CODE (scalar_dest) != SSA_NAME) 2797 return false; 2798 2799 code = gimple_assign_rhs_code (stmt); 2800 if (gimple_assign_single_p (stmt) 2801 || code == PAREN_EXPR 2802 || CONVERT_EXPR_CODE_P (code)) 2803 op = gimple_assign_rhs1 (stmt); 2804 else 2805 return false; 2806 2807 if (code == VIEW_CONVERT_EXPR) 2808 op = TREE_OPERAND (op, 0); 2809 2810 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo, 2811 &def_stmt, &def, &dt[0], &vectype_in)) 2812 { 2813 if (vect_print_dump_info (REPORT_DETAILS)) 2814 fprintf (vect_dump, "use not simple."); 2815 return false; 2816 } 2817 2818 /* We can handle NOP_EXPR conversions that do not change the number 2819 of elements or the vector size. */ 2820 if ((CONVERT_EXPR_CODE_P (code) 2821 || code == VIEW_CONVERT_EXPR) 2822 && (!vectype_in 2823 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits 2824 || (GET_MODE_SIZE (TYPE_MODE (vectype)) 2825 != GET_MODE_SIZE (TYPE_MODE (vectype_in))))) 2826 return false; 2827 2828 /* We do not handle bit-precision changes. */ 2829 if ((CONVERT_EXPR_CODE_P (code) 2830 || code == VIEW_CONVERT_EXPR) 2831 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) 2832 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest)) 2833 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest)))) 2834 || ((TYPE_PRECISION (TREE_TYPE (op)) 2835 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op)))))) 2836 /* But a conversion that does not change the bit-pattern is ok. */ 2837 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest)) 2838 > TYPE_PRECISION (TREE_TYPE (op))) 2839 && TYPE_UNSIGNED (TREE_TYPE (op)))) 2840 { 2841 if (vect_print_dump_info (REPORT_DETAILS)) 2842 fprintf (vect_dump, "type conversion to/from bit-precision " 2843 "unsupported."); 2844 return false; 2845 } 2846 2847 if (!vec_stmt) /* transformation not required. 
*/ 2848 { 2849 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; 2850 if (vect_print_dump_info (REPORT_DETAILS)) 2851 fprintf (vect_dump, "=== vectorizable_assignment ==="); 2852 vect_model_simple_cost (stmt_info, ncopies, dt, NULL); 2853 return true; 2854 } 2855 2856 /** Transform. **/ 2857 if (vect_print_dump_info (REPORT_DETAILS)) 2858 fprintf (vect_dump, "transform assignment."); 2859 2860 /* Handle def. */ 2861 vec_dest = vect_create_destination_var (scalar_dest, vectype); 2862 2863 /* Handle use. */ 2864 for (j = 0; j < ncopies; j++) 2865 { 2866 /* Handle uses. */ 2867 if (j == 0) 2868 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1); 2869 else 2870 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL); 2871 2872 /* Arguments are ready. create the new vector stmt. */ 2873 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop) 2874 { 2875 if (CONVERT_EXPR_CODE_P (code) 2876 || code == VIEW_CONVERT_EXPR) 2877 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop); 2878 new_stmt = gimple_build_assign (vec_dest, vop); 2879 new_temp = make_ssa_name (vec_dest, new_stmt); 2880 gimple_assign_set_lhs (new_stmt, new_temp); 2881 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2882 if (slp_node) 2883 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt); 2884 } 2885 2886 if (slp_node) 2887 continue; 2888 2889 if (j == 0) 2890 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 2891 else 2892 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 2893 2894 prev_stmt_info = vinfo_for_stmt (new_stmt); 2895 } 2896 2897 VEC_free (tree, heap, vec_oprnds); 2898 return true; 2899 } 2900 2901 2902 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE 2903 either as shift by a scalar or by a vector. */ 2904 2905 bool 2906 vect_supportable_shift (enum tree_code code, tree scalar_type) 2907 { 2908 2909 enum machine_mode vec_mode; 2910 optab optab; 2911 int icode; 2912 tree vectype; 2913 2914 vectype = get_vectype_for_scalar_type (scalar_type); 2915 if (!vectype) 2916 return false; 2917 2918 optab = optab_for_tree_code (code, vectype, optab_scalar); 2919 if (!optab 2920 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing) 2921 { 2922 optab = optab_for_tree_code (code, vectype, optab_vector); 2923 if (!optab 2924 || (optab_handler (optab, TYPE_MODE (vectype)) 2925 == CODE_FOR_nothing)) 2926 return false; 2927 } 2928 2929 vec_mode = TYPE_MODE (vectype); 2930 icode = (int) optab_handler (optab, vec_mode); 2931 if (icode == CODE_FOR_nothing) 2932 return false; 2933 2934 return true; 2935 } 2936 2937 2938 /* Function vectorizable_shift. 2939 2940 Check if STMT performs a shift operation that can be vectorized. 2941 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 2942 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 2943 Return FALSE if not a vectorizable STMT, TRUE otherwise. 
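   Both forms of shift amount are handled, e.g.
     x_1 = y_2 << 3;         (constant or invariant amount)
     x_1 = y_2 << z_3;       (amount varying in the loop, vector/vector)
   The analysis below picks between the optab_scalar and optab_vector
   variants and, for SLP, checks that all scalar shift amounts agree.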
*/ 2944 2945 static bool 2946 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi, 2947 gimple *vec_stmt, slp_tree slp_node) 2948 { 2949 tree vec_dest; 2950 tree scalar_dest; 2951 tree op0, op1 = NULL; 2952 tree vec_oprnd1 = NULL_TREE; 2953 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2954 tree vectype; 2955 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 2956 enum tree_code code; 2957 enum machine_mode vec_mode; 2958 tree new_temp; 2959 optab optab; 2960 int icode; 2961 enum machine_mode optab_op2_mode; 2962 tree def; 2963 gimple def_stmt; 2964 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; 2965 gimple new_stmt = NULL; 2966 stmt_vec_info prev_stmt_info; 2967 int nunits_in; 2968 int nunits_out; 2969 tree vectype_out; 2970 tree op1_vectype; 2971 int ncopies; 2972 int j, i; 2973 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; 2974 tree vop0, vop1; 2975 unsigned int k; 2976 bool scalar_shift_arg = true; 2977 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 2978 int vf; 2979 2980 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 2981 return false; 2982 2983 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 2984 return false; 2985 2986 /* Is STMT a vectorizable binary/unary operation? */ 2987 if (!is_gimple_assign (stmt)) 2988 return false; 2989 2990 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 2991 return false; 2992 2993 code = gimple_assign_rhs_code (stmt); 2994 2995 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR 2996 || code == RROTATE_EXPR)) 2997 return false; 2998 2999 scalar_dest = gimple_assign_lhs (stmt); 3000 vectype_out = STMT_VINFO_VECTYPE (stmt_info); 3001 if (TYPE_PRECISION (TREE_TYPE (scalar_dest)) 3002 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest)))) 3003 { 3004 if (vect_print_dump_info (REPORT_DETAILS)) 3005 fprintf (vect_dump, "bit-precision shifts not supported."); 3006 return false; 3007 } 3008 3009 op0 = gimple_assign_rhs1 (stmt); 3010 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo, 3011 &def_stmt, &def, &dt[0], &vectype)) 3012 { 3013 if (vect_print_dump_info (REPORT_DETAILS)) 3014 fprintf (vect_dump, "use not simple."); 3015 return false; 3016 } 3017 /* If op0 is an external or constant def use a vector type with 3018 the same size as the output vector type. */ 3019 if (!vectype) 3020 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out); 3021 if (vec_stmt) 3022 gcc_assert (vectype); 3023 if (!vectype) 3024 { 3025 if (vect_print_dump_info (REPORT_DETAILS)) 3026 { 3027 fprintf (vect_dump, "no vectype for scalar type "); 3028 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM); 3029 } 3030 3031 return false; 3032 } 3033 3034 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 3035 nunits_in = TYPE_VECTOR_SUBPARTS (vectype); 3036 if (nunits_out != nunits_in) 3037 return false; 3038 3039 op1 = gimple_assign_rhs2 (stmt); 3040 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt, 3041 &def, &dt[1], &op1_vectype)) 3042 { 3043 if (vect_print_dump_info (REPORT_DETAILS)) 3044 fprintf (vect_dump, "use not simple."); 3045 return false; 3046 } 3047 3048 if (loop_vinfo) 3049 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 3050 else 3051 vf = 1; 3052 3053 /* Multiple types in SLP are handled by creating the appropriate number of 3054 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 3055 case of SLP. 
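     In the loop-based case NCOPIES is simply VF / NUNITS_IN; for example a
     vectorization factor of 8 with 4 elements per vector yields 2 copies of
     the vector shift statement.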
*/ 3056 if (slp_node || PURE_SLP_STMT (stmt_info)) 3057 ncopies = 1; 3058 else 3059 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; 3060 3061 gcc_assert (ncopies >= 1); 3062 3063 /* Determine whether the shift amount is a vector, or scalar. If the 3064 shift/rotate amount is a vector, use the vector/vector shift optabs. */ 3065 3066 if (dt[1] == vect_internal_def && !slp_node) 3067 scalar_shift_arg = false; 3068 else if (dt[1] == vect_constant_def 3069 || dt[1] == vect_external_def 3070 || dt[1] == vect_internal_def) 3071 { 3072 /* In SLP, need to check whether the shift count is the same, 3073 in loops if it is a constant or invariant, it is always 3074 a scalar shift. */ 3075 if (slp_node) 3076 { 3077 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node); 3078 gimple slpstmt; 3079 3080 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt) 3081 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0)) 3082 scalar_shift_arg = false; 3083 } 3084 } 3085 else 3086 { 3087 if (vect_print_dump_info (REPORT_DETAILS)) 3088 fprintf (vect_dump, "operand mode requires invariant argument."); 3089 return false; 3090 } 3091 3092 /* Vector shifted by vector. */ 3093 if (!scalar_shift_arg) 3094 { 3095 optab = optab_for_tree_code (code, vectype, optab_vector); 3096 if (vect_print_dump_info (REPORT_DETAILS)) 3097 fprintf (vect_dump, "vector/vector shift/rotate found."); 3098 if (!op1_vectype) 3099 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out); 3100 if (op1_vectype == NULL_TREE 3101 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype)) 3102 { 3103 if (vect_print_dump_info (REPORT_DETAILS)) 3104 fprintf (vect_dump, "unusable type for last operand in" 3105 " vector/vector shift/rotate."); 3106 return false; 3107 } 3108 } 3109 /* See if the machine has a vector shifted by scalar insn and if not 3110 then see if it has a vector shifted by vector insn. */ 3111 else 3112 { 3113 optab = optab_for_tree_code (code, vectype, optab_scalar); 3114 if (optab 3115 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing) 3116 { 3117 if (vect_print_dump_info (REPORT_DETAILS)) 3118 fprintf (vect_dump, "vector/scalar shift/rotate found."); 3119 } 3120 else 3121 { 3122 optab = optab_for_tree_code (code, vectype, optab_vector); 3123 if (optab 3124 && (optab_handler (optab, TYPE_MODE (vectype)) 3125 != CODE_FOR_nothing)) 3126 { 3127 scalar_shift_arg = false; 3128 3129 if (vect_print_dump_info (REPORT_DETAILS)) 3130 fprintf (vect_dump, "vector/vector shift/rotate found."); 3131 3132 /* Unlike the other binary operators, shifts/rotates have 3133 the rhs being int, instead of the same type as the lhs, 3134 so make sure the scalar is the right type if we are 3135 dealing with vectors of long long/long/short/char. */ 3136 if (dt[1] == vect_constant_def) 3137 op1 = fold_convert (TREE_TYPE (vectype), op1); 3138 else if (!useless_type_conversion_p (TREE_TYPE (vectype), 3139 TREE_TYPE (op1))) 3140 { 3141 if (slp_node 3142 && TYPE_MODE (TREE_TYPE (vectype)) 3143 != TYPE_MODE (TREE_TYPE (op1))) 3144 { 3145 if (vect_print_dump_info (REPORT_DETAILS)) 3146 fprintf (vect_dump, "unusable type for last operand in" 3147 " vector/vector shift/rotate."); 3148 return false; 3149 } 3150 if (vec_stmt && !slp_node) 3151 { 3152 op1 = fold_convert (TREE_TYPE (vectype), op1); 3153 op1 = vect_init_vector (stmt, op1, 3154 TREE_TYPE (vectype), NULL); 3155 } 3156 } 3157 } 3158 } 3159 } 3160 3161 /* Supportable by target? 
*/ 3162 if (!optab) 3163 { 3164 if (vect_print_dump_info (REPORT_DETAILS)) 3165 fprintf (vect_dump, "no optab."); 3166 return false; 3167 } 3168 vec_mode = TYPE_MODE (vectype); 3169 icode = (int) optab_handler (optab, vec_mode); 3170 if (icode == CODE_FOR_nothing) 3171 { 3172 if (vect_print_dump_info (REPORT_DETAILS)) 3173 fprintf (vect_dump, "op not supported by target."); 3174 /* Check only during analysis. */ 3175 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD 3176 || (vf < vect_min_worthwhile_factor (code) 3177 && !vec_stmt)) 3178 return false; 3179 if (vect_print_dump_info (REPORT_DETAILS)) 3180 fprintf (vect_dump, "proceeding using word mode."); 3181 } 3182 3183 /* Worthwhile without SIMD support? Check only during analysis. */ 3184 if (!VECTOR_MODE_P (TYPE_MODE (vectype)) 3185 && vf < vect_min_worthwhile_factor (code) 3186 && !vec_stmt) 3187 { 3188 if (vect_print_dump_info (REPORT_DETAILS)) 3189 fprintf (vect_dump, "not worthwhile without SIMD support."); 3190 return false; 3191 } 3192 3193 if (!vec_stmt) /* transformation not required. */ 3194 { 3195 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type; 3196 if (vect_print_dump_info (REPORT_DETAILS)) 3197 fprintf (vect_dump, "=== vectorizable_shift ==="); 3198 vect_model_simple_cost (stmt_info, ncopies, dt, NULL); 3199 return true; 3200 } 3201 3202 /** Transform. **/ 3203 3204 if (vect_print_dump_info (REPORT_DETAILS)) 3205 fprintf (vect_dump, "transform binary/unary operation."); 3206 3207 /* Handle def. */ 3208 vec_dest = vect_create_destination_var (scalar_dest, vectype); 3209 3210 /* Allocate VECs for vector operands. In case of SLP, vector operands are 3211 created in the previous stages of the recursion, so no allocation is 3212 needed, except for the case of shift with scalar shift argument. In that 3213 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to 3214 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE. 3215 In case of loop-based vectorization we allocate VECs of size 1. We 3216 allocate VEC_OPRNDS1 only in case of binary operation. */ 3217 if (!slp_node) 3218 { 3219 vec_oprnds0 = VEC_alloc (tree, heap, 1); 3220 vec_oprnds1 = VEC_alloc (tree, heap, 1); 3221 } 3222 else if (scalar_shift_arg) 3223 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size); 3224 3225 prev_stmt_info = NULL; 3226 for (j = 0; j < ncopies; j++) 3227 { 3228 /* Handle uses. */ 3229 if (j == 0) 3230 { 3231 if (scalar_shift_arg) 3232 { 3233 /* Vector shl and shr insn patterns can be defined with scalar 3234 operand 2 (shift operand). In this case, use constant or loop 3235 invariant op1 directly, without extending it to vector mode 3236 first. */ 3237 optab_op2_mode = insn_data[icode].operand[2].mode; 3238 if (!VECTOR_MODE_P (optab_op2_mode)) 3239 { 3240 if (vect_print_dump_info (REPORT_DETAILS)) 3241 fprintf (vect_dump, "operand 1 using scalar mode."); 3242 vec_oprnd1 = op1; 3243 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); 3244 if (slp_node) 3245 { 3246 /* Store vec_oprnd1 for every vector stmt to be created 3247 for SLP_NODE. We check during the analysis that all 3248 the shift arguments are the same. 3249 TODO: Allow different constants for different vector 3250 stmts generated for an SLP instance. 
*/ 3251 for (k = 0; k < slp_node->vec_stmts_size - 1; k++) 3252 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); 3253 } 3254 } 3255 } 3256 3257 /* vec_oprnd1 is available if operand 1 should be of a scalar-type 3258 (a special case for certain kind of vector shifts); otherwise, 3259 operand 1 should be of a vector type (the usual case). */ 3260 if (vec_oprnd1) 3261 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, 3262 slp_node, -1); 3263 else 3264 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, 3265 slp_node, -1); 3266 } 3267 else 3268 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1); 3269 3270 /* Arguments are ready. Create the new vector stmt. */ 3271 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0) 3272 { 3273 vop1 = VEC_index (tree, vec_oprnds1, i); 3274 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1); 3275 new_temp = make_ssa_name (vec_dest, new_stmt); 3276 gimple_assign_set_lhs (new_stmt, new_temp); 3277 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3278 if (slp_node) 3279 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt); 3280 } 3281 3282 if (slp_node) 3283 continue; 3284 3285 if (j == 0) 3286 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 3287 else 3288 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 3289 prev_stmt_info = vinfo_for_stmt (new_stmt); 3290 } 3291 3292 VEC_free (tree, heap, vec_oprnds0); 3293 VEC_free (tree, heap, vec_oprnds1); 3294 3295 return true; 3296 } 3297 3298 3299 /* Function vectorizable_operation. 3300 3301 Check if STMT performs a binary, unary or ternary operation that can 3302 be vectorized. 3303 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 3304 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 3305 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 3306 3307 static bool 3308 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, 3309 gimple *vec_stmt, slp_tree slp_node) 3310 { 3311 tree vec_dest; 3312 tree scalar_dest; 3313 tree op0, op1 = NULL_TREE, op2 = NULL_TREE; 3314 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 3315 tree vectype; 3316 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 3317 enum tree_code code; 3318 enum machine_mode vec_mode; 3319 tree new_temp; 3320 int op_type; 3321 optab optab; 3322 int icode; 3323 tree def; 3324 gimple def_stmt; 3325 enum vect_def_type dt[3] 3326 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type}; 3327 gimple new_stmt = NULL; 3328 stmt_vec_info prev_stmt_info; 3329 int nunits_in; 3330 int nunits_out; 3331 tree vectype_out; 3332 int ncopies; 3333 int j, i; 3334 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL; 3335 tree vop0, vop1, vop2; 3336 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 3337 int vf; 3338 3339 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 3340 return false; 3341 3342 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 3343 return false; 3344 3345 /* Is STMT a vectorizable binary/unary operation? */ 3346 if (!is_gimple_assign (stmt)) 3347 return false; 3348 3349 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 3350 return false; 3351 3352 code = gimple_assign_rhs_code (stmt); 3353 3354 /* For pointer addition, we should use the normal plus for 3355 the vector addition. */ 3356 if (code == POINTER_PLUS_EXPR) 3357 code = PLUS_EXPR; 3358 3359 /* Support only unary or binary operations. 
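     Ternary operations are accepted as well, which is why the check below
     also allows ternary_op; for instance a fused multiply-add
       x_1 = FMA <a_2, b_3, c_4>;
     is vectorized here with three vector operands.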
*/ 3360 op_type = TREE_CODE_LENGTH (code); 3361 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op) 3362 { 3363 if (vect_print_dump_info (REPORT_DETAILS)) 3364 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).", 3365 op_type); 3366 return false; 3367 } 3368 3369 scalar_dest = gimple_assign_lhs (stmt); 3370 vectype_out = STMT_VINFO_VECTYPE (stmt_info); 3371 3372 /* Most operations cannot handle bit-precision types without extra 3373 truncations. */ 3374 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest)) 3375 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest)))) 3376 /* Exception are bitwise binary operations. */ 3377 && code != BIT_IOR_EXPR 3378 && code != BIT_XOR_EXPR 3379 && code != BIT_AND_EXPR) 3380 { 3381 if (vect_print_dump_info (REPORT_DETAILS)) 3382 fprintf (vect_dump, "bit-precision arithmetic not supported."); 3383 return false; 3384 } 3385 3386 op0 = gimple_assign_rhs1 (stmt); 3387 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo, 3388 &def_stmt, &def, &dt[0], &vectype)) 3389 { 3390 if (vect_print_dump_info (REPORT_DETAILS)) 3391 fprintf (vect_dump, "use not simple."); 3392 return false; 3393 } 3394 /* If op0 is an external or constant def use a vector type with 3395 the same size as the output vector type. */ 3396 if (!vectype) 3397 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out); 3398 if (vec_stmt) 3399 gcc_assert (vectype); 3400 if (!vectype) 3401 { 3402 if (vect_print_dump_info (REPORT_DETAILS)) 3403 { 3404 fprintf (vect_dump, "no vectype for scalar type "); 3405 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM); 3406 } 3407 3408 return false; 3409 } 3410 3411 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 3412 nunits_in = TYPE_VECTOR_SUBPARTS (vectype); 3413 if (nunits_out != nunits_in) 3414 return false; 3415 3416 if (op_type == binary_op || op_type == ternary_op) 3417 { 3418 op1 = gimple_assign_rhs2 (stmt); 3419 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt, 3420 &def, &dt[1])) 3421 { 3422 if (vect_print_dump_info (REPORT_DETAILS)) 3423 fprintf (vect_dump, "use not simple."); 3424 return false; 3425 } 3426 } 3427 if (op_type == ternary_op) 3428 { 3429 op2 = gimple_assign_rhs3 (stmt); 3430 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt, 3431 &def, &dt[2])) 3432 { 3433 if (vect_print_dump_info (REPORT_DETAILS)) 3434 fprintf (vect_dump, "use not simple."); 3435 return false; 3436 } 3437 } 3438 3439 if (loop_vinfo) 3440 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 3441 else 3442 vf = 1; 3443 3444 /* Multiple types in SLP are handled by creating the appropriate number of 3445 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 3446 case of SLP. */ 3447 if (slp_node || PURE_SLP_STMT (stmt_info)) 3448 ncopies = 1; 3449 else 3450 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; 3451 3452 gcc_assert (ncopies >= 1); 3453 3454 /* Shifts are handled in vectorizable_shift (). */ 3455 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR 3456 || code == RROTATE_EXPR) 3457 return false; 3458 3459 optab = optab_for_tree_code (code, vectype, optab_default); 3460 3461 /* Supportable by target? 
*/ 3462 if (!optab) 3463 { 3464 if (vect_print_dump_info (REPORT_DETAILS)) 3465 fprintf (vect_dump, "no optab."); 3466 return false; 3467 } 3468 vec_mode = TYPE_MODE (vectype); 3469 icode = (int) optab_handler (optab, vec_mode); 3470 if (icode == CODE_FOR_nothing) 3471 { 3472 if (vect_print_dump_info (REPORT_DETAILS)) 3473 fprintf (vect_dump, "op not supported by target."); 3474 /* Check only during analysis. */ 3475 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD 3476 || (vf < vect_min_worthwhile_factor (code) 3477 && !vec_stmt)) 3478 return false; 3479 if (vect_print_dump_info (REPORT_DETAILS)) 3480 fprintf (vect_dump, "proceeding using word mode."); 3481 } 3482 3483 /* Worthwhile without SIMD support? Check only during analysis. */ 3484 if (!VECTOR_MODE_P (TYPE_MODE (vectype)) 3485 && vf < vect_min_worthwhile_factor (code) 3486 && !vec_stmt) 3487 { 3488 if (vect_print_dump_info (REPORT_DETAILS)) 3489 fprintf (vect_dump, "not worthwhile without SIMD support."); 3490 return false; 3491 } 3492 3493 if (!vec_stmt) /* transformation not required. */ 3494 { 3495 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; 3496 if (vect_print_dump_info (REPORT_DETAILS)) 3497 fprintf (vect_dump, "=== vectorizable_operation ==="); 3498 vect_model_simple_cost (stmt_info, ncopies, dt, NULL); 3499 return true; 3500 } 3501 3502 /** Transform. **/ 3503 3504 if (vect_print_dump_info (REPORT_DETAILS)) 3505 fprintf (vect_dump, "transform binary/unary operation."); 3506 3507 /* Handle def. */ 3508 vec_dest = vect_create_destination_var (scalar_dest, vectype); 3509 3510 /* In case the vectorization factor (VF) is bigger than the number 3511 of elements that we can fit in a vectype (nunits), we have to generate 3512 more than one vector stmt - i.e - we need to "unroll" the 3513 vector stmt by a factor VF/nunits. In doing so, we record a pointer 3514 from one copy of the vector stmt to the next, in the field 3515 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following 3516 stages to find the correct vector defs to be used when vectorizing 3517 stmts that use the defs of the current stmt. The example below 3518 illustrates the vectorization process when VF=16 and nunits=4 (i.e., 3519 we need to create 4 vectorized stmts): 3520 3521 before vectorization: 3522 RELATED_STMT VEC_STMT 3523 S1: x = memref - - 3524 S2: z = x + 1 - - 3525 3526 step 1: vectorize stmt S1 (done in vectorizable_load. See more details 3527 there): 3528 RELATED_STMT VEC_STMT 3529 VS1_0: vx0 = memref0 VS1_1 - 3530 VS1_1: vx1 = memref1 VS1_2 - 3531 VS1_2: vx2 = memref2 VS1_3 - 3532 VS1_3: vx3 = memref3 - - 3533 S1: x = load - VS1_0 3534 S2: z = x + 1 - - 3535 3536 step2: vectorize stmt S2 (done here): 3537 To vectorize stmt S2 we first need to find the relevant vector 3538 def for the first operand 'x'. This is, as usual, obtained from 3539 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt 3540 that defines 'x' (S1). This way we find the stmt VS1_0, and the 3541 relevant vector def 'vx0'. Having found 'vx0' we can generate 3542 the vector stmt VS2_0, and as usual, record it in the 3543 STMT_VINFO_VEC_STMT of stmt S2. 3544 When creating the second copy (VS2_1), we obtain the relevant vector 3545 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of 3546 stmt VS1_0. This way we find the stmt VS1_1 and the relevant 3547 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a 3548 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0. 3549 Similarly when creating stmts VS2_2 and VS2_3. 
This is the resulting 3550 chain of stmts and pointers: 3551 RELATED_STMT VEC_STMT 3552 VS1_0: vx0 = memref0 VS1_1 - 3553 VS1_1: vx1 = memref1 VS1_2 - 3554 VS1_2: vx2 = memref2 VS1_3 - 3555 VS1_3: vx3 = memref3 - - 3556 S1: x = load - VS1_0 3557 VS2_0: vz0 = vx0 + v1 VS2_1 - 3558 VS2_1: vz1 = vx1 + v1 VS2_2 - 3559 VS2_2: vz2 = vx2 + v1 VS2_3 - 3560 VS2_3: vz3 = vx3 + v1 - - 3561 S2: z = x + 1 - VS2_0 */ 3562 3563 prev_stmt_info = NULL; 3564 for (j = 0; j < ncopies; j++) 3565 { 3566 /* Handle uses. */ 3567 if (j == 0) 3568 { 3569 if (op_type == binary_op || op_type == ternary_op) 3570 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, 3571 slp_node, -1); 3572 else 3573 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, 3574 slp_node, -1); 3575 if (op_type == ternary_op) 3576 { 3577 vec_oprnds2 = VEC_alloc (tree, heap, 1); 3578 VEC_quick_push (tree, vec_oprnds2, 3579 vect_get_vec_def_for_operand (op2, stmt, NULL)); 3580 } 3581 } 3582 else 3583 { 3584 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1); 3585 if (op_type == ternary_op) 3586 { 3587 tree vec_oprnd = VEC_pop (tree, vec_oprnds2); 3588 VEC_quick_push (tree, vec_oprnds2, 3589 vect_get_vec_def_for_stmt_copy (dt[2], 3590 vec_oprnd)); 3591 } 3592 } 3593 3594 /* Arguments are ready. Create the new vector stmt. */ 3595 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0) 3596 { 3597 vop1 = ((op_type == binary_op || op_type == ternary_op) 3598 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE); 3599 vop2 = ((op_type == ternary_op) 3600 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE); 3601 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest, 3602 vop0, vop1, vop2); 3603 new_temp = make_ssa_name (vec_dest, new_stmt); 3604 gimple_assign_set_lhs (new_stmt, new_temp); 3605 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3606 if (slp_node) 3607 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt); 3608 } 3609 3610 if (slp_node) 3611 continue; 3612 3613 if (j == 0) 3614 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 3615 else 3616 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 3617 prev_stmt_info = vinfo_for_stmt (new_stmt); 3618 } 3619 3620 VEC_free (tree, heap, vec_oprnds0); 3621 if (vec_oprnds1) 3622 VEC_free (tree, heap, vec_oprnds1); 3623 if (vec_oprnds2) 3624 VEC_free (tree, heap, vec_oprnds2); 3625 3626 return true; 3627 } 3628 3629 3630 /* Function vectorizable_store. 3631 3632 Check if STMT defines a non scalar data-ref (array/pointer/structure) that 3633 can be vectorized. 3634 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 3635 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 3636 Return FALSE if not a vectorizable STMT, TRUE otherwise. 
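   The typical case is a store through an array or pointer reference, e.g.
     a[i_1] = x_2;
   possibly as part of an interleaved (strided) group, in which case all
   stores of the group are emitted together once the last member is reached.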
*/ 3637 3638 static bool 3639 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, 3640 slp_tree slp_node) 3641 { 3642 tree scalar_dest; 3643 tree data_ref; 3644 tree op; 3645 tree vec_oprnd = NULL_TREE; 3646 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 3647 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL; 3648 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 3649 tree elem_type; 3650 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 3651 struct loop *loop = NULL; 3652 enum machine_mode vec_mode; 3653 tree dummy; 3654 enum dr_alignment_support alignment_support_scheme; 3655 tree def; 3656 gimple def_stmt; 3657 enum vect_def_type dt; 3658 stmt_vec_info prev_stmt_info = NULL; 3659 tree dataref_ptr = NULL_TREE; 3660 int nunits = TYPE_VECTOR_SUBPARTS (vectype); 3661 int ncopies; 3662 int j; 3663 gimple next_stmt, first_stmt = NULL; 3664 bool strided_store = false; 3665 bool store_lanes_p = false; 3666 unsigned int group_size, i; 3667 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL; 3668 bool inv_p; 3669 VEC(tree,heap) *vec_oprnds = NULL; 3670 bool slp = (slp_node != NULL); 3671 unsigned int vec_num; 3672 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 3673 tree aggr_type; 3674 3675 if (loop_vinfo) 3676 loop = LOOP_VINFO_LOOP (loop_vinfo); 3677 3678 /* Multiple types in SLP are handled by creating the appropriate number of 3679 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 3680 case of SLP. */ 3681 if (slp || PURE_SLP_STMT (stmt_info)) 3682 ncopies = 1; 3683 else 3684 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; 3685 3686 gcc_assert (ncopies >= 1); 3687 3688 /* FORNOW. This restriction should be relaxed. */ 3689 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1) 3690 { 3691 if (vect_print_dump_info (REPORT_DETAILS)) 3692 fprintf (vect_dump, "multiple types in nested loop."); 3693 return false; 3694 } 3695 3696 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 3697 return false; 3698 3699 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 3700 return false; 3701 3702 /* Is vectorizable store? */ 3703 3704 if (!is_gimple_assign (stmt)) 3705 return false; 3706 3707 scalar_dest = gimple_assign_lhs (stmt); 3708 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR 3709 && is_pattern_stmt_p (stmt_info)) 3710 scalar_dest = TREE_OPERAND (scalar_dest, 0); 3711 if (TREE_CODE (scalar_dest) != ARRAY_REF 3712 && TREE_CODE (scalar_dest) != INDIRECT_REF 3713 && TREE_CODE (scalar_dest) != COMPONENT_REF 3714 && TREE_CODE (scalar_dest) != IMAGPART_EXPR 3715 && TREE_CODE (scalar_dest) != REALPART_EXPR 3716 && TREE_CODE (scalar_dest) != MEM_REF) 3717 return false; 3718 3719 gcc_assert (gimple_assign_single_p (stmt)); 3720 op = gimple_assign_rhs1 (stmt); 3721 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt, 3722 &def, &dt)) 3723 { 3724 if (vect_print_dump_info (REPORT_DETAILS)) 3725 fprintf (vect_dump, "use not simple."); 3726 return false; 3727 } 3728 3729 elem_type = TREE_TYPE (vectype); 3730 vec_mode = TYPE_MODE (vectype); 3731 3732 /* FORNOW. In some cases can vectorize even if data-type not supported 3733 (e.g. - array initialization with 0). */ 3734 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing) 3735 return false; 3736 3737 if (!STMT_VINFO_DATA_REF (stmt_info)) 3738 return false; 3739 3740 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt) 3741 ? 
STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr), 3742 size_zero_node) < 0) 3743 { 3744 if (vect_print_dump_info (REPORT_DETAILS)) 3745 fprintf (vect_dump, "negative step for store."); 3746 return false; 3747 } 3748 3749 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) 3750 { 3751 strided_store = true; 3752 first_stmt = GROUP_FIRST_ELEMENT (stmt_info); 3753 if (!slp && !PURE_SLP_STMT (stmt_info)) 3754 { 3755 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); 3756 if (vect_store_lanes_supported (vectype, group_size)) 3757 store_lanes_p = true; 3758 else if (!vect_strided_store_supported (vectype, group_size)) 3759 return false; 3760 } 3761 3762 if (first_stmt == stmt) 3763 { 3764 /* STMT is the leader of the group. Check the operands of all the 3765 stmts of the group. */ 3766 next_stmt = GROUP_NEXT_ELEMENT (stmt_info); 3767 while (next_stmt) 3768 { 3769 gcc_assert (gimple_assign_single_p (next_stmt)); 3770 op = gimple_assign_rhs1 (next_stmt); 3771 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo, 3772 &def_stmt, &def, &dt)) 3773 { 3774 if (vect_print_dump_info (REPORT_DETAILS)) 3775 fprintf (vect_dump, "use not simple."); 3776 return false; 3777 } 3778 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt)); 3779 } 3780 } 3781 } 3782 3783 if (!vec_stmt) /* transformation not required. */ 3784 { 3785 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; 3786 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL); 3787 return true; 3788 } 3789 3790 /** Transform. **/ 3791 3792 if (strided_store) 3793 { 3794 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); 3795 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); 3796 3797 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++; 3798 3799 /* FORNOW */ 3800 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt)); 3801 3802 /* We vectorize all the stmts of the interleaving group when we 3803 reach the last stmt in the group. */ 3804 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt)) 3805 < GROUP_SIZE (vinfo_for_stmt (first_stmt)) 3806 && !slp) 3807 { 3808 *vec_stmt = NULL; 3809 return true; 3810 } 3811 3812 if (slp) 3813 { 3814 strided_store = false; 3815 /* VEC_NUM is the number of vect stmts to be created for this 3816 group. */ 3817 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 3818 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0); 3819 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); 3820 op = gimple_assign_rhs1 (first_stmt); 3821 } 3822 else 3823 /* VEC_NUM is the number of vect stmts to be created for this 3824 group. */ 3825 vec_num = group_size; 3826 } 3827 else 3828 { 3829 first_stmt = stmt; 3830 first_dr = dr; 3831 group_size = vec_num = 1; 3832 } 3833 3834 if (vect_print_dump_info (REPORT_DETAILS)) 3835 fprintf (vect_dump, "transform store. ncopies = %d",ncopies); 3836 3837 dr_chain = VEC_alloc (tree, heap, group_size); 3838 oprnds = VEC_alloc (tree, heap, group_size); 3839 3840 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false); 3841 gcc_assert (alignment_support_scheme); 3842 /* Targets with store-lane instructions must not require explicit 3843 realignment. 
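   With store-lanes the whole interleaved group is written by a single
   IFN_STORE_LANES call on an array of vectors (see the store_lanes_p path
   below), so no explicit realignment scheme is involved.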
*/ 3844 gcc_assert (!store_lanes_p 3845 || alignment_support_scheme == dr_aligned 3846 || alignment_support_scheme == dr_unaligned_supported); 3847 3848 if (store_lanes_p) 3849 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); 3850 else 3851 aggr_type = vectype; 3852 3853 /* In case the vectorization factor (VF) is bigger than the number 3854 of elements that we can fit in a vectype (nunits), we have to generate 3855 more than one vector stmt - i.e - we need to "unroll" the 3856 vector stmt by a factor VF/nunits. For more details see documentation in 3857 vect_get_vec_def_for_copy_stmt. */ 3858 3859 /* In case of interleaving (non-unit strided access): 3860 3861 S1: &base + 2 = x2 3862 S2: &base = x0 3863 S3: &base + 1 = x1 3864 S4: &base + 3 = x3 3865 3866 We create vectorized stores starting from the base address (the access of the 3867 first stmt in the chain - S2 in the above example) when the last store stmt 3868 of the chain (S4) is reached: 3869 3870 VS1: &base = vx2 3871 VS2: &base + vec_size*1 = vx0 3872 VS3: &base + vec_size*2 = vx1 3873 VS4: &base + vec_size*3 = vx3 3874 3875 Then permutation statements are generated: 3876 3877 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} > 3878 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} > 3879 ... 3880 3881 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts 3882 (the order of the data-refs in the output of vect_permute_store_chain 3883 corresponds to the order of scalar stmts in the interleaving chain - see 3884 the documentation of vect_permute_store_chain()). 3885 3886 In case of both multiple types and interleaving, the above vector stores and 3887 permutation stmts are created for every copy. The result vector stmts are 3888 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding 3889 STMT_VINFO_RELATED_STMT for the next copies. 3890 */ 3891 3892 prev_stmt_info = NULL; 3893 for (j = 0; j < ncopies; j++) 3894 { 3895 gimple new_stmt; 3896 gimple ptr_incr; 3897 3898 if (j == 0) 3899 { 3900 if (slp) 3901 { 3902 /* Get vectorized arguments for SLP_NODE. */ 3903 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, 3904 NULL, slp_node, -1); 3905 3906 vec_oprnd = VEC_index (tree, vec_oprnds, 0); 3907 } 3908 else 3909 { 3910 /* For interleaved stores we collect vectorized defs for all the 3911 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then 3912 used as an input to vect_permute_store_chain(), and OPRNDS as 3913 an input to vect_get_vec_def_for_stmt_copy() for the next copy. 3914 3915 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and 3916 OPRNDS are of size 1. */ 3917 next_stmt = first_stmt; 3918 for (i = 0; i < group_size; i++) 3919 { 3920 /* Since gaps are not supported for interleaved stores, 3921 GROUP_SIZE is the exact number of stmts in the chain. 3922 Therefore, NEXT_STMT can't be NULL_TREE. In case that 3923 there is no interleaving, GROUP_SIZE is 1, and only one 3924 iteration of the loop will be executed. */ 3925 gcc_assert (next_stmt 3926 && gimple_assign_single_p (next_stmt)); 3927 op = gimple_assign_rhs1 (next_stmt); 3928 3929 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt, 3930 NULL); 3931 VEC_quick_push(tree, dr_chain, vec_oprnd); 3932 VEC_quick_push(tree, oprnds, vec_oprnd); 3933 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt)); 3934 } 3935 } 3936 3937 /* We should have caught mismatched types earlier.
*/ 3938 gcc_assert (useless_type_conversion_p (vectype, 3939 TREE_TYPE (vec_oprnd))); 3940 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL, 3941 NULL_TREE, &dummy, gsi, 3942 &ptr_incr, false, &inv_p); 3943 gcc_assert (bb_vinfo || !inv_p); 3944 } 3945 else 3946 { 3947 /* For interleaved stores we created vectorized defs for all the 3948 defs stored in OPRNDS in the previous iteration (previous copy). 3949 DR_CHAIN is then used as an input to vect_permute_store_chain(), 3950 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the 3951 next copy. 3952 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and 3953 OPRNDS are of size 1. */ 3954 for (i = 0; i < group_size; i++) 3955 { 3956 op = VEC_index (tree, oprnds, i); 3957 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt, 3958 &def, &dt); 3959 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op); 3960 VEC_replace(tree, dr_chain, i, vec_oprnd); 3961 VEC_replace(tree, oprnds, i, vec_oprnd); 3962 } 3963 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, 3964 TYPE_SIZE_UNIT (aggr_type)); 3965 } 3966 3967 if (store_lanes_p) 3968 { 3969 tree vec_array; 3970 3971 /* Combine all the vectors into an array. */ 3972 vec_array = create_vector_array (vectype, vec_num); 3973 for (i = 0; i < vec_num; i++) 3974 { 3975 vec_oprnd = VEC_index (tree, dr_chain, i); 3976 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i); 3977 } 3978 3979 /* Emit: 3980 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */ 3981 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr); 3982 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array); 3983 gimple_call_set_lhs (new_stmt, data_ref); 3984 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3985 mark_symbols_for_renaming (new_stmt); 3986 } 3987 else 3988 { 3989 new_stmt = NULL; 3990 if (strided_store) 3991 { 3992 result_chain = VEC_alloc (tree, heap, group_size); 3993 /* Permute. */ 3994 vect_permute_store_chain (dr_chain, group_size, stmt, gsi, 3995 &result_chain); 3996 } 3997 3998 next_stmt = first_stmt; 3999 for (i = 0; i < vec_num; i++) 4000 { 4001 struct ptr_info_def *pi; 4002 4003 if (i > 0) 4004 /* Bump the vector pointer. */ 4005 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, 4006 stmt, NULL_TREE); 4007 4008 if (slp) 4009 vec_oprnd = VEC_index (tree, vec_oprnds, i); 4010 else if (strided_store) 4011 /* For strided stores vectorized defs are interleaved in 4012 vect_permute_store_chain(). */ 4013 vec_oprnd = VEC_index (tree, result_chain, i); 4014 4015 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr, 4016 build_int_cst (reference_alias_ptr_type 4017 (DR_REF (first_dr)), 0)); 4018 pi = get_ptr_info (dataref_ptr); 4019 pi->align = TYPE_ALIGN_UNIT (vectype); 4020 if (aligned_access_p (first_dr)) 4021 pi->misalign = 0; 4022 else if (DR_MISALIGNMENT (first_dr) == -1) 4023 { 4024 TREE_TYPE (data_ref) 4025 = build_aligned_type (TREE_TYPE (data_ref), 4026 TYPE_ALIGN (elem_type)); 4027 pi->align = TYPE_ALIGN_UNIT (elem_type); 4028 pi->misalign = 0; 4029 } 4030 else 4031 { 4032 TREE_TYPE (data_ref) 4033 = build_aligned_type (TREE_TYPE (data_ref), 4034 TYPE_ALIGN (elem_type)); 4035 pi->misalign = DR_MISALIGNMENT (first_dr); 4036 } 4037 4038 /* Arguments are ready. Create the new vector stmt. 
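   Roughly (with invented names), each vector of the group is written by a
   stmt of the form

     MEM[(T *)vectp_a.7_22] = vect_x_5;

   where the MEM_REF is built on DATAREF_PTR with the alias pointer type of
   the original data-ref and the RHS is the vector def for this copy.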
*/ 4039 new_stmt = gimple_build_assign (data_ref, vec_oprnd); 4040 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4041 mark_symbols_for_renaming (new_stmt); 4042 4043 if (slp) 4044 continue; 4045 4046 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt)); 4047 if (!next_stmt) 4048 break; 4049 } 4050 } 4051 if (!slp) 4052 { 4053 if (j == 0) 4054 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 4055 else 4056 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 4057 prev_stmt_info = vinfo_for_stmt (new_stmt); 4058 } 4059 } 4060 4061 VEC_free (tree, heap, dr_chain); 4062 VEC_free (tree, heap, oprnds); 4063 if (result_chain) 4064 VEC_free (tree, heap, result_chain); 4065 if (vec_oprnds) 4066 VEC_free (tree, heap, vec_oprnds); 4067 4068 return true; 4069 } 4070 4071 /* Given a vector type VECTYPE and permutation SEL returns 4072 the VECTOR_CST mask that implements the permutation of the 4073 vector elements. If that is impossible to do, returns NULL. */ 4074 4075 tree 4076 vect_gen_perm_mask (tree vectype, unsigned char *sel) 4077 { 4078 tree mask_elt_type, mask_type, mask_vec; 4079 int i, nunits; 4080 4081 nunits = TYPE_VECTOR_SUBPARTS (vectype); 4082 4083 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) 4084 return NULL; 4085 4086 mask_elt_type 4087 = lang_hooks.types.type_for_size 4088 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1); 4089 mask_type = get_vectype_for_scalar_type (mask_elt_type); 4090 4091 mask_vec = NULL; 4092 for (i = nunits - 1; i >= 0; i--) 4093 mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, sel[i]), 4094 mask_vec); 4095 mask_vec = build_vector (mask_type, mask_vec); 4096 4097 return mask_vec; 4098 } 4099 4100 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements 4101 reversal of the vector elements. If that is impossible to do, 4102 returns NULL. */ 4103 4104 static tree 4105 perm_mask_for_reverse (tree vectype) 4106 { 4107 int i, nunits; 4108 unsigned char *sel; 4109 4110 nunits = TYPE_VECTOR_SUBPARTS (vectype); 4111 sel = XALLOCAVEC (unsigned char, nunits); 4112 4113 for (i = 0; i < nunits; ++i) 4114 sel[i] = nunits - 1 - i; 4115 4116 return vect_gen_perm_mask (vectype, sel); 4117 } 4118 4119 /* Given a vector variable X and Y, that was generated for the scalar 4120 STMT, generate instructions to permute the vector elements of X and Y 4121 using permutation mask MASK_VEC, insert them at *GSI and return the 4122 permuted vector variable. */ 4123 4124 static tree 4125 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt, 4126 gimple_stmt_iterator *gsi) 4127 { 4128 tree vectype = TREE_TYPE (x); 4129 tree perm_dest, data_ref; 4130 gimple perm_stmt; 4131 4132 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype); 4133 data_ref = make_ssa_name (perm_dest, NULL); 4134 4135 /* Generate the permute statement. */ 4136 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref, 4137 x, y, mask_vec); 4138 vect_finish_stmt_generation (stmt, perm_stmt, gsi); 4139 4140 return data_ref; 4141 } 4142 4143 /* vectorizable_load. 4144 4145 Check if STMT reads a non scalar data-ref (array/pointer/structure) that 4146 can be vectorized. 4147 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 4148 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 4149 Return FALSE if not a vectorizable STMT, TRUE otherwise. 
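   For illustration only (the names here are invented), a candidate scalar
   stmt is a load such as

     x_5 = a[i_7];

   i.e. an assignment whose RHS is a memory reference and whose LHS is an
   SSA name.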
*/ 4150 4151 static bool 4152 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, 4153 slp_tree slp_node, slp_instance slp_node_instance) 4154 { 4155 tree scalar_dest; 4156 tree vec_dest = NULL; 4157 tree data_ref = NULL; 4158 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 4159 stmt_vec_info prev_stmt_info; 4160 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 4161 struct loop *loop = NULL; 4162 struct loop *containing_loop = (gimple_bb (stmt))->loop_father; 4163 bool nested_in_vect_loop = false; 4164 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr; 4165 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 4166 tree elem_type; 4167 tree new_temp; 4168 enum machine_mode mode; 4169 gimple new_stmt = NULL; 4170 tree dummy; 4171 enum dr_alignment_support alignment_support_scheme; 4172 tree dataref_ptr = NULL_TREE; 4173 gimple ptr_incr; 4174 int nunits = TYPE_VECTOR_SUBPARTS (vectype); 4175 int ncopies; 4176 int i, j, group_size; 4177 tree msq = NULL_TREE, lsq; 4178 tree offset = NULL_TREE; 4179 tree realignment_token = NULL_TREE; 4180 gimple phi = NULL; 4181 VEC(tree,heap) *dr_chain = NULL; 4182 bool strided_load = false; 4183 bool load_lanes_p = false; 4184 gimple first_stmt; 4185 bool inv_p; 4186 bool negative; 4187 bool compute_in_loop = false; 4188 struct loop *at_loop; 4189 int vec_num; 4190 bool slp = (slp_node != NULL); 4191 bool slp_perm = false; 4192 enum tree_code code; 4193 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 4194 int vf; 4195 tree aggr_type; 4196 tree gather_base = NULL_TREE, gather_off = NULL_TREE; 4197 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE; 4198 int gather_scale = 1; 4199 enum vect_def_type gather_dt = vect_unknown_def_type; 4200 4201 if (loop_vinfo) 4202 { 4203 loop = LOOP_VINFO_LOOP (loop_vinfo); 4204 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt); 4205 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 4206 } 4207 else 4208 vf = 1; 4209 4210 /* Multiple types in SLP are handled by creating the appropriate number of 4211 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 4212 case of SLP. */ 4213 if (slp || PURE_SLP_STMT (stmt_info)) 4214 ncopies = 1; 4215 else 4216 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; 4217 4218 gcc_assert (ncopies >= 1); 4219 4220 /* FORNOW. This restriction should be relaxed. */ 4221 if (nested_in_vect_loop && ncopies > 1) 4222 { 4223 if (vect_print_dump_info (REPORT_DETAILS)) 4224 fprintf (vect_dump, "multiple types in nested loop."); 4225 return false; 4226 } 4227 4228 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 4229 return false; 4230 4231 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 4232 return false; 4233 4234 /* Is vectorizable load? */ 4235 if (!is_gimple_assign (stmt)) 4236 return false; 4237 4238 scalar_dest = gimple_assign_lhs (stmt); 4239 if (TREE_CODE (scalar_dest) != SSA_NAME) 4240 return false; 4241 4242 code = gimple_assign_rhs_code (stmt); 4243 if (code != ARRAY_REF 4244 && code != INDIRECT_REF 4245 && code != COMPONENT_REF 4246 && code != IMAGPART_EXPR 4247 && code != REALPART_EXPR 4248 && code != MEM_REF 4249 && TREE_CODE_CLASS (code) != tcc_declaration) 4250 return false; 4251 4252 if (!STMT_VINFO_DATA_REF (stmt_info)) 4253 return false; 4254 4255 negative = tree_int_cst_compare (nested_in_vect_loop 4256 ? 
STMT_VINFO_DR_STEP (stmt_info) 4257 : DR_STEP (dr), 4258 size_zero_node) < 0; 4259 if (negative && ncopies > 1) 4260 { 4261 if (vect_print_dump_info (REPORT_DETAILS)) 4262 fprintf (vect_dump, "multiple types with negative step."); 4263 return false; 4264 } 4265 4266 elem_type = TREE_TYPE (vectype); 4267 mode = TYPE_MODE (vectype); 4268 4269 /* FORNOW. In some cases can vectorize even if data-type not supported 4270 (e.g. - data copies). */ 4271 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing) 4272 { 4273 if (vect_print_dump_info (REPORT_DETAILS)) 4274 fprintf (vect_dump, "Aligned load, but unsupported type."); 4275 return false; 4276 } 4277 4278 /* Check if the load is a part of an interleaving chain. */ 4279 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) 4280 { 4281 strided_load = true; 4282 /* FORNOW */ 4283 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info)); 4284 4285 first_stmt = GROUP_FIRST_ELEMENT (stmt_info); 4286 if (!slp && !PURE_SLP_STMT (stmt_info)) 4287 { 4288 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); 4289 if (vect_load_lanes_supported (vectype, group_size)) 4290 load_lanes_p = true; 4291 else if (!vect_strided_load_supported (vectype, group_size)) 4292 return false; 4293 } 4294 } 4295 4296 if (negative) 4297 { 4298 gcc_assert (!strided_load && !STMT_VINFO_GATHER_P (stmt_info)); 4299 alignment_support_scheme = vect_supportable_dr_alignment (dr, false); 4300 if (alignment_support_scheme != dr_aligned 4301 && alignment_support_scheme != dr_unaligned_supported) 4302 { 4303 if (vect_print_dump_info (REPORT_DETAILS)) 4304 fprintf (vect_dump, "negative step but alignment required."); 4305 return false; 4306 } 4307 if (!perm_mask_for_reverse (vectype)) 4308 { 4309 if (vect_print_dump_info (REPORT_DETAILS)) 4310 fprintf (vect_dump, "negative step and reversing not supported."); 4311 return false; 4312 } 4313 } 4314 4315 if (STMT_VINFO_GATHER_P (stmt_info)) 4316 { 4317 gimple def_stmt; 4318 tree def; 4319 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base, 4320 &gather_off, &gather_scale); 4321 gcc_assert (gather_decl); 4322 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo, 4323 &def_stmt, &def, &gather_dt, 4324 &gather_off_vectype)) 4325 { 4326 if (vect_print_dump_info (REPORT_DETAILS)) 4327 fprintf (vect_dump, "gather index use not simple."); 4328 return false; 4329 } 4330 } 4331 4332 if (!vec_stmt) /* transformation not required. */ 4333 { 4334 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; 4335 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL); 4336 return true; 4337 } 4338 4339 if (vect_print_dump_info (REPORT_DETAILS)) 4340 fprintf (vect_dump, "transform load. ncopies = %d", ncopies); 4341 4342 /** Transform. 
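   (The gather case below is handled separately and returns on its own;
   the code after it implements the regular, possibly interleaved, load
   scheme.)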
**/ 4343 4344 if (STMT_VINFO_GATHER_P (stmt_info)) 4345 { 4346 tree vec_oprnd0 = NULL_TREE, op; 4347 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl)); 4348 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype; 4349 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE; 4350 edge pe = loop_preheader_edge (loop); 4351 gimple_seq seq; 4352 basic_block new_bb; 4353 enum { NARROW, NONE, WIDEN } modifier; 4354 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype); 4355 4356 if (nunits == gather_off_nunits) 4357 modifier = NONE; 4358 else if (nunits == gather_off_nunits / 2) 4359 { 4360 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits); 4361 modifier = WIDEN; 4362 4363 for (i = 0; i < gather_off_nunits; ++i) 4364 sel[i] = i | nunits; 4365 4366 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel); 4367 gcc_assert (perm_mask != NULL_TREE); 4368 } 4369 else if (nunits == gather_off_nunits * 2) 4370 { 4371 unsigned char *sel = XALLOCAVEC (unsigned char, nunits); 4372 modifier = NARROW; 4373 4374 for (i = 0; i < nunits; ++i) 4375 sel[i] = i < gather_off_nunits 4376 ? i : i + nunits - gather_off_nunits; 4377 4378 perm_mask = vect_gen_perm_mask (vectype, sel); 4379 gcc_assert (perm_mask != NULL_TREE); 4380 ncopies *= 2; 4381 } 4382 else 4383 gcc_unreachable (); 4384 4385 rettype = TREE_TYPE (TREE_TYPE (gather_decl)); 4386 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 4387 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 4388 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 4389 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 4390 scaletype = TREE_VALUE (arglist); 4391 gcc_checking_assert (types_compatible_p (srctype, rettype) 4392 && types_compatible_p (srctype, masktype)); 4393 4394 vec_dest = vect_create_destination_var (scalar_dest, vectype); 4395 4396 ptr = fold_convert (ptrtype, gather_base); 4397 if (!is_gimple_min_invariant (ptr)) 4398 { 4399 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE); 4400 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); 4401 gcc_assert (!new_bb); 4402 } 4403 4404 /* Currently we support only unconditional gather loads, 4405 so mask should be all ones. 
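   Depending on the element type of MASKTYPE this is either an integer -1
   or a float whose bit pattern is all ones (built with real_from_target
   below); the scalar is then broadcast with build_vector_from_val.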
*/ 4406 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE) 4407 mask = build_int_cst (TREE_TYPE (masktype), -1); 4408 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype))) 4409 { 4410 REAL_VALUE_TYPE r; 4411 long tmp[6]; 4412 for (j = 0; j < 6; ++j) 4413 tmp[j] = -1; 4414 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype))); 4415 mask = build_real (TREE_TYPE (masktype), r); 4416 } 4417 else 4418 gcc_unreachable (); 4419 mask = build_vector_from_val (masktype, mask); 4420 mask = vect_init_vector (stmt, mask, masktype, NULL); 4421 4422 scale = build_int_cst (scaletype, gather_scale); 4423 4424 prev_stmt_info = NULL; 4425 for (j = 0; j < ncopies; ++j) 4426 { 4427 if (modifier == WIDEN && (j & 1)) 4428 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, 4429 perm_mask, stmt, gsi); 4430 else if (j == 0) 4431 op = vec_oprnd0 4432 = vect_get_vec_def_for_operand (gather_off, stmt, NULL); 4433 else 4434 op = vec_oprnd0 4435 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0); 4436 4437 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op))) 4438 { 4439 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)) 4440 == TYPE_VECTOR_SUBPARTS (idxtype)); 4441 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL); 4442 add_referenced_var (var); 4443 var = make_ssa_name (var, NULL); 4444 op = build1 (VIEW_CONVERT_EXPR, idxtype, op); 4445 new_stmt 4446 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, 4447 op, NULL_TREE); 4448 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4449 op = var; 4450 } 4451 4452 new_stmt 4453 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale); 4454 4455 if (!useless_type_conversion_p (vectype, rettype)) 4456 { 4457 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype) 4458 == TYPE_VECTOR_SUBPARTS (rettype)); 4459 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL); 4460 add_referenced_var (var); 4461 op = make_ssa_name (var, new_stmt); 4462 gimple_call_set_lhs (new_stmt, op); 4463 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4464 var = make_ssa_name (vec_dest, NULL); 4465 op = build1 (VIEW_CONVERT_EXPR, vectype, op); 4466 new_stmt 4467 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op, 4468 NULL_TREE); 4469 } 4470 else 4471 { 4472 var = make_ssa_name (vec_dest, new_stmt); 4473 gimple_call_set_lhs (new_stmt, var); 4474 } 4475 4476 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4477 4478 if (modifier == NARROW) 4479 { 4480 if ((j & 1) == 0) 4481 { 4482 prev_res = var; 4483 continue; 4484 } 4485 var = permute_vec_elements (prev_res, var, 4486 perm_mask, stmt, gsi); 4487 new_stmt = SSA_NAME_DEF_STMT (var); 4488 } 4489 4490 if (prev_stmt_info == NULL) 4491 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 4492 else 4493 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 4494 prev_stmt_info = vinfo_for_stmt (new_stmt); 4495 } 4496 return true; 4497 } 4498 4499 if (strided_load) 4500 { 4501 first_stmt = GROUP_FIRST_ELEMENT (stmt_info); 4502 if (slp 4503 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance) 4504 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0)) 4505 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0); 4506 4507 /* Check if the chain of loads is already vectorized. 
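   (This happens when another stmt of the same interleaving chain has
   already been transformed; in that case all we need to do is return the
   vector stmt that was recorded for STMT in its STMT_VINFO_VEC_STMT.)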
*/ 4508 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))) 4509 { 4510 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 4511 return true; 4512 } 4513 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); 4514 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); 4515 4516 /* VEC_NUM is the number of vect stmts to be created for this group. */ 4517 if (slp) 4518 { 4519 strided_load = false; 4520 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 4521 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)) 4522 slp_perm = true; 4523 } 4524 else 4525 vec_num = group_size; 4526 } 4527 else 4528 { 4529 first_stmt = stmt; 4530 first_dr = dr; 4531 group_size = vec_num = 1; 4532 } 4533 4534 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false); 4535 gcc_assert (alignment_support_scheme); 4536 /* Targets with load-lane instructions must not require explicit 4537 realignment. */ 4538 gcc_assert (!load_lanes_p 4539 || alignment_support_scheme == dr_aligned 4540 || alignment_support_scheme == dr_unaligned_supported); 4541 4542 /* In case the vectorization factor (VF) is bigger than the number 4543 of elements that we can fit in a vectype (nunits), we have to generate 4544 more than one vector stmt - i.e - we need to "unroll" the 4545 vector stmt by a factor VF/nunits. In doing so, we record a pointer 4546 from one copy of the vector stmt to the next, in the field 4547 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following 4548 stages to find the correct vector defs to be used when vectorizing 4549 stmts that use the defs of the current stmt. The example below 4550 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we 4551 need to create 4 vectorized stmts): 4552 4553 before vectorization: 4554 RELATED_STMT VEC_STMT 4555 S1: x = memref - - 4556 S2: z = x + 1 - - 4557 4558 step 1: vectorize stmt S1: 4559 We first create the vector stmt VS1_0, and, as usual, record a 4560 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1. 4561 Next, we create the vector stmt VS1_1, and record a pointer to 4562 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0. 4563 Similarly, for VS1_2 and VS1_3. This is the resulting chain of 4564 stmts and pointers: 4565 RELATED_STMT VEC_STMT 4566 VS1_0: vx0 = memref0 VS1_1 - 4567 VS1_1: vx1 = memref1 VS1_2 - 4568 VS1_2: vx2 = memref2 VS1_3 - 4569 VS1_3: vx3 = memref3 - - 4570 S1: x = load - VS1_0 4571 S2: z = x + 1 - - 4572 4573 See in documentation in vect_get_vec_def_for_stmt_copy for how the 4574 information we recorded in RELATED_STMT field is used to vectorize 4575 stmt S2. */ 4576 4577 /* In case of interleaving (non-unit strided access): 4578 4579 S1: x2 = &base + 2 4580 S2: x0 = &base 4581 S3: x1 = &base + 1 4582 S4: x3 = &base + 3 4583 4584 Vectorized loads are created in the order of memory accesses 4585 starting from the access of the first stmt of the chain: 4586 4587 VS1: vx0 = &base 4588 VS2: vx1 = &base + vec_size*1 4589 VS3: vx3 = &base + vec_size*2 4590 VS4: vx4 = &base + vec_size*3 4591 4592 Then permutation statements are generated: 4593 4594 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } > 4595 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } > 4596 ... 4597 4598 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts 4599 (the order of the data-refs in the output of vect_permute_load_chain 4600 corresponds to the order of scalar stmts in the interleaving chain - see 4601 the documentation of vect_permute_load_chain()). 
4602 The generation of permutation stmts and recording them in 4603 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load(). 4604 4605 In case of both multiple types and interleaving, the vector loads and 4606 permutation stmts above are created for every copy. The result vector 4607 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the 4608 corresponding STMT_VINFO_RELATED_STMT for the next copies. */ 4609 4610 /* If the data reference is aligned (dr_aligned) or potentially unaligned 4611 on a target that supports unaligned accesses (dr_unaligned_supported) 4612 we generate the following code: 4613 p = initial_addr; 4614 indx = 0; 4615 loop { 4616 p = p + indx * vectype_size; 4617 vec_dest = *(p); 4618 indx = indx + 1; 4619 } 4620 4621 Otherwise, the data reference is potentially unaligned on a target that 4622 does not support unaligned accesses (dr_explicit_realign_optimized) - 4623 then generate the following code, in which the data in each iteration is 4624 obtained by two vector loads, one from the previous iteration, and one 4625 from the current iteration: 4626 p1 = initial_addr; 4627 msq_init = *(floor(p1)) 4628 p2 = initial_addr + VS - 1; 4629 realignment_token = call target_builtin; 4630 indx = 0; 4631 loop { 4632 p2 = p2 + indx * vectype_size 4633 lsq = *(floor(p2)) 4634 vec_dest = realign_load (msq, lsq, realignment_token) 4635 indx = indx + 1; 4636 msq = lsq; 4637 } */ 4638 4639 /* If the misalignment remains the same throughout the execution of the 4640 loop, we can create the init_addr and permutation mask at the loop 4641 preheader. Otherwise, it needs to be created inside the loop. 4642 This can only occur when vectorizing memory accesses in the inner-loop 4643 nested within an outer-loop that is being vectorized. */ 4644 4645 if (nested_in_vect_loop 4646 && (TREE_INT_CST_LOW (DR_STEP (dr)) 4647 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)) 4648 { 4649 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized); 4650 compute_in_loop = true; 4651 } 4652 4653 if ((alignment_support_scheme == dr_explicit_realign_optimized 4654 || alignment_support_scheme == dr_explicit_realign) 4655 && !compute_in_loop) 4656 { 4657 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token, 4658 alignment_support_scheme, NULL_TREE, 4659 &at_loop); 4660 if (alignment_support_scheme == dr_explicit_realign_optimized) 4661 { 4662 phi = SSA_NAME_DEF_STMT (msq); 4663 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1); 4664 } 4665 } 4666 else 4667 at_loop = loop; 4668 4669 if (negative) 4670 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1); 4671 4672 if (load_lanes_p) 4673 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); 4674 else 4675 aggr_type = vectype; 4676 4677 prev_stmt_info = NULL; 4678 for (j = 0; j < ncopies; j++) 4679 { 4680 /* 1. Create the vector or array pointer update chain. */ 4681 if (j == 0) 4682 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop, 4683 offset, &dummy, gsi, 4684 &ptr_incr, false, &inv_p); 4685 else 4686 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, 4687 TYPE_SIZE_UNIT (aggr_type)); 4688 4689 if (strided_load || slp_perm) 4690 dr_chain = VEC_alloc (tree, heap, vec_num); 4691 4692 if (load_lanes_p) 4693 { 4694 tree vec_array; 4695 4696 vec_array = create_vector_array (vectype, vec_num); 4697 4698 /* Emit: 4699 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). 
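   DATA_REF covers the memory of the whole interleaved group; the call
   result (an array of VEC_NUM vectors) is assigned to VEC_ARRAY and each
   vector is then extracted into an SSA_NAME by read_vector_array below.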
*/ 4700 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr); 4701 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref); 4702 gimple_call_set_lhs (new_stmt, vec_array); 4703 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4704 mark_symbols_for_renaming (new_stmt); 4705 4706 /* Extract each vector into an SSA_NAME. */ 4707 for (i = 0; i < vec_num; i++) 4708 { 4709 new_temp = read_vector_array (stmt, gsi, scalar_dest, 4710 vec_array, i); 4711 VEC_quick_push (tree, dr_chain, new_temp); 4712 } 4713 4714 /* Record the mapping between SSA_NAMEs and statements. */ 4715 vect_record_strided_load_vectors (stmt, dr_chain); 4716 } 4717 else 4718 { 4719 for (i = 0; i < vec_num; i++) 4720 { 4721 if (i > 0) 4722 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, 4723 stmt, NULL_TREE); 4724 4725 /* 2. Create the vector-load in the loop. */ 4726 switch (alignment_support_scheme) 4727 { 4728 case dr_aligned: 4729 case dr_unaligned_supported: 4730 { 4731 struct ptr_info_def *pi; 4732 data_ref 4733 = build2 (MEM_REF, vectype, dataref_ptr, 4734 build_int_cst (reference_alias_ptr_type 4735 (DR_REF (first_dr)), 0)); 4736 pi = get_ptr_info (dataref_ptr); 4737 pi->align = TYPE_ALIGN_UNIT (vectype); 4738 if (alignment_support_scheme == dr_aligned) 4739 { 4740 gcc_assert (aligned_access_p (first_dr)); 4741 pi->misalign = 0; 4742 } 4743 else if (DR_MISALIGNMENT (first_dr) == -1) 4744 { 4745 TREE_TYPE (data_ref) 4746 = build_aligned_type (TREE_TYPE (data_ref), 4747 TYPE_ALIGN (elem_type)); 4748 pi->align = TYPE_ALIGN_UNIT (elem_type); 4749 pi->misalign = 0; 4750 } 4751 else 4752 { 4753 TREE_TYPE (data_ref) 4754 = build_aligned_type (TREE_TYPE (data_ref), 4755 TYPE_ALIGN (elem_type)); 4756 pi->misalign = DR_MISALIGNMENT (first_dr); 4757 } 4758 break; 4759 } 4760 case dr_explicit_realign: 4761 { 4762 tree ptr, bump; 4763 tree vs_minus_1; 4764 4765 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1); 4766 4767 if (compute_in_loop) 4768 msq = vect_setup_realignment (first_stmt, gsi, 4769 &realignment_token, 4770 dr_explicit_realign, 4771 dataref_ptr, NULL); 4772 4773 new_stmt = gimple_build_assign_with_ops 4774 (BIT_AND_EXPR, NULL_TREE, dataref_ptr, 4775 build_int_cst 4776 (TREE_TYPE (dataref_ptr), 4777 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); 4778 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt); 4779 gimple_assign_set_lhs (new_stmt, ptr); 4780 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4781 data_ref 4782 = build2 (MEM_REF, vectype, ptr, 4783 build_int_cst (reference_alias_ptr_type 4784 (DR_REF (first_dr)), 0)); 4785 vec_dest = vect_create_destination_var (scalar_dest, 4786 vectype); 4787 new_stmt = gimple_build_assign (vec_dest, data_ref); 4788 new_temp = make_ssa_name (vec_dest, new_stmt); 4789 gimple_assign_set_lhs (new_stmt, new_temp); 4790 gimple_set_vdef (new_stmt, gimple_vdef (stmt)); 4791 gimple_set_vuse (new_stmt, gimple_vuse (stmt)); 4792 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4793 msq = new_temp; 4794 4795 bump = size_binop (MULT_EXPR, vs_minus_1, 4796 TYPE_SIZE_UNIT (elem_type)); 4797 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump); 4798 new_stmt = gimple_build_assign_with_ops 4799 (BIT_AND_EXPR, NULL_TREE, ptr, 4800 build_int_cst 4801 (TREE_TYPE (ptr), 4802 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); 4803 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt); 4804 gimple_assign_set_lhs (new_stmt, ptr); 4805 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4806 data_ref 4807 = build2 (MEM_REF, vectype, ptr, 4808 
build_int_cst (reference_alias_ptr_type 4809 (DR_REF (first_dr)), 0)); 4810 break; 4811 } 4812 case dr_explicit_realign_optimized: 4813 new_stmt = gimple_build_assign_with_ops 4814 (BIT_AND_EXPR, NULL_TREE, dataref_ptr, 4815 build_int_cst 4816 (TREE_TYPE (dataref_ptr), 4817 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); 4818 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr), 4819 new_stmt); 4820 gimple_assign_set_lhs (new_stmt, new_temp); 4821 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4822 data_ref 4823 = build2 (MEM_REF, vectype, new_temp, 4824 build_int_cst (reference_alias_ptr_type 4825 (DR_REF (first_dr)), 0)); 4826 break; 4827 default: 4828 gcc_unreachable (); 4829 } 4830 vec_dest = vect_create_destination_var (scalar_dest, vectype); 4831 new_stmt = gimple_build_assign (vec_dest, data_ref); 4832 new_temp = make_ssa_name (vec_dest, new_stmt); 4833 gimple_assign_set_lhs (new_stmt, new_temp); 4834 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4835 mark_symbols_for_renaming (new_stmt); 4836 4837 /* 3. Handle explicit realignment if necessary/supported. 4838 Create in loop: 4839 vec_dest = realign_load (msq, lsq, realignment_token) */ 4840 if (alignment_support_scheme == dr_explicit_realign_optimized 4841 || alignment_support_scheme == dr_explicit_realign) 4842 { 4843 lsq = gimple_assign_lhs (new_stmt); 4844 if (!realignment_token) 4845 realignment_token = dataref_ptr; 4846 vec_dest = vect_create_destination_var (scalar_dest, vectype); 4847 new_stmt 4848 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR, 4849 vec_dest, msq, lsq, 4850 realignment_token); 4851 new_temp = make_ssa_name (vec_dest, new_stmt); 4852 gimple_assign_set_lhs (new_stmt, new_temp); 4853 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4854 4855 if (alignment_support_scheme == dr_explicit_realign_optimized) 4856 { 4857 gcc_assert (phi); 4858 if (i == vec_num - 1 && j == ncopies - 1) 4859 add_phi_arg (phi, lsq, 4860 loop_latch_edge (containing_loop), 4861 UNKNOWN_LOCATION); 4862 msq = lsq; 4863 } 4864 } 4865 4866 /* 4. Handle invariant-load. */ 4867 if (inv_p && !bb_vinfo) 4868 { 4869 tree tem, vec_inv; 4870 gimple_stmt_iterator gsi2 = *gsi; 4871 gcc_assert (!strided_load); 4872 gsi_next (&gsi2); 4873 tem = scalar_dest; 4874 if (!useless_type_conversion_p (TREE_TYPE (vectype), 4875 TREE_TYPE (tem))) 4876 { 4877 tem = fold_convert (TREE_TYPE (vectype), tem); 4878 tem = force_gimple_operand_gsi (&gsi2, tem, true, 4879 NULL_TREE, true, 4880 GSI_SAME_STMT); 4881 } 4882 vec_inv = build_vector_from_val (vectype, tem); 4883 new_temp = vect_init_vector (stmt, vec_inv, 4884 vectype, &gsi2); 4885 new_stmt = SSA_NAME_DEF_STMT (new_temp); 4886 } 4887 4888 if (negative) 4889 { 4890 tree perm_mask = perm_mask_for_reverse (vectype); 4891 new_temp = permute_vec_elements (new_temp, new_temp, 4892 perm_mask, stmt, gsi); 4893 new_stmt = SSA_NAME_DEF_STMT (new_temp); 4894 } 4895 4896 /* Collect vector loads and later create their permutation in 4897 vect_transform_strided_load (). */ 4898 if (strided_load || slp_perm) 4899 VEC_quick_push (tree, dr_chain, new_temp); 4900 4901 /* Store vector loads in the corresponding SLP_NODE. 
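   (When a load permutation is needed, the collected vectors are instead
   passed to vect_transform_slp_perm_load below, which generates the
   required permutation stmts.)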
*/ 4902 if (slp && !slp_perm) 4903 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), 4904 new_stmt); 4905 } 4906 } 4907 4908 if (slp && !slp_perm) 4909 continue; 4910 4911 if (slp_perm) 4912 { 4913 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf, 4914 slp_node_instance, false)) 4915 { 4916 VEC_free (tree, heap, dr_chain); 4917 return false; 4918 } 4919 } 4920 else 4921 { 4922 if (strided_load) 4923 { 4924 if (!load_lanes_p) 4925 vect_transform_strided_load (stmt, dr_chain, group_size, gsi); 4926 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 4927 } 4928 else 4929 { 4930 if (j == 0) 4931 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 4932 else 4933 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 4934 prev_stmt_info = vinfo_for_stmt (new_stmt); 4935 } 4936 } 4937 if (dr_chain) 4938 VEC_free (tree, heap, dr_chain); 4939 } 4940 4941 return true; 4942 } 4943 4944 /* Function vect_is_simple_cond. 4945 4946 Input: 4947 LOOP - the loop that is being vectorized. 4948 COND - Condition that is checked for simple use. 4949 4950 Output: 4951 *COMP_VECTYPE - the vector type for the comparison. 4952 4953 Returns whether a COND can be vectorized. Checks whether 4954 condition operands are supportable using vect_is_simple_use. */ 4955 4956 static bool 4957 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo, 4958 bb_vec_info bb_vinfo, tree *comp_vectype) 4959 { 4960 tree lhs, rhs; 4961 tree def; 4962 enum vect_def_type dt; 4963 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE; 4964 4965 if (!COMPARISON_CLASS_P (cond)) 4966 return false; 4967 4968 lhs = TREE_OPERAND (cond, 0); 4969 rhs = TREE_OPERAND (cond, 1); 4970 4971 if (TREE_CODE (lhs) == SSA_NAME) 4972 { 4973 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs); 4974 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo, 4975 &lhs_def_stmt, &def, &dt, &vectype1)) 4976 return false; 4977 } 4978 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST 4979 && TREE_CODE (lhs) != FIXED_CST) 4980 return false; 4981 4982 if (TREE_CODE (rhs) == SSA_NAME) 4983 { 4984 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs); 4985 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo, 4986 &rhs_def_stmt, &def, &dt, &vectype2)) 4987 return false; 4988 } 4989 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST 4990 && TREE_CODE (rhs) != FIXED_CST) 4991 return false; 4992 4993 *comp_vectype = vectype1 ? vectype1 : vectype2; 4994 return true; 4995 } 4996 4997 /* vectorizable_condition. 4998 4999 Check if STMT is a conditional modify expression that can be vectorized. 5000 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 5001 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it 5002 at GSI. 5003 5004 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable 5005 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in 5006 the else clause if it is 2). 5007 5008 Return FALSE if not a vectorizable STMT, TRUE otherwise.
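   For illustration only (the names here are invented), a scalar stmt

     x_5 = a_1 < b_2 ? c_3 : d_4;

   is replaced by a vector stmt of the form

     vx_10 = VEC_COND_EXPR <va_6 < vb_7, vc_8, vd_9>;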
*/ 5009 5010 bool 5011 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi, 5012 gimple *vec_stmt, tree reduc_def, int reduc_index, 5013 slp_tree slp_node) 5014 { 5015 tree scalar_dest = NULL_TREE; 5016 tree vec_dest = NULL_TREE; 5017 tree cond_expr, then_clause, else_clause; 5018 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 5019 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 5020 tree comp_vectype = NULL_TREE; 5021 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE; 5022 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE; 5023 tree vec_compare, vec_cond_expr; 5024 tree new_temp; 5025 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 5026 tree def; 5027 enum vect_def_type dt, dts[4]; 5028 int nunits = TYPE_VECTOR_SUBPARTS (vectype); 5029 int ncopies; 5030 enum tree_code code; 5031 stmt_vec_info prev_stmt_info = NULL; 5032 int i, j; 5033 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 5034 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; 5035 VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL; 5036 5037 if (slp_node || PURE_SLP_STMT (stmt_info)) 5038 ncopies = 1; 5039 else 5040 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; 5041 5042 gcc_assert (ncopies >= 1); 5043 if (reduc_index && ncopies > 1) 5044 return false; /* FORNOW */ 5045 5046 if (reduc_index && STMT_SLP_TYPE (stmt_info)) 5047 return false; 5048 5049 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 5050 return false; 5051 5052 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 5053 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle 5054 && reduc_def)) 5055 return false; 5056 5057 /* FORNOW: not yet supported. */ 5058 if (STMT_VINFO_LIVE_P (stmt_info)) 5059 { 5060 if (vect_print_dump_info (REPORT_DETAILS)) 5061 fprintf (vect_dump, "value used after loop."); 5062 return false; 5063 } 5064 5065 /* Is vectorizable conditional operation? */ 5066 if (!is_gimple_assign (stmt)) 5067 return false; 5068 5069 code = gimple_assign_rhs_code (stmt); 5070 5071 if (code != COND_EXPR) 5072 return false; 5073 5074 cond_expr = gimple_assign_rhs1 (stmt); 5075 then_clause = gimple_assign_rhs2 (stmt); 5076 else_clause = gimple_assign_rhs3 (stmt); 5077 5078 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo, 5079 &comp_vectype) 5080 || !comp_vectype) 5081 return false; 5082 5083 if (TREE_CODE (then_clause) == SSA_NAME) 5084 { 5085 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause); 5086 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo, 5087 &then_def_stmt, &def, &dt)) 5088 return false; 5089 } 5090 else if (TREE_CODE (then_clause) != INTEGER_CST 5091 && TREE_CODE (then_clause) != REAL_CST 5092 && TREE_CODE (then_clause) != FIXED_CST) 5093 return false; 5094 5095 if (TREE_CODE (else_clause) == SSA_NAME) 5096 { 5097 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause); 5098 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo, 5099 &else_def_stmt, &def, &dt)) 5100 return false; 5101 } 5102 else if (TREE_CODE (else_clause) != INTEGER_CST 5103 && TREE_CODE (else_clause) != REAL_CST 5104 && TREE_CODE (else_clause) != FIXED_CST) 5105 return false; 5106 5107 if (!vec_stmt) 5108 { 5109 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; 5110 return expand_vec_cond_expr_p (vectype, comp_vectype); 5111 } 5112 5113 /* Transform. 
*/ 5114 5115 if (!slp_node) 5116 { 5117 vec_oprnds0 = VEC_alloc (tree, heap, 1); 5118 vec_oprnds1 = VEC_alloc (tree, heap, 1); 5119 vec_oprnds2 = VEC_alloc (tree, heap, 1); 5120 vec_oprnds3 = VEC_alloc (tree, heap, 1); 5121 } 5122 5123 /* Handle def. */ 5124 scalar_dest = gimple_assign_lhs (stmt); 5125 vec_dest = vect_create_destination_var (scalar_dest, vectype); 5126 5127 /* Handle cond expr. */ 5128 for (j = 0; j < ncopies; j++) 5129 { 5130 gimple new_stmt = NULL; 5131 if (j == 0) 5132 { 5133 if (slp_node) 5134 { 5135 VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4); 5136 VEC (slp_void_p, heap) *vec_defs; 5137 5138 vec_defs = VEC_alloc (slp_void_p, heap, 4); 5139 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0)); 5140 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1)); 5141 VEC_safe_push (tree, heap, ops, then_clause); 5142 VEC_safe_push (tree, heap, ops, else_clause); 5143 vect_get_slp_defs (ops, slp_node, &vec_defs, -1); 5144 vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); 5145 vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); 5146 vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); 5147 vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); 5148 5149 VEC_free (tree, heap, ops); 5150 VEC_free (slp_void_p, heap, vec_defs); 5151 } 5152 else 5153 { 5154 gimple gtemp; 5155 vec_cond_lhs = 5156 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), 5157 stmt, NULL); 5158 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt, 5159 loop_vinfo, NULL, >emp, &def, &dts[0]); 5160 5161 vec_cond_rhs = 5162 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), 5163 stmt, NULL); 5164 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt, 5165 loop_vinfo, NULL, >emp, &def, &dts[1]); 5166 if (reduc_index == 1) 5167 vec_then_clause = reduc_def; 5168 else 5169 { 5170 vec_then_clause = vect_get_vec_def_for_operand (then_clause, 5171 stmt, NULL); 5172 vect_is_simple_use (then_clause, stmt, loop_vinfo, 5173 NULL, >emp, &def, &dts[2]); 5174 } 5175 if (reduc_index == 2) 5176 vec_else_clause = reduc_def; 5177 else 5178 { 5179 vec_else_clause = vect_get_vec_def_for_operand (else_clause, 5180 stmt, NULL); 5181 vect_is_simple_use (else_clause, stmt, loop_vinfo, 5182 NULL, >emp, &def, &dts[3]); 5183 } 5184 } 5185 } 5186 else 5187 { 5188 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], 5189 VEC_pop (tree, vec_oprnds0)); 5190 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], 5191 VEC_pop (tree, vec_oprnds1)); 5192 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2], 5193 VEC_pop (tree, vec_oprnds2)); 5194 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3], 5195 VEC_pop (tree, vec_oprnds3)); 5196 } 5197 5198 if (!slp_node) 5199 { 5200 VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs); 5201 VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs); 5202 VEC_quick_push (tree, vec_oprnds2, vec_then_clause); 5203 VEC_quick_push (tree, vec_oprnds3, vec_else_clause); 5204 } 5205 5206 /* Arguments are ready. Create the new vector stmt. 
*/ 5207 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs) 5208 { 5209 vec_cond_rhs = VEC_index (tree, vec_oprnds1, i); 5210 vec_then_clause = VEC_index (tree, vec_oprnds2, i); 5211 vec_else_clause = VEC_index (tree, vec_oprnds3, i); 5212 5213 vec_compare = build2 (TREE_CODE (cond_expr), vectype, 5214 vec_cond_lhs, vec_cond_rhs); 5215 vec_cond_expr = build3 (VEC_COND_EXPR, vectype, 5216 vec_compare, vec_then_clause, vec_else_clause); 5217 5218 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr); 5219 new_temp = make_ssa_name (vec_dest, new_stmt); 5220 gimple_assign_set_lhs (new_stmt, new_temp); 5221 vect_finish_stmt_generation (stmt, new_stmt, gsi); 5222 if (slp_node) 5223 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt); 5224 } 5225 5226 if (slp_node) 5227 continue; 5228 5229 if (j == 0) 5230 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 5231 else 5232 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 5233 5234 prev_stmt_info = vinfo_for_stmt (new_stmt); 5235 } 5236 5237 VEC_free (tree, heap, vec_oprnds0); 5238 VEC_free (tree, heap, vec_oprnds1); 5239 VEC_free (tree, heap, vec_oprnds2); 5240 VEC_free (tree, heap, vec_oprnds3); 5241 5242 return true; 5243 } 5244 5245 5246 /* Make sure the statement is vectorizable. */ 5247 5248 bool 5249 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node) 5250 { 5251 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 5252 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 5253 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info); 5254 bool ok; 5255 tree scalar_type, vectype; 5256 gimple pattern_stmt; 5257 gimple_seq pattern_def_seq; 5258 5259 if (vect_print_dump_info (REPORT_DETAILS)) 5260 { 5261 fprintf (vect_dump, "==> examining statement: "); 5262 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 5263 } 5264 5265 if (gimple_has_volatile_ops (stmt)) 5266 { 5267 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 5268 fprintf (vect_dump, "not vectorized: stmt has volatile operands"); 5269 5270 return false; 5271 } 5272 5273 /* Skip stmts that do not need to be vectorized. In loops this is expected 5274 to include: 5275 - the COND_EXPR which is the loop exit condition 5276 - any LABEL_EXPRs in the loop 5277 - computations that are used only for array indexing or loop control. 5278 In basic blocks we only analyze statements that are a part of some SLP 5279 instance, therefore, all the statements are relevant. 5280 5281 Pattern statement needs to be analyzed instead of the original statement 5282 if the original statement is not relevant. Otherwise, we analyze both 5283 statements. */ 5284 5285 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); 5286 if (!STMT_VINFO_RELEVANT_P (stmt_info) 5287 && !STMT_VINFO_LIVE_P (stmt_info)) 5288 { 5289 if (STMT_VINFO_IN_PATTERN_P (stmt_info) 5290 && pattern_stmt 5291 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) 5292 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) 5293 { 5294 /* Analyze PATTERN_STMT instead of the original stmt. 
*/ 5295 stmt = pattern_stmt; 5296 stmt_info = vinfo_for_stmt (pattern_stmt); 5297 if (vect_print_dump_info (REPORT_DETAILS)) 5298 { 5299 fprintf (vect_dump, "==> examining pattern statement: "); 5300 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 5301 } 5302 } 5303 else 5304 { 5305 if (vect_print_dump_info (REPORT_DETAILS)) 5306 fprintf (vect_dump, "irrelevant."); 5307 5308 return true; 5309 } 5310 } 5311 else if (STMT_VINFO_IN_PATTERN_P (stmt_info) 5312 && pattern_stmt 5313 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) 5314 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) 5315 { 5316 /* Analyze PATTERN_STMT too. */ 5317 if (vect_print_dump_info (REPORT_DETAILS)) 5318 { 5319 fprintf (vect_dump, "==> examining pattern statement: "); 5320 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 5321 } 5322 5323 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node)) 5324 return false; 5325 } 5326 5327 if (is_pattern_stmt_p (stmt_info) 5328 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))) 5329 { 5330 gimple_stmt_iterator si; 5331 5332 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si)) 5333 { 5334 gimple pattern_def_stmt = gsi_stmt (si); 5335 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt)) 5336 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))) 5337 { 5338 /* Analyze def stmt of STMT if it's a pattern stmt. */ 5339 if (vect_print_dump_info (REPORT_DETAILS)) 5340 { 5341 fprintf (vect_dump, "==> examining pattern def statement: "); 5342 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM); 5343 } 5344 5345 if (!vect_analyze_stmt (pattern_def_stmt, 5346 need_to_vectorize, node)) 5347 return false; 5348 } 5349 } 5350 } 5351 5352 switch (STMT_VINFO_DEF_TYPE (stmt_info)) 5353 { 5354 case vect_internal_def: 5355 break; 5356 5357 case vect_reduction_def: 5358 case vect_nested_cycle: 5359 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer 5360 || relevance == vect_used_in_outer_by_reduction 5361 || relevance == vect_unused_in_scope)); 5362 break; 5363 5364 case vect_induction_def: 5365 case vect_constant_def: 5366 case vect_external_def: 5367 case vect_unknown_def_type: 5368 default: 5369 gcc_unreachable (); 5370 } 5371 5372 if (bb_vinfo) 5373 { 5374 gcc_assert (PURE_SLP_STMT (stmt_info)); 5375 5376 scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); 5377 if (vect_print_dump_info (REPORT_DETAILS)) 5378 { 5379 fprintf (vect_dump, "get vectype for scalar type: "); 5380 print_generic_expr (vect_dump, scalar_type, TDF_SLIM); 5381 } 5382 5383 vectype = get_vectype_for_scalar_type (scalar_type); 5384 if (!vectype) 5385 { 5386 if (vect_print_dump_info (REPORT_DETAILS)) 5387 { 5388 fprintf (vect_dump, "not SLPed: unsupported data-type "); 5389 print_generic_expr (vect_dump, scalar_type, TDF_SLIM); 5390 } 5391 return false; 5392 } 5393 5394 if (vect_print_dump_info (REPORT_DETAILS)) 5395 { 5396 fprintf (vect_dump, "vectype: "); 5397 print_generic_expr (vect_dump, vectype, TDF_SLIM); 5398 } 5399 5400 STMT_VINFO_VECTYPE (stmt_info) = vectype; 5401 } 5402 5403 if (STMT_VINFO_RELEVANT_P (stmt_info)) 5404 { 5405 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt)))); 5406 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); 5407 *need_to_vectorize = true; 5408 } 5409 5410 ok = true; 5411 if (!bb_vinfo 5412 && (STMT_VINFO_RELEVANT_P (stmt_info) 5413 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)) 5414 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL) 5415 || vectorizable_shift (stmt, NULL, NULL, NULL) 5416 || 
	  || vectorizable_operation (stmt, NULL, NULL, NULL)
	  || vectorizable_assignment (stmt, NULL, NULL, NULL)
	  || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
	  || vectorizable_call (stmt, NULL, NULL, NULL)
	  || vectorizable_store (stmt, NULL, NULL, NULL)
	  || vectorizable_reduction (stmt, NULL, NULL, NULL)
	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
  else
    {
      if (bb_vinfo)
	ok = (vectorizable_conversion (stmt, NULL, NULL, node)
	      || vectorizable_shift (stmt, NULL, NULL, node)
	      || vectorizable_operation (stmt, NULL, NULL, node)
	      || vectorizable_assignment (stmt, NULL, NULL, node)
	      || vectorizable_load (stmt, NULL, NULL, node, NULL)
	      || vectorizable_call (stmt, NULL, NULL, node)
	      || vectorizable_store (stmt, NULL, NULL, node)
	      || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
    }

  if (!ok)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
	{
	  fprintf (vect_dump, "not vectorized: relevant stmt not ");
	  fprintf (vect_dump, "supported: ");
	  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	}

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL);

  if (!ok)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
	{
	  fprintf (vect_dump, "not vectorized: live stmt not ");
	  fprintf (vect_dump, "supported: ");
	  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	}

      return false;
    }

  return true;
}
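
/* Note: the vectorizable_* routines above are invoked with a NULL
   gimple_stmt_iterator and a NULL VEC_STMT, which in effect puts them in
   analysis-only mode; vect_transform_stmt below calls the same routines
   again with a real iterator so that they actually emit the vector code.  */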


/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at GSI.  */

bool
vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
		     bool *strided_store, slp_tree slp_node,
		     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
				slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
	{
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their vec_stmt_info shouldn't be freed
	     meanwhile.  */
	  *strided_store = true;
	  if (STMT_VINFO_VEC_STMT (stmt_info))
	    is_store = true;
	}
      else
	is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "stmt not supported.");
	  gcc_unreachable ();
	}
    }

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
				  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
	  || STMT_VINFO_RELEVANT (stmt_info) ==
					   vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
				 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple exit_phi;

      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	scalar_dest = PHI_RESULT (stmt);
      else
	scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
	{
	  if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
	    {
	      exit_phi = USE_STMT (use_p);
	      STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
	    }
	}
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}


/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
{
  gimple next = first_stmt;
  gimple tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
	next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      gsi_remove (&next_si, true);
      free_stmt_vec_info (next);
      next = tmp;
    }
}


/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
		   bb_vec_info bb_vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res) = NULL;
  STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
  STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
  STMT_SLP_TYPE (res) = loop_vect;
  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;
  GROUP_READ_WRITE_DEPENDENCE (res) = false;

  return res;
}


/* Create a hash table for stmt_vec_info.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec);
  stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
}


/* Free hash table for stmt_vec_info.  */
void
free_stmt_vec_info_vec (void)
{
  gcc_assert (stmt_vec_info_vec);
  VEC_free (vec_void_p, heap, stmt_vec_info_vec);
}


/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
	{
	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
	  if (seq)
	    {
	      gimple_stmt_iterator si;
	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
		free_stmt_vec_info (gsi_stmt (si));
	    }
	  free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
	}
    }

  VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}


/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  enum machine_mode inner_mode = TYPE_MODE (scalar_type);
  enum machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0)
    return NULL_TREE;

  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
    return NULL_TREE;

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
						  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
	   && !INTEGRAL_TYPE_P (scalar_type)
	   && !POINTER_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
						  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
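  /* For example, with a 4-byte integer element and SIZE == 16 this looks
     for a 4-unit vector mode (V4SImode on many targets); whether such a
     mode actually exists is entirely up to the target.  */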
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits <= 1)
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "get vectype with %d units of type ", nunits);
      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
    }

  if (!vectype)
    return NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vectype: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
    }

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "mode not supported by target.");
      return NULL_TREE;
    }

  return vectype;
}

unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
						  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}

/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of the same size as
   VECTOR_TYPE, if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  return get_vectype_for_scalar_type_and_size
	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}

/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of STMT in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */
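
/* As a (simplified) illustration, in

     for (i = 0; i < n; i++)
       a[i] = b[i] + x;

   the loop-invariant 'x' is classified as vect_external_def, a literal
   constant operand would be vect_constant_def, and the SSA name holding
   the loaded value of b[i] is vect_internal_def.  */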

bool
vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
		    bb_vec_info bb_vinfo, gimple *def_stmt,
		    tree *def, enum vect_def_type *dt)
{
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL;
  *def = NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_is_simple_use: operand ");
      print_generic_expr (vect_dump, operand, TDF_SLIM);
    }

  if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "non-associatable copy.");
      operand = TREE_OPERAND (operand, 0);
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "not ssa-name.");
      return false;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "no def_stmt.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "def_stmt: ");
      print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
    }

  /* Empty stmt is expected only in case of a function argument.
     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
  if (gimple_nop_p (*def_stmt))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  bb = gimple_bb (*def_stmt);

  if ((loop && !flow_bb_inside_loop_p (loop, bb))
      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
    *dt = vect_external_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (*dt == vect_unknown_def_type
      || (stmt
	  && *dt == vect_double_reduction_def
	  && gimple_code (stmt) != GIMPLE_PHI))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "Unsupported pattern.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "type of def: %d.", *dt);

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
      *def = gimple_phi_result (*def_stmt);
      break;

    case GIMPLE_ASSIGN:
      *def = gimple_assign_lhs (*def_stmt);
      break;

    case GIMPLE_CALL:
      *def = gimple_call_lhs (*def_stmt);
      if (*def != NULL)
	break;
      /* FALLTHRU */
    default:
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "unsupported defining stmt: ");
      return false;
    }

  return true;
}

/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use, but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */
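
/* In the constant/external case a natural choice for the caller is usually
   the vector type already picked for the other operands of the use stmt,
   rather than one derived from the scalar operand's own type.  */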

bool
vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
		      bb_vec_info bb_vinfo, gimple *def_stmt,
		      tree *def, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
			   def, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && !STMT_VINFO_RELEVANT (stmt_info)
	  && !STMT_VINFO_LIVE_P (stmt_info))
	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}


/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - DECL1 and DECL2 are decls of target builtin functions to be used
   when vectorizing the operation, if available.  In this case,
   CODE1 and CODE2 are CALL_EXPR.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */
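
/* As a concrete (target-dependent) illustration: widening a V16QI vector to
   V8HI typically maps onto the vec_unpacku_lo_<mode>/vec_unpacku_hi_<mode>
   optabs (or their signed counterparts), each producing one V8HI result
   from one half of the input vector.  */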

bool
supportable_widening_operation (enum tree_code code, gimple stmt,
				tree vectype_out, tree vectype_in,
				tree *decl1, tree *decl2,
				enum tree_code *code1, enum tree_code *code2,
				int *multi_step_cvt,
				VEC (tree, heap) **interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  bool ordered_p;
  enum machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  /* The result of a vectorized widening operation usually requires two vectors
     (because the widened results do not fit into one vector).  The generated
     vector results would normally be expected to be generated in the same
     order as in the original scalar computation, i.e. if 8 results are
     generated in each vector iteration, they are to be organized as follows:
	vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].

     However, in the special case that the result of the widening operation is
     used in a reduction computation only, the order doesn't matter (because
     when vectorizing a reduction we change the order of the computation).
     Some targets can take advantage of this and generate more efficient code.
     For example, targets like Altivec that support widen_mult using a sequence
     of {mult_even,mult_odd} generate the following vectors:
	vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].

     When vectorizing outer-loops, we execute the inner-loop sequentially
     (each vectorized inner-loop iteration contributes to VF outer-loop
     iterations in parallel).  We therefore don't allow changing the order
     of the computation in the inner-loop during outer-loop vectorization.  */

  if (vect_loop
      && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
      && !nested_in_vect_loop_p (vect_loop, stmt))
    ordered_p = false;
  else
    ordered_p = true;

  if (!ordered_p
      && code == WIDEN_MULT_EXPR
      && targetm.vectorize.builtin_mul_widen_even
      && targetm.vectorize.builtin_mul_widen_even (vectype)
      && targetm.vectorize.builtin_mul_widen_odd
      && targetm.vectorize.builtin_mul_widen_odd (vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "Unordered widening operation detected.");

      *code1 = *code2 = CALL_EXPR;
      *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
      *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
      return true;
    }

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
	 computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
	= lang_hooks.types.type_for_mode (intermediate_mode,
					  TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      VEC_quick_push (tree, *interm_types, intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  VEC_free (tree, heap, *interm_types);
  return false;
}


/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */
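
/* For instance, narrowing int to char on a (hypothetical) target with only
   128-bit vectors and VEC_PACK_TRUNC support would go V4SI -> V8HI -> V16QI:
   CODE1 is VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT is 1, and INTERM_TYPES holds
   the single intermediate (short-element) vector type.  */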

bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 VEC (tree, heap) **interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
	 tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != NULL
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
	= lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
			       optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      VEC_quick_push (tree, *interm_types, intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  VEC_free (tree, heap, *interm_types);
  return false;
}