1 /* Analysis Utilities for Loop Vectorization. 2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 3 Free Software Foundation, Inc. 4 Contributed by Dorit Nuzman <dorit@il.ibm.com> 5 6 This file is part of GCC. 7 8 GCC is free software; you can redistribute it and/or modify it under 9 the terms of the GNU General Public License as published by the Free 10 Software Foundation; either version 3, or (at your option) any later 11 version. 12 13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 14 WARRANTY; without even the implied warranty of MERCHANTABILITY or 15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 16 for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with GCC; see the file COPYING3. If not see 20 <http://www.gnu.org/licenses/>. */ 21 22 #include "config.h" 23 #include "system.h" 24 #include "coretypes.h" 25 #include "tm.h" 26 #include "ggc.h" 27 #include "tree.h" 28 #include "target.h" 29 #include "basic-block.h" 30 #include "gimple-pretty-print.h" 31 #include "tree-flow.h" 32 #include "tree-dump.h" 33 #include "cfgloop.h" 34 #include "expr.h" 35 #include "optabs.h" 36 #include "params.h" 37 #include "tree-data-ref.h" 38 #include "tree-vectorizer.h" 39 #include "recog.h" 40 #include "diagnostic-core.h" 41 42 /* Pattern recognition functions */ 43 static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *, 44 tree *); 45 static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *, 46 tree *); 47 static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *, 48 tree *); 49 static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *); 50 static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *, 51 tree *); 52 static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **, 53 tree *, tree *); 54 static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **, 55 tree *, tree *); 56 static gimple vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **, 57 tree *, tree *); 58 static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **, 59 tree *, tree *); 60 static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *); 61 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { 62 vect_recog_widen_mult_pattern, 63 vect_recog_widen_sum_pattern, 64 vect_recog_dot_prod_pattern, 65 vect_recog_pow_pattern, 66 vect_recog_widen_shift_pattern, 67 vect_recog_over_widening_pattern, 68 vect_recog_vector_vector_shift_pattern, 69 vect_recog_sdivmod_pow2_pattern, 70 vect_recog_mixed_size_cond_pattern, 71 vect_recog_bool_pattern}; 72 73 static inline void 74 append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt) 75 { 76 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info), 77 stmt); 78 } 79 80 static inline void 81 new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt) 82 { 83 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL; 84 append_pattern_def_seq (stmt_info, stmt); 85 } 86 87 /* If the LHS of DEF_STMT has a single use, and that statement is 88 in the same loop, return it. */ 89 90 static gimple 91 vect_single_imm_use (gimple def_stmt) 92 { 93 stmt_vec_info stmt_vinfo = vinfo_for_stmt (def_stmt); 94 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 95 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 96 tree lhs = gimple_assign_lhs (def_stmt); 97 use_operand_p use_p; 98 gimple use_stmt; 99 100 if (!single_imm_use (lhs, &use_p, &use_stmt)) 101 return NULL; 102 103 if (!gimple_bb (use_stmt)) 104 return NULL; 105 106 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) 107 return NULL; 108 109 gcc_assert (vinfo_for_stmt (use_stmt)); 110 return use_stmt; 111 } 112 113 /* Function widened_name_p 114 115 Check whether NAME, an ssa-name used in USE_STMT, 116 is a result of a type-promotion, such that: 117 DEF_STMT: NAME = NOP (name0) 118 where the type of name0 (HALF_TYPE) is smaller than the type of NAME. 119 If CHECK_SIGN is TRUE, check that either both types are signed or both are 120 unsigned. */ 121 122 static bool 123 widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt, 124 bool check_sign) 125 { 126 tree dummy; 127 gimple dummy_gimple; 128 loop_vec_info loop_vinfo; 129 stmt_vec_info stmt_vinfo; 130 tree type = TREE_TYPE (name); 131 tree oprnd0; 132 enum vect_def_type dt; 133 tree def; 134 135 stmt_vinfo = vinfo_for_stmt (use_stmt); 136 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 137 138 if (!vect_is_simple_use (name, use_stmt, loop_vinfo, NULL, def_stmt, &def, 139 &dt)) 140 return false; 141 142 if (dt != vect_internal_def 143 && dt != vect_external_def && dt != vect_constant_def) 144 return false; 145 146 if (! *def_stmt) 147 return false; 148 149 if (!is_gimple_assign (*def_stmt)) 150 return false; 151 152 if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR) 153 return false; 154 155 oprnd0 = gimple_assign_rhs1 (*def_stmt); 156 157 *half_type = TREE_TYPE (oprnd0); 158 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type) 159 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign) 160 || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2))) 161 return false; 162 163 if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo, 164 NULL, &dummy_gimple, &dummy, &dt)) 165 return false; 166 167 return true; 168 } 169 170 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT 171 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */ 172 173 static tree 174 vect_recog_temp_ssa_var (tree type, gimple stmt) 175 { 176 tree var = create_tmp_var (type, "patt"); 177 178 add_referenced_var (var); 179 var = make_ssa_name (var, stmt); 180 return var; 181 } 182 183 /* Function vect_recog_dot_prod_pattern 184 185 Try to find the following pattern: 186 187 type x_t, y_t; 188 TYPE1 prod; 189 TYPE2 sum = init; 190 loop: 191 sum_0 = phi <init, sum_1> 192 S1 x_t = ... 193 S2 y_t = ... 194 S3 x_T = (TYPE1) x_t; 195 S4 y_T = (TYPE1) y_t; 196 S5 prod = x_T * y_T; 197 [S6 prod = (TYPE2) prod; #optional] 198 S7 sum_1 = prod + sum_0; 199 200 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the 201 same size of 'TYPE1' or bigger. This is a special case of a reduction 202 computation. 203 204 Input: 205 206 * STMTS: Contains a stmt from which the pattern search begins. In the 207 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7} 208 will be detected. 209 210 Output: 211 212 * TYPE_IN: The type of the input arguments to the pattern. 213 214 * TYPE_OUT: The type of the output of this pattern. 215 216 * Return value: A new stmt that will be used to replace the sequence of 217 stmts that constitute the pattern. In this case it will be: 218 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0> 219 220 Note: The dot-prod idiom is a widening reduction pattern that is 221 vectorized without preserving all the intermediate results. It 222 produces only N/2 (widened) results (by summing up pairs of 223 intermediate results) rather than all N results. Therefore, we 224 cannot allow this pattern when we want to get all the results and in 225 the correct order (as is the case when this computation is in an 226 inner-loop nested in an outer-loop that us being vectorized). */ 227 228 static gimple 229 vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in, 230 tree *type_out) 231 { 232 gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0); 233 tree oprnd0, oprnd1; 234 tree oprnd00, oprnd01; 235 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 236 tree type, half_type; 237 gimple pattern_stmt; 238 tree prod_type; 239 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 240 struct loop *loop = LOOP_VINFO_LOOP (loop_info); 241 tree var; 242 243 if (!is_gimple_assign (last_stmt)) 244 return NULL; 245 246 type = gimple_expr_type (last_stmt); 247 248 /* Look for the following pattern 249 DX = (TYPE1) X; 250 DY = (TYPE1) Y; 251 DPROD = DX * DY; 252 DDPROD = (TYPE2) DPROD; 253 sum_1 = DDPROD + sum_0; 254 In which 255 - DX is double the size of X 256 - DY is double the size of Y 257 - DX, DY, DPROD all have the same type 258 - sum is the same size of DPROD or bigger 259 - sum has been recognized as a reduction variable. 260 261 This is equivalent to: 262 DPROD = X w* Y; #widen mult 263 sum_1 = DPROD w+ sum_0; #widen summation 264 or 265 DPROD = X w* Y; #widen mult 266 sum_1 = DPROD + sum_0; #summation 267 */ 268 269 /* Starting from LAST_STMT, follow the defs of its uses in search 270 of the above pattern. */ 271 272 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) 273 return NULL; 274 275 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 276 { 277 /* Has been detected as widening-summation? */ 278 279 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo); 280 type = gimple_expr_type (stmt); 281 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR) 282 return NULL; 283 oprnd0 = gimple_assign_rhs1 (stmt); 284 oprnd1 = gimple_assign_rhs2 (stmt); 285 half_type = TREE_TYPE (oprnd0); 286 } 287 else 288 { 289 gimple def_stmt; 290 291 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) 292 return NULL; 293 oprnd0 = gimple_assign_rhs1 (last_stmt); 294 oprnd1 = gimple_assign_rhs2 (last_stmt); 295 if (!types_compatible_p (TREE_TYPE (oprnd0), type) 296 || !types_compatible_p (TREE_TYPE (oprnd1), type)) 297 return NULL; 298 stmt = last_stmt; 299 300 if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true)) 301 { 302 stmt = def_stmt; 303 oprnd0 = gimple_assign_rhs1 (stmt); 304 } 305 else 306 half_type = type; 307 } 308 309 /* So far so good. Since last_stmt was detected as a (summation) reduction, 310 we know that oprnd1 is the reduction variable (defined by a loop-header 311 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. 312 Left to check that oprnd0 is defined by a (widen_)mult_expr */ 313 if (TREE_CODE (oprnd0) != SSA_NAME) 314 return NULL; 315 316 prod_type = half_type; 317 stmt = SSA_NAME_DEF_STMT (oprnd0); 318 319 /* It could not be the dot_prod pattern if the stmt is outside the loop. */ 320 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt))) 321 return NULL; 322 323 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi 324 inside the loop (in case we are analyzing an outer-loop). */ 325 if (!is_gimple_assign (stmt)) 326 return NULL; 327 stmt_vinfo = vinfo_for_stmt (stmt); 328 gcc_assert (stmt_vinfo); 329 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def) 330 return NULL; 331 if (gimple_assign_rhs_code (stmt) != MULT_EXPR) 332 return NULL; 333 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 334 { 335 /* Has been detected as a widening multiplication? */ 336 337 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo); 338 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR) 339 return NULL; 340 stmt_vinfo = vinfo_for_stmt (stmt); 341 gcc_assert (stmt_vinfo); 342 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def); 343 oprnd00 = gimple_assign_rhs1 (stmt); 344 oprnd01 = gimple_assign_rhs2 (stmt); 345 } 346 else 347 { 348 tree half_type0, half_type1; 349 gimple def_stmt; 350 tree oprnd0, oprnd1; 351 352 oprnd0 = gimple_assign_rhs1 (stmt); 353 oprnd1 = gimple_assign_rhs2 (stmt); 354 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type) 355 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type)) 356 return NULL; 357 if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true)) 358 return NULL; 359 oprnd00 = gimple_assign_rhs1 (def_stmt); 360 if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true)) 361 return NULL; 362 oprnd01 = gimple_assign_rhs1 (def_stmt); 363 if (!types_compatible_p (half_type0, half_type1)) 364 return NULL; 365 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2) 366 return NULL; 367 } 368 369 half_type = TREE_TYPE (oprnd00); 370 *type_in = half_type; 371 *type_out = type; 372 373 /* Pattern detected. Create a stmt to be used to replace the pattern: */ 374 var = vect_recog_temp_ssa_var (type, NULL); 375 pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var, 376 oprnd00, oprnd01, oprnd1); 377 378 if (vect_print_dump_info (REPORT_DETAILS)) 379 { 380 fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: "); 381 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); 382 } 383 384 /* We don't allow changing the order of the computation in the inner-loop 385 when doing outer-loop vectorization. */ 386 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); 387 388 return pattern_stmt; 389 } 390 391 392 /* Handle widening operation by a constant. At the moment we support MULT_EXPR 393 and LSHIFT_EXPR. 394 395 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR 396 we check that CONST_OPRND is less or equal to the size of HALF_TYPE. 397 398 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than 399 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE) 400 that satisfies the above restrictions, we can perform a widening opeartion 401 from the intermediate type to TYPE and replace a_T = (TYPE) a_t; 402 with a_it = (interm_type) a_t; */ 403 404 static bool 405 vect_handle_widen_op_by_const (gimple stmt, enum tree_code code, 406 tree const_oprnd, tree *oprnd, 407 VEC (gimple, heap) **stmts, tree type, 408 tree *half_type, gimple def_stmt) 409 { 410 tree new_type, new_oprnd, tmp; 411 gimple new_stmt; 412 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); 413 struct loop *loop = LOOP_VINFO_LOOP (loop_info); 414 415 if (code != MULT_EXPR && code != LSHIFT_EXPR) 416 return false; 417 418 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type)) 419 || (code == LSHIFT_EXPR 420 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type)) 421 != 1)) 422 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2)) 423 { 424 /* CONST_OPRND is a constant of HALF_TYPE. */ 425 *oprnd = gimple_assign_rhs1 (def_stmt); 426 return true; 427 } 428 429 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4) 430 || !gimple_bb (def_stmt) 431 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) 432 || !vinfo_for_stmt (def_stmt)) 433 return false; 434 435 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for 436 a type 2 times bigger than HALF_TYPE. */ 437 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2, 438 TYPE_UNSIGNED (type)); 439 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type)) 440 || (code == LSHIFT_EXPR 441 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1)) 442 return false; 443 444 /* Use NEW_TYPE for widening operation. */ 445 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt))) 446 { 447 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); 448 /* Check if the already created pattern stmt is what we need. */ 449 if (!is_gimple_assign (new_stmt) 450 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR 451 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type) 452 return false; 453 454 VEC_safe_push (gimple, heap, *stmts, def_stmt); 455 *oprnd = gimple_assign_lhs (new_stmt); 456 } 457 else 458 { 459 /* Create a_T = (NEW_TYPE) a_t; */ 460 *oprnd = gimple_assign_rhs1 (def_stmt); 461 tmp = create_tmp_var (new_type, NULL); 462 add_referenced_var (tmp); 463 new_oprnd = make_ssa_name (tmp, NULL); 464 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd, 465 NULL_TREE); 466 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt; 467 VEC_safe_push (gimple, heap, *stmts, def_stmt); 468 *oprnd = new_oprnd; 469 } 470 471 *half_type = new_type; 472 return true; 473 } 474 475 476 /* Function vect_recog_widen_mult_pattern 477 478 Try to find the following pattern: 479 480 type a_t, b_t; 481 TYPE a_T, b_T, prod_T; 482 483 S1 a_t = ; 484 S2 b_t = ; 485 S3 a_T = (TYPE) a_t; 486 S4 b_T = (TYPE) b_t; 487 S5 prod_T = a_T * b_T; 488 489 where type 'TYPE' is at least double the size of type 'type'. 490 491 Also detect unsgigned cases: 492 493 unsigned type a_t, b_t; 494 unsigned TYPE u_prod_T; 495 TYPE a_T, b_T, prod_T; 496 497 S1 a_t = ; 498 S2 b_t = ; 499 S3 a_T = (TYPE) a_t; 500 S4 b_T = (TYPE) b_t; 501 S5 prod_T = a_T * b_T; 502 S6 u_prod_T = (unsigned TYPE) prod_T; 503 504 and multiplication by constants: 505 506 type a_t; 507 TYPE a_T, prod_T; 508 509 S1 a_t = ; 510 S3 a_T = (TYPE) a_t; 511 S5 prod_T = a_T * CONST; 512 513 A special case of multiplication by constants is when 'TYPE' is 4 times 514 bigger than 'type', but CONST fits an intermediate type 2 times smaller 515 than 'TYPE'. In that case we create an additional pattern stmt for S3 516 to create a variable of the intermediate type, and perform widen-mult 517 on the intermediate type as well: 518 519 type a_t; 520 interm_type a_it; 521 TYPE a_T, prod_T, prod_T'; 522 523 S1 a_t = ; 524 S3 a_T = (TYPE) a_t; 525 '--> a_it = (interm_type) a_t; 526 S5 prod_T = a_T * CONST; 527 '--> prod_T' = a_it w* CONST; 528 529 Input/Output: 530 531 * STMTS: Contains a stmt from which the pattern search begins. In the 532 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)} 533 is detected. In case of unsigned widen-mult, the original stmt (S5) is 534 replaced with S6 in STMTS. In case of multiplication by a constant 535 of an intermediate type (the last case above), STMTS also contains S3 536 (inserted before S5). 537 538 Output: 539 540 * TYPE_IN: The type of the input arguments to the pattern. 541 542 * TYPE_OUT: The type of the output of this pattern. 543 544 * Return value: A new stmt that will be used to replace the sequence of 545 stmts that constitute the pattern. In this case it will be: 546 WIDEN_MULT <a_t, b_t> 547 */ 548 549 static gimple 550 vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts, 551 tree *type_in, tree *type_out) 552 { 553 gimple last_stmt = VEC_pop (gimple, *stmts); 554 gimple def_stmt0, def_stmt1; 555 tree oprnd0, oprnd1; 556 tree type, half_type0, half_type1; 557 gimple pattern_stmt; 558 tree vectype, vectype_out = NULL_TREE; 559 tree dummy; 560 tree var; 561 enum tree_code dummy_code; 562 int dummy_int; 563 VEC (tree, heap) *dummy_vec; 564 bool op1_ok; 565 566 if (!is_gimple_assign (last_stmt)) 567 return NULL; 568 569 type = gimple_expr_type (last_stmt); 570 571 /* Starting from LAST_STMT, follow the defs of its uses in search 572 of the above pattern. */ 573 574 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR) 575 return NULL; 576 577 oprnd0 = gimple_assign_rhs1 (last_stmt); 578 oprnd1 = gimple_assign_rhs2 (last_stmt); 579 if (!types_compatible_p (TREE_TYPE (oprnd0), type) 580 || !types_compatible_p (TREE_TYPE (oprnd1), type)) 581 return NULL; 582 583 /* Check argument 0. */ 584 if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false)) 585 return NULL; 586 /* Check argument 1. */ 587 op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false); 588 589 if (op1_ok) 590 { 591 oprnd0 = gimple_assign_rhs1 (def_stmt0); 592 oprnd1 = gimple_assign_rhs1 (def_stmt1); 593 } 594 else 595 { 596 if (TREE_CODE (oprnd1) == INTEGER_CST 597 && TREE_CODE (half_type0) == INTEGER_TYPE 598 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1, 599 &oprnd0, stmts, type, 600 &half_type0, def_stmt0)) 601 half_type1 = half_type0; 602 else 603 return NULL; 604 } 605 606 /* Handle unsigned case. Look for 607 S6 u_prod_T = (unsigned TYPE) prod_T; 608 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */ 609 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0)) 610 { 611 gimple use_stmt; 612 tree use_lhs; 613 tree use_type; 614 615 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1)) 616 return NULL; 617 618 use_stmt = vect_single_imm_use (last_stmt); 619 if (!use_stmt || !is_gimple_assign (use_stmt) 620 || gimple_assign_rhs_code (use_stmt) != NOP_EXPR) 621 return NULL; 622 623 use_lhs = gimple_assign_lhs (use_stmt); 624 use_type = TREE_TYPE (use_lhs); 625 if (!INTEGRAL_TYPE_P (use_type) 626 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type)) 627 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type))) 628 return NULL; 629 630 type = use_type; 631 last_stmt = use_stmt; 632 } 633 634 if (!types_compatible_p (half_type0, half_type1)) 635 return NULL; 636 637 /* Pattern detected. */ 638 if (vect_print_dump_info (REPORT_DETAILS)) 639 fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: "); 640 641 /* Check target support */ 642 vectype = get_vectype_for_scalar_type (half_type0); 643 vectype_out = get_vectype_for_scalar_type (type); 644 if (!vectype 645 || !vectype_out 646 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, 647 vectype_out, vectype, 648 &dummy, &dummy, &dummy_code, 649 &dummy_code, &dummy_int, &dummy_vec)) 650 return NULL; 651 652 *type_in = vectype; 653 *type_out = vectype_out; 654 655 /* Pattern supported. Create a stmt to be used to replace the pattern: */ 656 var = vect_recog_temp_ssa_var (type, NULL); 657 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0, 658 oprnd1); 659 660 if (vect_print_dump_info (REPORT_DETAILS)) 661 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); 662 663 VEC_safe_push (gimple, heap, *stmts, last_stmt); 664 return pattern_stmt; 665 } 666 667 668 /* Function vect_recog_pow_pattern 669 670 Try to find the following pattern: 671 672 x = POW (y, N); 673 674 with POW being one of pow, powf, powi, powif and N being 675 either 2 or 0.5. 676 677 Input: 678 679 * LAST_STMT: A stmt from which the pattern search begins. 680 681 Output: 682 683 * TYPE_IN: The type of the input arguments to the pattern. 684 685 * TYPE_OUT: The type of the output of this pattern. 686 687 * Return value: A new stmt that will be used to replace the sequence of 688 stmts that constitute the pattern. In this case it will be: 689 x = x * x 690 or 691 x = sqrt (x) 692 */ 693 694 static gimple 695 vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in, 696 tree *type_out) 697 { 698 gimple last_stmt = VEC_index (gimple, *stmts, 0); 699 tree fn, base, exp = NULL; 700 gimple stmt; 701 tree var; 702 703 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL) 704 return NULL; 705 706 fn = gimple_call_fndecl (last_stmt); 707 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL) 708 return NULL; 709 710 switch (DECL_FUNCTION_CODE (fn)) 711 { 712 case BUILT_IN_POWIF: 713 case BUILT_IN_POWI: 714 case BUILT_IN_POWF: 715 case BUILT_IN_POW: 716 base = gimple_call_arg (last_stmt, 0); 717 exp = gimple_call_arg (last_stmt, 1); 718 if (TREE_CODE (exp) != REAL_CST 719 && TREE_CODE (exp) != INTEGER_CST) 720 return NULL; 721 break; 722 723 default: 724 return NULL; 725 } 726 727 /* We now have a pow or powi builtin function call with a constant 728 exponent. */ 729 730 *type_out = NULL_TREE; 731 732 /* Catch squaring. */ 733 if ((host_integerp (exp, 0) 734 && tree_low_cst (exp, 0) == 2) 735 || (TREE_CODE (exp) == REAL_CST 736 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2))) 737 { 738 *type_in = TREE_TYPE (base); 739 740 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL); 741 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base); 742 return stmt; 743 } 744 745 /* Catch square root. */ 746 if (TREE_CODE (exp) == REAL_CST 747 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf)) 748 { 749 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT); 750 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base)); 751 if (*type_in) 752 { 753 gimple stmt = gimple_build_call (newfn, 1, base); 754 if (vectorizable_function (stmt, *type_in, *type_in) 755 != NULL_TREE) 756 { 757 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt); 758 gimple_call_set_lhs (stmt, var); 759 return stmt; 760 } 761 } 762 } 763 764 return NULL; 765 } 766 767 768 /* Function vect_recog_widen_sum_pattern 769 770 Try to find the following pattern: 771 772 type x_t; 773 TYPE x_T, sum = init; 774 loop: 775 sum_0 = phi <init, sum_1> 776 S1 x_t = *p; 777 S2 x_T = (TYPE) x_t; 778 S3 sum_1 = x_T + sum_0; 779 780 where type 'TYPE' is at least double the size of type 'type', i.e - we're 781 summing elements of type 'type' into an accumulator of type 'TYPE'. This is 782 a special case of a reduction computation. 783 784 Input: 785 786 * LAST_STMT: A stmt from which the pattern search begins. In the example, 787 when this function is called with S3, the pattern {S2,S3} will be detected. 788 789 Output: 790 791 * TYPE_IN: The type of the input arguments to the pattern. 792 793 * TYPE_OUT: The type of the output of this pattern. 794 795 * Return value: A new stmt that will be used to replace the sequence of 796 stmts that constitute the pattern. In this case it will be: 797 WIDEN_SUM <x_t, sum_0> 798 799 Note: The widening-sum idiom is a widening reduction pattern that is 800 vectorized without preserving all the intermediate results. It 801 produces only N/2 (widened) results (by summing up pairs of 802 intermediate results) rather than all N results. Therefore, we 803 cannot allow this pattern when we want to get all the results and in 804 the correct order (as is the case when this computation is in an 805 inner-loop nested in an outer-loop that us being vectorized). */ 806 807 static gimple 808 vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in, 809 tree *type_out) 810 { 811 gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0); 812 tree oprnd0, oprnd1; 813 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 814 tree type, half_type; 815 gimple pattern_stmt; 816 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 817 struct loop *loop = LOOP_VINFO_LOOP (loop_info); 818 tree var; 819 820 if (!is_gimple_assign (last_stmt)) 821 return NULL; 822 823 type = gimple_expr_type (last_stmt); 824 825 /* Look for the following pattern 826 DX = (TYPE) X; 827 sum_1 = DX + sum_0; 828 In which DX is at least double the size of X, and sum_1 has been 829 recognized as a reduction variable. 830 */ 831 832 /* Starting from LAST_STMT, follow the defs of its uses in search 833 of the above pattern. */ 834 835 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) 836 return NULL; 837 838 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) 839 return NULL; 840 841 oprnd0 = gimple_assign_rhs1 (last_stmt); 842 oprnd1 = gimple_assign_rhs2 (last_stmt); 843 if (!types_compatible_p (TREE_TYPE (oprnd0), type) 844 || !types_compatible_p (TREE_TYPE (oprnd1), type)) 845 return NULL; 846 847 /* So far so good. Since last_stmt was detected as a (summation) reduction, 848 we know that oprnd1 is the reduction variable (defined by a loop-header 849 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. 850 Left to check that oprnd0 is defined by a cast from type 'type' to type 851 'TYPE'. */ 852 853 if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true)) 854 return NULL; 855 856 oprnd0 = gimple_assign_rhs1 (stmt); 857 *type_in = half_type; 858 *type_out = type; 859 860 /* Pattern detected. Create a stmt to be used to replace the pattern: */ 861 var = vect_recog_temp_ssa_var (type, NULL); 862 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var, 863 oprnd0, oprnd1); 864 865 if (vect_print_dump_info (REPORT_DETAILS)) 866 { 867 fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: "); 868 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); 869 } 870 871 /* We don't allow changing the order of the computation in the inner-loop 872 when doing outer-loop vectorization. */ 873 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); 874 875 return pattern_stmt; 876 } 877 878 879 /* Return TRUE if the operation in STMT can be performed on a smaller type. 880 881 Input: 882 STMT - a statement to check. 883 DEF - we support operations with two operands, one of which is constant. 884 The other operand can be defined by a demotion operation, or by a 885 previous statement in a sequence of over-promoted operations. In the 886 later case DEF is used to replace that operand. (It is defined by a 887 pattern statement we created for the previous statement in the 888 sequence). 889 890 Input/output: 891 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not 892 NULL, it's the type of DEF. 893 STMTS - additional pattern statements. If a pattern statement (type 894 conversion) is created in this function, its original statement is 895 added to STMTS. 896 897 Output: 898 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new 899 operands to use in the new pattern statement for STMT (will be created 900 in vect_recog_over_widening_pattern ()). 901 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern 902 statements for STMT: the first one is a type promotion and the second 903 one is the operation itself. We return the type promotion statement 904 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of 905 the second pattern statement. */ 906 907 static bool 908 vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type, 909 tree *op0, tree *op1, gimple *new_def_stmt, 910 VEC (gimple, heap) **stmts) 911 { 912 enum tree_code code; 913 tree const_oprnd, oprnd; 914 tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type; 915 gimple def_stmt, new_stmt; 916 bool first = false; 917 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); 918 struct loop *loop = LOOP_VINFO_LOOP (loop_info); 919 920 *op0 = NULL_TREE; 921 *op1 = NULL_TREE; 922 *new_def_stmt = NULL; 923 924 if (!is_gimple_assign (stmt)) 925 return false; 926 927 code = gimple_assign_rhs_code (stmt); 928 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR 929 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR) 930 return false; 931 932 oprnd = gimple_assign_rhs1 (stmt); 933 const_oprnd = gimple_assign_rhs2 (stmt); 934 type = gimple_expr_type (stmt); 935 936 if (TREE_CODE (oprnd) != SSA_NAME 937 || TREE_CODE (const_oprnd) != INTEGER_CST) 938 return false; 939 940 /* If oprnd has other uses besides that in stmt we cannot mark it 941 as being part of a pattern only. */ 942 if (!has_single_use (oprnd)) 943 return false; 944 945 /* If we are in the middle of a sequence, we use DEF from a previous 946 statement. Otherwise, OPRND has to be a result of type promotion. */ 947 if (*new_type) 948 { 949 half_type = *new_type; 950 oprnd = def; 951 } 952 else 953 { 954 first = true; 955 if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false) 956 || !gimple_bb (def_stmt) 957 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) 958 || !vinfo_for_stmt (def_stmt)) 959 return false; 960 } 961 962 /* Can we perform the operation on a smaller type? */ 963 switch (code) 964 { 965 case BIT_IOR_EXPR: 966 case BIT_XOR_EXPR: 967 case BIT_AND_EXPR: 968 if (!int_fits_type_p (const_oprnd, half_type)) 969 { 970 /* HALF_TYPE is not enough. Try a bigger type if possible. */ 971 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) 972 return false; 973 974 interm_type = build_nonstandard_integer_type ( 975 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); 976 if (!int_fits_type_p (const_oprnd, interm_type)) 977 return false; 978 } 979 980 break; 981 982 case LSHIFT_EXPR: 983 /* Try intermediate type - HALF_TYPE is not enough for sure. */ 984 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) 985 return false; 986 987 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size. 988 (e.g., if the original value was char, the shift amount is at most 8 989 if we want to use short). */ 990 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1) 991 return false; 992 993 interm_type = build_nonstandard_integer_type ( 994 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); 995 996 if (!vect_supportable_shift (code, interm_type)) 997 return false; 998 999 break; 1000 1001 case RSHIFT_EXPR: 1002 if (vect_supportable_shift (code, half_type)) 1003 break; 1004 1005 /* Try intermediate type - HALF_TYPE is not supported. */ 1006 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) 1007 return false; 1008 1009 interm_type = build_nonstandard_integer_type ( 1010 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); 1011 1012 if (!vect_supportable_shift (code, interm_type)) 1013 return false; 1014 1015 break; 1016 1017 default: 1018 gcc_unreachable (); 1019 } 1020 1021 /* There are four possible cases: 1022 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's 1023 the first statement in the sequence) 1024 a. The original, HALF_TYPE, is not enough - we replace the promotion 1025 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE. 1026 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original 1027 promotion. 1028 2. OPRND is defined by a pattern statement we created. 1029 a. Its type is not sufficient for the operation, we create a new stmt: 1030 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store 1031 this statement in NEW_DEF_STMT, and it is later put in 1032 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT. 1033 b. OPRND is good to use in the new statement. */ 1034 if (first) 1035 { 1036 if (interm_type) 1037 { 1038 /* Replace the original type conversion HALF_TYPE->TYPE with 1039 HALF_TYPE->INTERM_TYPE. */ 1040 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt))) 1041 { 1042 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); 1043 /* Check if the already created pattern stmt is what we need. */ 1044 if (!is_gimple_assign (new_stmt) 1045 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR 1046 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type) 1047 return false; 1048 1049 VEC_safe_push (gimple, heap, *stmts, def_stmt); 1050 oprnd = gimple_assign_lhs (new_stmt); 1051 } 1052 else 1053 { 1054 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */ 1055 oprnd = gimple_assign_rhs1 (def_stmt); 1056 tmp = create_tmp_reg (interm_type, NULL); 1057 add_referenced_var (tmp); 1058 new_oprnd = make_ssa_name (tmp, NULL); 1059 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, 1060 oprnd, NULL_TREE); 1061 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt; 1062 VEC_safe_push (gimple, heap, *stmts, def_stmt); 1063 oprnd = new_oprnd; 1064 } 1065 } 1066 else 1067 { 1068 /* Retrieve the operand before the type promotion. */ 1069 oprnd = gimple_assign_rhs1 (def_stmt); 1070 } 1071 } 1072 else 1073 { 1074 if (interm_type) 1075 { 1076 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */ 1077 tmp = create_tmp_reg (interm_type, NULL); 1078 add_referenced_var (tmp); 1079 new_oprnd = make_ssa_name (tmp, NULL); 1080 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, 1081 oprnd, NULL_TREE); 1082 oprnd = new_oprnd; 1083 *new_def_stmt = new_stmt; 1084 } 1085 1086 /* Otherwise, OPRND is already set. */ 1087 } 1088 1089 if (interm_type) 1090 *new_type = interm_type; 1091 else 1092 *new_type = half_type; 1093 1094 *op0 = oprnd; 1095 *op1 = fold_convert (*new_type, const_oprnd); 1096 1097 return true; 1098 } 1099 1100 1101 /* Try to find a statement or a sequence of statements that can be performed 1102 on a smaller type: 1103 1104 type x_t; 1105 TYPE x_T, res0_T, res1_T; 1106 loop: 1107 S1 x_t = *p; 1108 S2 x_T = (TYPE) x_t; 1109 S3 res0_T = op (x_T, C0); 1110 S4 res1_T = op (res0_T, C1); 1111 S5 ... = () res1_T; - type demotion 1112 1113 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are 1114 constants. 1115 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either 1116 be 'type' or some intermediate type. For now, we expect S5 to be a type 1117 demotion operation. We also check that S3 and S4 have only one use. */ 1118 1119 static gimple 1120 vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts, 1121 tree *type_in, tree *type_out) 1122 { 1123 gimple stmt = VEC_pop (gimple, *stmts); 1124 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL; 1125 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type; 1126 tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd; 1127 bool first; 1128 tree type = NULL; 1129 1130 first = true; 1131 while (1) 1132 { 1133 if (!vinfo_for_stmt (stmt) 1134 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt))) 1135 return NULL; 1136 1137 new_def_stmt = NULL; 1138 if (!vect_operation_fits_smaller_type (stmt, var, &new_type, 1139 &op0, &op1, &new_def_stmt, 1140 stmts)) 1141 { 1142 if (first) 1143 return NULL; 1144 else 1145 break; 1146 } 1147 1148 /* STMT can be performed on a smaller type. Check its uses. */ 1149 use_stmt = vect_single_imm_use (stmt); 1150 if (!use_stmt || !is_gimple_assign (use_stmt)) 1151 return NULL; 1152 1153 /* Create pattern statement for STMT. */ 1154 vectype = get_vectype_for_scalar_type (new_type); 1155 if (!vectype) 1156 return NULL; 1157 1158 /* We want to collect all the statements for which we create pattern 1159 statetments, except for the case when the last statement in the 1160 sequence doesn't have a corresponding pattern statement. In such 1161 case we associate the last pattern statement with the last statement 1162 in the sequence. Therefore, we only add the original statement to 1163 the list if we know that it is not the last. */ 1164 if (prev_stmt) 1165 VEC_safe_push (gimple, heap, *stmts, prev_stmt); 1166 1167 var = vect_recog_temp_ssa_var (new_type, NULL); 1168 pattern_stmt 1169 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var, 1170 op0, op1); 1171 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt; 1172 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt); 1173 1174 if (vect_print_dump_info (REPORT_DETAILS)) 1175 { 1176 fprintf (vect_dump, "created pattern stmt: "); 1177 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); 1178 } 1179 1180 type = gimple_expr_type (stmt); 1181 prev_stmt = stmt; 1182 stmt = use_stmt; 1183 1184 first = false; 1185 } 1186 1187 /* We got a sequence. We expect it to end with a type demotion operation. 1188 Otherwise, we quit (for now). There are three possible cases: the 1189 conversion is to NEW_TYPE (we don't do anything), the conversion is to 1190 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and 1191 NEW_TYPE differs (we create a new conversion statement). */ 1192 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))) 1193 { 1194 use_lhs = gimple_assign_lhs (use_stmt); 1195 use_type = TREE_TYPE (use_lhs); 1196 /* Support only type demotion or signedess change. */ 1197 if (!INTEGRAL_TYPE_P (use_type) 1198 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type)) 1199 return NULL; 1200 1201 /* Check that NEW_TYPE is not bigger than the conversion result. */ 1202 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)) 1203 return NULL; 1204 1205 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type) 1206 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type)) 1207 { 1208 /* Create NEW_TYPE->USE_TYPE conversion. */ 1209 tmp = create_tmp_reg (use_type, NULL); 1210 add_referenced_var (tmp); 1211 new_oprnd = make_ssa_name (tmp, NULL); 1212 pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, 1213 var, NULL_TREE); 1214 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt; 1215 1216 *type_in = get_vectype_for_scalar_type (new_type); 1217 *type_out = get_vectype_for_scalar_type (use_type); 1218 1219 /* We created a pattern statement for the last statement in the 1220 sequence, so we don't need to associate it with the pattern 1221 statement created for PREV_STMT. Therefore, we add PREV_STMT 1222 to the list in order to mark it later in vect_pattern_recog_1. */ 1223 if (prev_stmt) 1224 VEC_safe_push (gimple, heap, *stmts, prev_stmt); 1225 } 1226 else 1227 { 1228 if (prev_stmt) 1229 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt)) 1230 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt)); 1231 1232 *type_in = vectype; 1233 *type_out = NULL_TREE; 1234 } 1235 1236 VEC_safe_push (gimple, heap, *stmts, use_stmt); 1237 } 1238 else 1239 /* TODO: support general case, create a conversion to the correct type. */ 1240 return NULL; 1241 1242 /* Pattern detected. */ 1243 if (vect_print_dump_info (REPORT_DETAILS)) 1244 { 1245 fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: "); 1246 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); 1247 } 1248 1249 return pattern_stmt; 1250 } 1251 1252 /* Detect widening shift pattern: 1253 1254 type a_t; 1255 TYPE a_T, res_T; 1256 1257 S1 a_t = ; 1258 S2 a_T = (TYPE) a_t; 1259 S3 res_T = a_T << CONST; 1260 1261 where type 'TYPE' is at least double the size of type 'type'. 1262 1263 Also detect cases where the shift result is immediately converted 1264 to another type 'result_type' that is no larger in size than 'TYPE'. 1265 In those cases we perform a widen-shift that directly results in 1266 'result_type', to avoid a possible over-widening situation: 1267 1268 type a_t; 1269 TYPE a_T, res_T; 1270 result_type res_result; 1271 1272 S1 a_t = ; 1273 S2 a_T = (TYPE) a_t; 1274 S3 res_T = a_T << CONST; 1275 S4 res_result = (result_type) res_T; 1276 '--> res_result' = a_t w<< CONST; 1277 1278 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we 1279 create an additional pattern stmt for S2 to create a variable of an 1280 intermediate type, and perform widen-shift on the intermediate type: 1281 1282 type a_t; 1283 interm_type a_it; 1284 TYPE a_T, res_T, res_T'; 1285 1286 S1 a_t = ; 1287 S2 a_T = (TYPE) a_t; 1288 '--> a_it = (interm_type) a_t; 1289 S3 res_T = a_T << CONST; 1290 '--> res_T' = a_it <<* CONST; 1291 1292 Input/Output: 1293 1294 * STMTS: Contains a stmt from which the pattern search begins. 1295 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4 1296 in STMTS. When an intermediate type is used and a pattern statement is 1297 created for S2, we also put S2 here (before S3). 1298 1299 Output: 1300 1301 * TYPE_IN: The type of the input arguments to the pattern. 1302 1303 * TYPE_OUT: The type of the output of this pattern. 1304 1305 * Return value: A new stmt that will be used to replace the sequence of 1306 stmts that constitute the pattern. In this case it will be: 1307 WIDEN_LSHIFT_EXPR <a_t, CONST>. */ 1308 1309 static gimple 1310 vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts, 1311 tree *type_in, tree *type_out) 1312 { 1313 gimple last_stmt = VEC_pop (gimple, *stmts); 1314 gimple def_stmt0; 1315 tree oprnd0, oprnd1; 1316 tree type, half_type0; 1317 gimple pattern_stmt; 1318 tree vectype, vectype_out = NULL_TREE; 1319 tree dummy; 1320 tree var; 1321 enum tree_code dummy_code; 1322 int dummy_int; 1323 VEC (tree, heap) * dummy_vec; 1324 gimple use_stmt; 1325 1326 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt)) 1327 return NULL; 1328 1329 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt))) 1330 return NULL; 1331 1332 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR) 1333 return NULL; 1334 1335 oprnd0 = gimple_assign_rhs1 (last_stmt); 1336 oprnd1 = gimple_assign_rhs2 (last_stmt); 1337 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST) 1338 return NULL; 1339 1340 /* Check operand 0: it has to be defined by a type promotion. */ 1341 if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false)) 1342 return NULL; 1343 1344 /* Check operand 1: has to be positive. We check that it fits the type 1345 in vect_handle_widen_op_by_const (). */ 1346 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0) 1347 return NULL; 1348 1349 oprnd0 = gimple_assign_rhs1 (def_stmt0); 1350 type = gimple_expr_type (last_stmt); 1351 1352 /* Check for subsequent conversion to another type. */ 1353 use_stmt = vect_single_imm_use (last_stmt); 1354 if (use_stmt && is_gimple_assign (use_stmt) 1355 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)) 1356 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt))) 1357 { 1358 tree use_lhs = gimple_assign_lhs (use_stmt); 1359 tree use_type = TREE_TYPE (use_lhs); 1360 1361 if (INTEGRAL_TYPE_P (use_type) 1362 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type)) 1363 { 1364 last_stmt = use_stmt; 1365 type = use_type; 1366 } 1367 } 1368 1369 /* Check if this a widening operation. */ 1370 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1, 1371 &oprnd0, stmts, 1372 type, &half_type0, def_stmt0)) 1373 return NULL; 1374 1375 /* Pattern detected. */ 1376 if (vect_print_dump_info (REPORT_DETAILS)) 1377 fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: "); 1378 1379 /* Check target support. */ 1380 vectype = get_vectype_for_scalar_type (half_type0); 1381 vectype_out = get_vectype_for_scalar_type (type); 1382 1383 if (!vectype 1384 || !vectype_out 1385 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt, 1386 vectype_out, vectype, 1387 &dummy, &dummy, &dummy_code, 1388 &dummy_code, &dummy_int, 1389 &dummy_vec)) 1390 return NULL; 1391 1392 *type_in = vectype; 1393 *type_out = vectype_out; 1394 1395 /* Pattern supported. Create a stmt to be used to replace the pattern. */ 1396 var = vect_recog_temp_ssa_var (type, NULL); 1397 pattern_stmt = 1398 gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1); 1399 1400 if (vect_print_dump_info (REPORT_DETAILS)) 1401 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); 1402 1403 VEC_safe_push (gimple, heap, *stmts, last_stmt); 1404 return pattern_stmt; 1405 } 1406 1407 /* Detect a vector by vector shift pattern that wouldn't be otherwise 1408 vectorized: 1409 1410 type a_t; 1411 TYPE b_T, res_T; 1412 1413 S1 a_t = ; 1414 S2 b_T = ; 1415 S3 res_T = b_T op a_t; 1416 1417 where type 'TYPE' is a type with different size than 'type', 1418 and op is <<, >> or rotate. 1419 1420 Also detect cases: 1421 1422 type a_t; 1423 TYPE b_T, c_T, res_T; 1424 1425 S0 c_T = ; 1426 S1 a_t = (type) c_T; 1427 S2 b_T = ; 1428 S3 res_T = b_T op a_t; 1429 1430 Input/Output: 1431 1432 * STMTS: Contains a stmt from which the pattern search begins, 1433 i.e. the shift/rotate stmt. The original stmt (S3) is replaced 1434 with a shift/rotate which has same type on both operands, in the 1435 second case just b_T op c_T, in the first case with added cast 1436 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ. 1437 1438 Output: 1439 1440 * TYPE_IN: The type of the input arguments to the pattern. 1441 1442 * TYPE_OUT: The type of the output of this pattern. 1443 1444 * Return value: A new stmt that will be used to replace the shift/rotate 1445 S3 stmt. */ 1446 1447 static gimple 1448 vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts, 1449 tree *type_in, tree *type_out) 1450 { 1451 gimple last_stmt = VEC_pop (gimple, *stmts); 1452 tree oprnd0, oprnd1, lhs, var; 1453 gimple pattern_stmt, def_stmt; 1454 enum tree_code rhs_code; 1455 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 1456 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 1457 enum vect_def_type dt; 1458 tree def; 1459 1460 if (!is_gimple_assign (last_stmt)) 1461 return NULL; 1462 1463 rhs_code = gimple_assign_rhs_code (last_stmt); 1464 switch (rhs_code) 1465 { 1466 case LSHIFT_EXPR: 1467 case RSHIFT_EXPR: 1468 case LROTATE_EXPR: 1469 case RROTATE_EXPR: 1470 break; 1471 default: 1472 return NULL; 1473 } 1474 1475 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 1476 return NULL; 1477 1478 lhs = gimple_assign_lhs (last_stmt); 1479 oprnd0 = gimple_assign_rhs1 (last_stmt); 1480 oprnd1 = gimple_assign_rhs2 (last_stmt); 1481 if (TREE_CODE (oprnd0) != SSA_NAME 1482 || TREE_CODE (oprnd1) != SSA_NAME 1483 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1)) 1484 || TYPE_PRECISION (TREE_TYPE (oprnd1)) 1485 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1))) 1486 || TYPE_PRECISION (TREE_TYPE (lhs)) 1487 != TYPE_PRECISION (TREE_TYPE (oprnd0))) 1488 return NULL; 1489 1490 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, NULL, &def_stmt, 1491 &def, &dt)) 1492 return NULL; 1493 1494 if (dt != vect_internal_def) 1495 return NULL; 1496 1497 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0)); 1498 *type_out = *type_in; 1499 if (*type_in == NULL_TREE) 1500 return NULL; 1501 1502 def = NULL_TREE; 1503 if (gimple_assign_cast_p (def_stmt)) 1504 { 1505 tree rhs1 = gimple_assign_rhs1 (def_stmt); 1506 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0)) 1507 && TYPE_PRECISION (TREE_TYPE (rhs1)) 1508 == TYPE_PRECISION (TREE_TYPE (oprnd0))) 1509 def = rhs1; 1510 } 1511 1512 if (def == NULL_TREE) 1513 { 1514 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL); 1515 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1, 1516 NULL_TREE); 1517 new_pattern_def_seq (stmt_vinfo, def_stmt); 1518 } 1519 1520 /* Pattern detected. */ 1521 if (vect_print_dump_info (REPORT_DETAILS)) 1522 fprintf (vect_dump, "vect_recog_vector_vector_shift_pattern: detected: "); 1523 1524 /* Pattern supported. Create a stmt to be used to replace the pattern. */ 1525 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL); 1526 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def); 1527 1528 if (vect_print_dump_info (REPORT_DETAILS)) 1529 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); 1530 1531 VEC_safe_push (gimple, heap, *stmts, last_stmt); 1532 return pattern_stmt; 1533 } 1534 1535 /* Detect a signed division by power of two constant that wouldn't be 1536 otherwise vectorized: 1537 1538 type a_t, b_t; 1539 1540 S1 a_t = b_t / N; 1541 1542 where type 'type' is a signed integral type and N is a constant positive 1543 power of two. 1544 1545 Similarly handle signed modulo by power of two constant: 1546 1547 S4 a_t = b_t % N; 1548 1549 Input/Output: 1550 1551 * STMTS: Contains a stmt from which the pattern search begins, 1552 i.e. the division stmt. S1 is replaced by: 1553 S3 y_t = b_t < 0 ? N - 1 : 0; 1554 S2 x_t = b_t + y_t; 1555 S1' a_t = x_t >> log2 (N); 1556 1557 S4 is replaced by (where *_T temporaries have unsigned type): 1558 S9 y_T = b_t < 0 ? -1U : 0U; 1559 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N)); 1560 S7 z_t = (type) z_T; 1561 S6 w_t = b_t + z_t; 1562 S5 x_t = w_t & (N - 1); 1563 S4' a_t = x_t - z_t; 1564 1565 Output: 1566 1567 * TYPE_IN: The type of the input arguments to the pattern. 1568 1569 * TYPE_OUT: The type of the output of this pattern. 1570 1571 * Return value: A new stmt that will be used to replace the division 1572 S1 or modulo S4 stmt. */ 1573 1574 static gimple 1575 vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts, 1576 tree *type_in, tree *type_out) 1577 { 1578 gimple last_stmt = VEC_pop (gimple, *stmts); 1579 tree oprnd0, oprnd1, vectype, itype, cond; 1580 gimple pattern_stmt, def_stmt; 1581 enum tree_code rhs_code; 1582 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 1583 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 1584 optab optab; 1585 1586 if (!is_gimple_assign (last_stmt)) 1587 return NULL; 1588 1589 rhs_code = gimple_assign_rhs_code (last_stmt); 1590 switch (rhs_code) 1591 { 1592 case TRUNC_DIV_EXPR: 1593 case TRUNC_MOD_EXPR: 1594 break; 1595 default: 1596 return NULL; 1597 } 1598 1599 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 1600 return NULL; 1601 1602 oprnd0 = gimple_assign_rhs1 (last_stmt); 1603 oprnd1 = gimple_assign_rhs2 (last_stmt); 1604 itype = TREE_TYPE (oprnd0); 1605 if (TREE_CODE (oprnd0) != SSA_NAME 1606 || TREE_CODE (oprnd1) != INTEGER_CST 1607 || TREE_CODE (itype) != INTEGER_TYPE 1608 || TYPE_UNSIGNED (itype) 1609 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)) 1610 || !integer_pow2p (oprnd1) 1611 || tree_int_cst_sgn (oprnd1) != 1) 1612 return NULL; 1613 1614 vectype = get_vectype_for_scalar_type (itype); 1615 if (vectype == NULL_TREE) 1616 return NULL; 1617 1618 /* If the target can handle vectorized division or modulo natively, 1619 don't attempt to optimize this. */ 1620 optab = optab_for_tree_code (rhs_code, vectype, optab_default); 1621 if (optab != NULL) 1622 { 1623 enum machine_mode vec_mode = TYPE_MODE (vectype); 1624 int icode = (int) optab_handler (optab, vec_mode); 1625 if (icode != CODE_FOR_nothing 1626 || GET_MODE_SIZE (vec_mode) == UNITS_PER_WORD) 1627 return NULL; 1628 } 1629 1630 /* Pattern detected. */ 1631 if (vect_print_dump_info (REPORT_DETAILS)) 1632 fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: "); 1633 1634 cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0)); 1635 if (rhs_code == TRUNC_DIV_EXPR) 1636 { 1637 tree var = vect_recog_temp_ssa_var (itype, NULL); 1638 def_stmt 1639 = gimple_build_assign_with_ops3 (COND_EXPR, var, cond, 1640 fold_build2 (MINUS_EXPR, itype, 1641 oprnd1, 1642 build_int_cst (itype, 1643 1)), 1644 build_int_cst (itype, 0)); 1645 new_pattern_def_seq (stmt_vinfo, def_stmt); 1646 var = vect_recog_temp_ssa_var (itype, NULL); 1647 def_stmt 1648 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0, 1649 gimple_assign_lhs (def_stmt)); 1650 append_pattern_def_seq (stmt_vinfo, def_stmt); 1651 1652 pattern_stmt 1653 = gimple_build_assign_with_ops (RSHIFT_EXPR, 1654 vect_recog_temp_ssa_var (itype, NULL), 1655 var, 1656 build_int_cst (itype, 1657 tree_log2 (oprnd1))); 1658 } 1659 else 1660 { 1661 tree signmask; 1662 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL; 1663 if (compare_tree_int (oprnd1, 2) == 0) 1664 { 1665 signmask = vect_recog_temp_ssa_var (itype, NULL); 1666 def_stmt 1667 = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond, 1668 build_int_cst (itype, 1), 1669 build_int_cst (itype, 0)); 1670 append_pattern_def_seq (stmt_vinfo, def_stmt); 1671 } 1672 else 1673 { 1674 tree utype 1675 = build_nonstandard_integer_type (TYPE_PRECISION (itype), 1); 1676 tree vecutype = get_vectype_for_scalar_type (utype); 1677 tree shift 1678 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype)) 1679 - tree_log2 (oprnd1)); 1680 tree var = vect_recog_temp_ssa_var (utype, NULL); 1681 stmt_vec_info def_stmt_vinfo; 1682 1683 def_stmt 1684 = gimple_build_assign_with_ops3 (COND_EXPR, var, cond, 1685 build_int_cst (utype, -1), 1686 build_int_cst (utype, 0)); 1687 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); 1688 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); 1689 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype; 1690 append_pattern_def_seq (stmt_vinfo, def_stmt); 1691 var = vect_recog_temp_ssa_var (utype, NULL); 1692 def_stmt 1693 = gimple_build_assign_with_ops (RSHIFT_EXPR, var, 1694 gimple_assign_lhs (def_stmt), 1695 shift); 1696 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); 1697 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); 1698 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype; 1699 append_pattern_def_seq (stmt_vinfo, def_stmt); 1700 signmask = vect_recog_temp_ssa_var (itype, NULL); 1701 def_stmt 1702 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var, 1703 NULL_TREE); 1704 append_pattern_def_seq (stmt_vinfo, def_stmt); 1705 } 1706 def_stmt 1707 = gimple_build_assign_with_ops (PLUS_EXPR, 1708 vect_recog_temp_ssa_var (itype, NULL), 1709 oprnd0, signmask); 1710 append_pattern_def_seq (stmt_vinfo, def_stmt); 1711 def_stmt 1712 = gimple_build_assign_with_ops (BIT_AND_EXPR, 1713 vect_recog_temp_ssa_var (itype, NULL), 1714 gimple_assign_lhs (def_stmt), 1715 fold_build2 (MINUS_EXPR, itype, 1716 oprnd1, 1717 build_int_cst (itype, 1718 1))); 1719 append_pattern_def_seq (stmt_vinfo, def_stmt); 1720 1721 pattern_stmt 1722 = gimple_build_assign_with_ops (MINUS_EXPR, 1723 vect_recog_temp_ssa_var (itype, NULL), 1724 gimple_assign_lhs (def_stmt), 1725 signmask); 1726 } 1727 1728 if (vect_print_dump_info (REPORT_DETAILS)) 1729 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); 1730 1731 VEC_safe_push (gimple, heap, *stmts, last_stmt); 1732 1733 *type_in = vectype; 1734 *type_out = vectype; 1735 return pattern_stmt; 1736 } 1737 1738 /* Function vect_recog_mixed_size_cond_pattern 1739 1740 Try to find the following pattern: 1741 1742 type x_t, y_t; 1743 TYPE a_T, b_T, c_T; 1744 loop: 1745 S1 a_T = x_t CMP y_t ? b_T : c_T; 1746 1747 where type 'TYPE' is an integral type which has different size 1748 from 'type'. b_T and c_T are constants and if 'TYPE' is wider 1749 than 'type', the constants need to fit into an integer type 1750 with the same width as 'type'. 1751 1752 Input: 1753 1754 * LAST_STMT: A stmt from which the pattern search begins. 1755 1756 Output: 1757 1758 * TYPE_IN: The type of the input arguments to the pattern. 1759 1760 * TYPE_OUT: The type of the output of this pattern. 1761 1762 * Return value: A new stmt that will be used to replace the pattern. 1763 Additionally a def_stmt is added. 1764 1765 a_it = x_t CMP y_t ? b_it : c_it; 1766 a_T = (TYPE) a_it; */ 1767 1768 static gimple 1769 vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in, 1770 tree *type_out) 1771 { 1772 gimple last_stmt = VEC_index (gimple, *stmts, 0); 1773 tree cond_expr, then_clause, else_clause; 1774 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info; 1775 tree type, vectype, comp_vectype, itype, vecitype; 1776 enum machine_mode cmpmode; 1777 gimple pattern_stmt, def_stmt; 1778 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 1779 1780 if (!is_gimple_assign (last_stmt) 1781 || gimple_assign_rhs_code (last_stmt) != COND_EXPR 1782 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def) 1783 return NULL; 1784 1785 cond_expr = gimple_assign_rhs1 (last_stmt); 1786 then_clause = gimple_assign_rhs2 (last_stmt); 1787 else_clause = gimple_assign_rhs3 (last_stmt); 1788 1789 if (TREE_CODE (then_clause) != INTEGER_CST 1790 || TREE_CODE (else_clause) != INTEGER_CST) 1791 return NULL; 1792 1793 if (!COMPARISON_CLASS_P (cond_expr)) 1794 return NULL; 1795 1796 comp_vectype 1797 = get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (cond_expr, 0))); 1798 if (comp_vectype == NULL_TREE) 1799 return NULL; 1800 1801 type = gimple_expr_type (last_stmt); 1802 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype)); 1803 1804 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode)) 1805 return NULL; 1806 1807 vectype = get_vectype_for_scalar_type (type); 1808 if (vectype == NULL_TREE) 1809 return NULL; 1810 1811 if (expand_vec_cond_expr_p (vectype, comp_vectype)) 1812 return NULL; 1813 1814 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode), 1815 TYPE_UNSIGNED (type)); 1816 if (itype == NULL_TREE 1817 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode)) 1818 return NULL; 1819 1820 vecitype = get_vectype_for_scalar_type (itype); 1821 if (vecitype == NULL_TREE) 1822 return NULL; 1823 1824 if (!expand_vec_cond_expr_p (vecitype, comp_vectype)) 1825 return NULL; 1826 1827 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode)) 1828 { 1829 if (!int_fits_type_p (then_clause, itype) 1830 || !int_fits_type_p (else_clause, itype)) 1831 return NULL; 1832 } 1833 1834 def_stmt 1835 = gimple_build_assign_with_ops3 (COND_EXPR, 1836 vect_recog_temp_ssa_var (itype, NULL), 1837 unshare_expr (cond_expr), 1838 fold_convert (itype, then_clause), 1839 fold_convert (itype, else_clause)); 1840 pattern_stmt 1841 = gimple_build_assign_with_ops (NOP_EXPR, 1842 vect_recog_temp_ssa_var (type, NULL), 1843 gimple_assign_lhs (def_stmt), NULL_TREE); 1844 1845 new_pattern_def_seq (stmt_vinfo, def_stmt); 1846 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); 1847 set_vinfo_for_stmt (def_stmt, def_stmt_info); 1848 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype; 1849 *type_in = vecitype; 1850 *type_out = vectype; 1851 1852 return pattern_stmt; 1853 } 1854 1855 1856 /* Helper function of vect_recog_bool_pattern. Called recursively, return 1857 true if bool VAR can be optimized that way. */ 1858 1859 static bool 1860 check_bool_pattern (tree var, loop_vec_info loop_vinfo) 1861 { 1862 gimple def_stmt; 1863 enum vect_def_type dt; 1864 tree def, rhs1; 1865 enum tree_code rhs_code; 1866 1867 if (!vect_is_simple_use (var, NULL, loop_vinfo, NULL, &def_stmt, &def, &dt)) 1868 return false; 1869 1870 if (dt != vect_internal_def) 1871 return false; 1872 1873 if (!is_gimple_assign (def_stmt)) 1874 return false; 1875 1876 if (!has_single_use (def)) 1877 return false; 1878 1879 rhs1 = gimple_assign_rhs1 (def_stmt); 1880 rhs_code = gimple_assign_rhs_code (def_stmt); 1881 switch (rhs_code) 1882 { 1883 case SSA_NAME: 1884 return check_bool_pattern (rhs1, loop_vinfo); 1885 1886 CASE_CONVERT: 1887 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1 1888 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))) 1889 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE) 1890 return false; 1891 return check_bool_pattern (rhs1, loop_vinfo); 1892 1893 case BIT_NOT_EXPR: 1894 return check_bool_pattern (rhs1, loop_vinfo); 1895 1896 case BIT_AND_EXPR: 1897 case BIT_IOR_EXPR: 1898 case BIT_XOR_EXPR: 1899 if (!check_bool_pattern (rhs1, loop_vinfo)) 1900 return false; 1901 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo); 1902 1903 default: 1904 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison) 1905 { 1906 tree vecitype, comp_vectype; 1907 1908 /* If the comparison can throw, then is_gimple_condexpr will be 1909 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */ 1910 if (stmt_could_throw_p (def_stmt)) 1911 return false; 1912 1913 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1)); 1914 if (comp_vectype == NULL_TREE) 1915 return false; 1916 1917 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE) 1918 { 1919 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1)); 1920 tree itype 1921 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1); 1922 vecitype = get_vectype_for_scalar_type (itype); 1923 if (vecitype == NULL_TREE) 1924 return false; 1925 } 1926 else 1927 vecitype = comp_vectype; 1928 return expand_vec_cond_expr_p (vecitype, comp_vectype); 1929 } 1930 return false; 1931 } 1932 } 1933 1934 1935 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous 1936 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT 1937 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */ 1938 1939 static tree 1940 adjust_bool_pattern_cast (tree type, tree var) 1941 { 1942 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var)); 1943 gimple cast_stmt, pattern_stmt; 1944 1945 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo)); 1946 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo); 1947 new_pattern_def_seq (stmt_vinfo, pattern_stmt); 1948 cast_stmt 1949 = gimple_build_assign_with_ops (NOP_EXPR, 1950 vect_recog_temp_ssa_var (type, NULL), 1951 gimple_assign_lhs (pattern_stmt), 1952 NULL_TREE); 1953 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt; 1954 return gimple_assign_lhs (cast_stmt); 1955 } 1956 1957 1958 /* Helper function of vect_recog_bool_pattern. Do the actual transformations, 1959 recursively. VAR is an SSA_NAME that should be transformed from bool 1960 to a wider integer type, OUT_TYPE is the desired final integer type of 1961 the whole pattern, TRUEVAL should be NULL unless optimizing 1962 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands 1963 in the then_clause, STMTS is where statements with added pattern stmts 1964 should be pushed to. */ 1965 1966 static tree 1967 adjust_bool_pattern (tree var, tree out_type, tree trueval, 1968 VEC (gimple, heap) **stmts) 1969 { 1970 gimple stmt = SSA_NAME_DEF_STMT (var); 1971 enum tree_code rhs_code, def_rhs_code; 1972 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2; 1973 location_t loc; 1974 gimple pattern_stmt, def_stmt; 1975 1976 rhs1 = gimple_assign_rhs1 (stmt); 1977 rhs2 = gimple_assign_rhs2 (stmt); 1978 rhs_code = gimple_assign_rhs_code (stmt); 1979 loc = gimple_location (stmt); 1980 switch (rhs_code) 1981 { 1982 case SSA_NAME: 1983 CASE_CONVERT: 1984 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts); 1985 itype = TREE_TYPE (irhs1); 1986 pattern_stmt 1987 = gimple_build_assign_with_ops (SSA_NAME, 1988 vect_recog_temp_ssa_var (itype, NULL), 1989 irhs1, NULL_TREE); 1990 break; 1991 1992 case BIT_NOT_EXPR: 1993 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts); 1994 itype = TREE_TYPE (irhs1); 1995 pattern_stmt 1996 = gimple_build_assign_with_ops (BIT_XOR_EXPR, 1997 vect_recog_temp_ssa_var (itype, NULL), 1998 irhs1, build_int_cst (itype, 1)); 1999 break; 2000 2001 case BIT_AND_EXPR: 2002 /* Try to optimize x = y & (a < b ? 1 : 0); into 2003 x = (a < b ? y : 0); 2004 2005 E.g. for: 2006 bool a_b, b_b, c_b; 2007 TYPE d_T; 2008 2009 S1 a_b = x1 CMP1 y1; 2010 S2 b_b = x2 CMP2 y2; 2011 S3 c_b = a_b & b_b; 2012 S4 d_T = (TYPE) c_b; 2013 2014 we would normally emit: 2015 2016 S1' a_T = x1 CMP1 y1 ? 1 : 0; 2017 S2' b_T = x2 CMP2 y2 ? 1 : 0; 2018 S3' c_T = a_T & b_T; 2019 S4' d_T = c_T; 2020 2021 but we can save one stmt by using the 2022 result of one of the COND_EXPRs in the other COND_EXPR and leave 2023 BIT_AND_EXPR stmt out: 2024 2025 S1' a_T = x1 CMP1 y1 ? 1 : 0; 2026 S3' c_T = x2 CMP2 y2 ? a_T : 0; 2027 S4' f_T = c_T; 2028 2029 At least when VEC_COND_EXPR is implemented using masks 2030 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it 2031 computes the comparison masks and ands it, in one case with 2032 all ones vector, in the other case with a vector register. 2033 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is 2034 often more expensive. */ 2035 def_stmt = SSA_NAME_DEF_STMT (rhs2); 2036 def_rhs_code = gimple_assign_rhs_code (def_stmt); 2037 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison) 2038 { 2039 tree def_rhs1 = gimple_assign_rhs1 (def_stmt); 2040 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts); 2041 if (TYPE_PRECISION (TREE_TYPE (irhs1)) 2042 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1)))) 2043 { 2044 gimple tstmt; 2045 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt); 2046 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts); 2047 tstmt = VEC_pop (gimple, *stmts); 2048 gcc_assert (tstmt == def_stmt); 2049 VEC_quick_push (gimple, *stmts, stmt); 2050 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) 2051 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo); 2052 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo)); 2053 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL; 2054 return irhs2; 2055 } 2056 else 2057 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts); 2058 goto and_ior_xor; 2059 } 2060 def_stmt = SSA_NAME_DEF_STMT (rhs1); 2061 def_rhs_code = gimple_assign_rhs_code (def_stmt); 2062 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison) 2063 { 2064 tree def_rhs1 = gimple_assign_rhs1 (def_stmt); 2065 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts); 2066 if (TYPE_PRECISION (TREE_TYPE (irhs2)) 2067 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1)))) 2068 { 2069 gimple tstmt; 2070 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt); 2071 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts); 2072 tstmt = VEC_pop (gimple, *stmts); 2073 gcc_assert (tstmt == def_stmt); 2074 VEC_quick_push (gimple, *stmts, stmt); 2075 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) 2076 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo); 2077 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo)); 2078 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL; 2079 return irhs1; 2080 } 2081 else 2082 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts); 2083 goto and_ior_xor; 2084 } 2085 /* FALLTHRU */ 2086 case BIT_IOR_EXPR: 2087 case BIT_XOR_EXPR: 2088 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts); 2089 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts); 2090 and_ior_xor: 2091 if (TYPE_PRECISION (TREE_TYPE (irhs1)) 2092 != TYPE_PRECISION (TREE_TYPE (irhs2))) 2093 { 2094 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1)); 2095 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2)); 2096 int out_prec = TYPE_PRECISION (out_type); 2097 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2)) 2098 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2); 2099 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2)) 2100 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1); 2101 else 2102 { 2103 irhs1 = adjust_bool_pattern_cast (out_type, rhs1); 2104 irhs2 = adjust_bool_pattern_cast (out_type, rhs2); 2105 } 2106 } 2107 itype = TREE_TYPE (irhs1); 2108 pattern_stmt 2109 = gimple_build_assign_with_ops (rhs_code, 2110 vect_recog_temp_ssa_var (itype, NULL), 2111 irhs1, irhs2); 2112 break; 2113 2114 default: 2115 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison); 2116 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE 2117 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)) 2118 || (TYPE_PRECISION (TREE_TYPE (rhs1)) 2119 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1))))) 2120 { 2121 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1)); 2122 itype 2123 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1); 2124 } 2125 else 2126 itype = TREE_TYPE (rhs1); 2127 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2); 2128 if (trueval == NULL_TREE) 2129 trueval = build_int_cst (itype, 1); 2130 else 2131 gcc_checking_assert (useless_type_conversion_p (itype, 2132 TREE_TYPE (trueval))); 2133 pattern_stmt 2134 = gimple_build_assign_with_ops3 (COND_EXPR, 2135 vect_recog_temp_ssa_var (itype, NULL), 2136 cond_expr, trueval, 2137 build_int_cst (itype, 0)); 2138 break; 2139 } 2140 2141 VEC_safe_push (gimple, heap, *stmts, stmt); 2142 gimple_set_location (pattern_stmt, loc); 2143 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt; 2144 return gimple_assign_lhs (pattern_stmt); 2145 } 2146 2147 2148 /* Function vect_recog_bool_pattern 2149 2150 Try to find pattern like following: 2151 2152 bool a_b, b_b, c_b, d_b, e_b; 2153 TYPE f_T; 2154 loop: 2155 S1 a_b = x1 CMP1 y1; 2156 S2 b_b = x2 CMP2 y2; 2157 S3 c_b = a_b & b_b; 2158 S4 d_b = x3 CMP3 y3; 2159 S5 e_b = c_b | d_b; 2160 S6 f_T = (TYPE) e_b; 2161 2162 where type 'TYPE' is an integral type. 2163 2164 Input: 2165 2166 * LAST_STMT: A stmt at the end from which the pattern 2167 search begins, i.e. cast of a bool to 2168 an integer type. 2169 2170 Output: 2171 2172 * TYPE_IN: The type of the input arguments to the pattern. 2173 2174 * TYPE_OUT: The type of the output of this pattern. 2175 2176 * Return value: A new stmt that will be used to replace the pattern. 2177 2178 Assuming size of TYPE is the same as size of all comparisons 2179 (otherwise some casts would be added where needed), the above 2180 sequence we create related pattern stmts: 2181 S1' a_T = x1 CMP1 y1 ? 1 : 0; 2182 S3' c_T = x2 CMP2 y2 ? a_T : 0; 2183 S4' d_T = x3 CMP3 y3 ? 1 : 0; 2184 S5' e_T = c_T | d_T; 2185 S6' f_T = e_T; 2186 2187 Instead of the above S3' we could emit: 2188 S2' b_T = x2 CMP2 y2 ? 1 : 0; 2189 S3' c_T = a_T | b_T; 2190 but the above is more efficient. */ 2191 2192 static gimple 2193 vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in, 2194 tree *type_out) 2195 { 2196 gimple last_stmt = VEC_pop (gimple, *stmts); 2197 enum tree_code rhs_code; 2198 tree var, lhs, rhs, vectype; 2199 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 2200 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 2201 gimple pattern_stmt; 2202 2203 if (!is_gimple_assign (last_stmt)) 2204 return NULL; 2205 2206 var = gimple_assign_rhs1 (last_stmt); 2207 lhs = gimple_assign_lhs (last_stmt); 2208 2209 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1 2210 || !TYPE_UNSIGNED (TREE_TYPE (var))) 2211 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE) 2212 return NULL; 2213 2214 rhs_code = gimple_assign_rhs_code (last_stmt); 2215 if (CONVERT_EXPR_CODE_P (rhs_code)) 2216 { 2217 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE 2218 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1) 2219 return NULL; 2220 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs)); 2221 if (vectype == NULL_TREE) 2222 return NULL; 2223 2224 if (!check_bool_pattern (var, loop_vinfo)) 2225 return NULL; 2226 2227 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts); 2228 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); 2229 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) 2230 pattern_stmt 2231 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE); 2232 else 2233 pattern_stmt 2234 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE); 2235 *type_out = vectype; 2236 *type_in = vectype; 2237 VEC_safe_push (gimple, heap, *stmts, last_stmt); 2238 return pattern_stmt; 2239 } 2240 else if (rhs_code == SSA_NAME 2241 && STMT_VINFO_DATA_REF (stmt_vinfo)) 2242 { 2243 stmt_vec_info pattern_stmt_info; 2244 vectype = STMT_VINFO_VECTYPE (stmt_vinfo); 2245 gcc_assert (vectype != NULL_TREE); 2246 if (!VECTOR_MODE_P (TYPE_MODE (vectype))) 2247 return NULL; 2248 if (!check_bool_pattern (var, loop_vinfo)) 2249 return NULL; 2250 2251 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts); 2252 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs); 2253 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) 2254 { 2255 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); 2256 gimple cast_stmt 2257 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE); 2258 new_pattern_def_seq (stmt_vinfo, cast_stmt); 2259 rhs = rhs2; 2260 } 2261 pattern_stmt 2262 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE); 2263 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL); 2264 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info); 2265 STMT_VINFO_DATA_REF (pattern_stmt_info) 2266 = STMT_VINFO_DATA_REF (stmt_vinfo); 2267 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info) 2268 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo); 2269 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo); 2270 STMT_VINFO_DR_OFFSET (pattern_stmt_info) 2271 = STMT_VINFO_DR_OFFSET (stmt_vinfo); 2272 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo); 2273 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info) 2274 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo); 2275 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt; 2276 *type_out = vectype; 2277 *type_in = vectype; 2278 VEC_safe_push (gimple, heap, *stmts, last_stmt); 2279 return pattern_stmt; 2280 } 2281 else 2282 return NULL; 2283 } 2284 2285 2286 /* Mark statements that are involved in a pattern. */ 2287 2288 static inline void 2289 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt, 2290 tree pattern_vectype) 2291 { 2292 stmt_vec_info pattern_stmt_info, def_stmt_info; 2293 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt); 2294 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info); 2295 gimple def_stmt; 2296 2297 pattern_stmt_info = vinfo_for_stmt (pattern_stmt); 2298 if (pattern_stmt_info == NULL) 2299 { 2300 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL); 2301 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info); 2302 } 2303 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt)); 2304 2305 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt; 2306 STMT_VINFO_DEF_TYPE (pattern_stmt_info) 2307 = STMT_VINFO_DEF_TYPE (orig_stmt_info); 2308 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype; 2309 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true; 2310 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt; 2311 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info) 2312 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info); 2313 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)) 2314 { 2315 gimple_stmt_iterator si; 2316 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)); 2317 !gsi_end_p (si); gsi_next (&si)) 2318 { 2319 def_stmt = gsi_stmt (si); 2320 def_stmt_info = vinfo_for_stmt (def_stmt); 2321 if (def_stmt_info == NULL) 2322 { 2323 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); 2324 set_vinfo_for_stmt (def_stmt, def_stmt_info); 2325 } 2326 gimple_set_bb (def_stmt, gimple_bb (orig_stmt)); 2327 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt; 2328 STMT_VINFO_DEF_TYPE (def_stmt_info) 2329 = STMT_VINFO_DEF_TYPE (orig_stmt_info); 2330 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE) 2331 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype; 2332 } 2333 } 2334 } 2335 2336 /* Function vect_pattern_recog_1 2337 2338 Input: 2339 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain 2340 computation pattern. 2341 STMT: A stmt from which the pattern search should start. 2342 2343 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an 2344 expression that computes the same functionality and can be used to 2345 replace the sequence of stmts that are involved in the pattern. 2346 2347 Output: 2348 This function checks if the expression returned by PATTERN_RECOG_FUNC is 2349 supported in vector form by the target. We use 'TYPE_IN' to obtain the 2350 relevant vector type. If 'TYPE_IN' is already a vector type, then this 2351 indicates that target support had already been checked by PATTERN_RECOG_FUNC. 2352 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits 2353 to the available target pattern. 2354 2355 This function also does some bookkeeping, as explained in the documentation 2356 for vect_recog_pattern. */ 2357 2358 static void 2359 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func, 2360 gimple_stmt_iterator si, 2361 VEC (gimple, heap) **stmts_to_replace) 2362 { 2363 gimple stmt = gsi_stmt (si), pattern_stmt; 2364 stmt_vec_info stmt_info; 2365 loop_vec_info loop_vinfo; 2366 tree pattern_vectype; 2367 tree type_in, type_out; 2368 enum tree_code code; 2369 int i; 2370 gimple next; 2371 2372 VEC_truncate (gimple, *stmts_to_replace, 0); 2373 VEC_quick_push (gimple, *stmts_to_replace, stmt); 2374 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out); 2375 if (!pattern_stmt) 2376 return; 2377 2378 stmt = VEC_last (gimple, *stmts_to_replace); 2379 stmt_info = vinfo_for_stmt (stmt); 2380 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 2381 2382 if (VECTOR_MODE_P (TYPE_MODE (type_in))) 2383 { 2384 /* No need to check target support (already checked by the pattern 2385 recognition function). */ 2386 pattern_vectype = type_out ? type_out : type_in; 2387 } 2388 else 2389 { 2390 enum machine_mode vec_mode; 2391 enum insn_code icode; 2392 optab optab; 2393 2394 /* Check target support */ 2395 type_in = get_vectype_for_scalar_type (type_in); 2396 if (!type_in) 2397 return; 2398 if (type_out) 2399 type_out = get_vectype_for_scalar_type (type_out); 2400 else 2401 type_out = type_in; 2402 if (!type_out) 2403 return; 2404 pattern_vectype = type_out; 2405 2406 if (is_gimple_assign (pattern_stmt)) 2407 code = gimple_assign_rhs_code (pattern_stmt); 2408 else 2409 { 2410 gcc_assert (is_gimple_call (pattern_stmt)); 2411 code = CALL_EXPR; 2412 } 2413 2414 optab = optab_for_tree_code (code, type_in, optab_default); 2415 vec_mode = TYPE_MODE (type_in); 2416 if (!optab 2417 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing 2418 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out))) 2419 return; 2420 } 2421 2422 /* Found a vectorizable pattern. */ 2423 if (vect_print_dump_info (REPORT_DETAILS)) 2424 { 2425 fprintf (vect_dump, "pattern recognized: "); 2426 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); 2427 } 2428 2429 /* Mark the stmts that are involved in the pattern. */ 2430 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype); 2431 2432 /* Patterns cannot be vectorized using SLP, because they change the order of 2433 computation. */ 2434 FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next) 2435 if (next == stmt) 2436 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); 2437 2438 /* It is possible that additional pattern stmts are created and inserted in 2439 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the 2440 relevant statements. */ 2441 for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt) 2442 && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1); 2443 i++) 2444 { 2445 stmt_info = vinfo_for_stmt (stmt); 2446 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); 2447 if (vect_print_dump_info (REPORT_DETAILS)) 2448 { 2449 fprintf (vect_dump, "additional pattern stmt: "); 2450 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); 2451 } 2452 2453 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE); 2454 } 2455 } 2456 2457 2458 /* Function vect_pattern_recog 2459 2460 Input: 2461 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for 2462 computation idioms. 2463 2464 Output - for each computation idiom that is detected we create a new stmt 2465 that provides the same functionality and that can be vectorized. We 2466 also record some information in the struct_stmt_info of the relevant 2467 stmts, as explained below: 2468 2469 At the entry to this function we have the following stmts, with the 2470 following initial value in the STMT_VINFO fields: 2471 2472 stmt in_pattern_p related_stmt vec_stmt 2473 S1: a_i = .... - - - 2474 S2: a_2 = ..use(a_i).. - - - 2475 S3: a_1 = ..use(a_2).. - - - 2476 S4: a_0 = ..use(a_1).. - - - 2477 S5: ... = ..use(a_0).. - - - 2478 2479 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be 2480 represented by a single stmt. We then: 2481 - create a new stmt S6 equivalent to the pattern (the stmt is not 2482 inserted into the code) 2483 - fill in the STMT_VINFO fields as follows: 2484 2485 in_pattern_p related_stmt vec_stmt 2486 S1: a_i = .... - - - 2487 S2: a_2 = ..use(a_i).. - - - 2488 S3: a_1 = ..use(a_2).. - - - 2489 S4: a_0 = ..use(a_1).. true S6 - 2490 '---> S6: a_new = .... - S4 - 2491 S5: ... = ..use(a_0).. - - - 2492 2493 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point 2494 to each other through the RELATED_STMT field). 2495 2496 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead 2497 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will 2498 remain irrelevant unless used by stmts other than S4. 2499 2500 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3} 2501 (because they are marked as irrelevant). It will vectorize S6, and record 2502 a pointer to the new vector stmt VS6 from S6 (as usual). 2503 S4 will be skipped, and S5 will be vectorized as usual: 2504 2505 in_pattern_p related_stmt vec_stmt 2506 S1: a_i = .... - - - 2507 S2: a_2 = ..use(a_i).. - - - 2508 S3: a_1 = ..use(a_2).. - - - 2509 > VS6: va_new = .... - - - 2510 S4: a_0 = ..use(a_1).. true S6 VS6 2511 '---> S6: a_new = .... - S4 VS6 2512 > VS5: ... = ..vuse(va_new).. - - - 2513 S5: ... = ..use(a_0).. - - - 2514 2515 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used 2516 elsewhere), and we'll end up with: 2517 2518 VS6: va_new = .... 2519 VS5: ... = ..vuse(va_new).. 2520 2521 In case of more than one pattern statements, e.g., widen-mult with 2522 intermediate type: 2523 2524 S1 a_t = ; 2525 S2 a_T = (TYPE) a_t; 2526 '--> S3: a_it = (interm_type) a_t; 2527 S4 prod_T = a_T * CONST; 2528 '--> S5: prod_T' = a_it w* CONST; 2529 2530 there may be other users of a_T outside the pattern. In that case S2 will 2531 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed 2532 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will 2533 be recorded in S3. */ 2534 2535 void 2536 vect_pattern_recog (loop_vec_info loop_vinfo) 2537 { 2538 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2539 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); 2540 unsigned int nbbs = loop->num_nodes; 2541 gimple_stmt_iterator si; 2542 unsigned int i, j; 2543 vect_recog_func_ptr vect_recog_func; 2544 VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1); 2545 2546 if (vect_print_dump_info (REPORT_DETAILS)) 2547 fprintf (vect_dump, "=== vect_pattern_recog ==="); 2548 2549 /* Scan through the loop stmts, applying the pattern recognition 2550 functions starting at each stmt visited: */ 2551 for (i = 0; i < nbbs; i++) 2552 { 2553 basic_block bb = bbs[i]; 2554 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) 2555 { 2556 /* Scan over all generic vect_recog_xxx_pattern functions. */ 2557 for (j = 0; j < NUM_PATTERNS; j++) 2558 { 2559 vect_recog_func = vect_vect_recog_func_ptrs[j]; 2560 vect_pattern_recog_1 (vect_recog_func, si, 2561 &stmts_to_replace); 2562 } 2563 } 2564 } 2565 2566 VEC_free (gimple, heap, stmts_to_replace); 2567 } 2568