/* Lower vector operations to scalar operations.
   Copyright (C) 2004-2018 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "langhooks.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimplify.h"
#include "tree-cfg.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"


static void expand_vector_operations_1 (gimple_stmt_iterator *);

/* Return the number of elements in a vector type TYPE that we have
   already decided needs to be expanded piecewise.  We don't support
   this kind of expansion for variable-length vectors, since we should
   always check for target support before introducing uses of those.  */
static unsigned int
nunits_for_known_piecewise_op (const_tree type)
{
  /* to_constant () asserts the element count is compile-time constant;
     callers must only reach here for fixed-length vectors.  */
  return TYPE_VECTOR_SUBPARTS (type).to_constant ();
}

/* Return true if TYPE1 has more elements than TYPE2, where either
   type may be a vector or a scalar.
*/ 58 59 static inline bool 60 subparts_gt (tree type1, tree type2) 61 { 62 poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1; 63 poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1; 64 return known_gt (n1, n2); 65 } 66 67 /* Build a constant of type TYPE, made of VALUE's bits replicated 68 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */ 69 static tree 70 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value) 71 { 72 int width = tree_to_uhwi (TYPE_SIZE (inner_type)); 73 int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1) 74 / HOST_BITS_PER_WIDE_INT; 75 unsigned HOST_WIDE_INT low, mask; 76 HOST_WIDE_INT a[WIDE_INT_MAX_ELTS]; 77 int i; 78 79 gcc_assert (n && n <= WIDE_INT_MAX_ELTS); 80 81 if (width == HOST_BITS_PER_WIDE_INT) 82 low = value; 83 else 84 { 85 mask = ((HOST_WIDE_INT)1 << width) - 1; 86 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask); 87 } 88 89 for (i = 0; i < n; i++) 90 a[i] = low; 91 92 gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT); 93 return wide_int_to_tree 94 (type, wide_int::from_array (a, n, TYPE_PRECISION (type))); 95 } 96 97 static GTY(()) tree vector_inner_type; 98 static GTY(()) tree vector_last_type; 99 static GTY(()) int vector_last_nunits; 100 101 /* Return a suitable vector types made of SUBPARTS units each of mode 102 "word_mode" (the global variable). 
*/
static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      /* One-entry cache hit: reuse the last type we built.  */
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  vector_last_nunits = nunits;
  vector_last_type = build_vector_type (vector_inner_type, nunits);
  return vector_last_type;
}

/* Element-wise expander: (gsi, inner_type, a, b, bitpos, bitsize, code,
   full vector type) -> scalar result for one element/word.  */
typedef tree (*elem_op_func) (gimple_stmt_iterator *,
			      tree, tree, tree, tree, tree, enum tree_code,
			      tree);

/* Extract the BITSIZE-wide piece of T at BITPOS as a value of TYPE.
   A NULL BITPOS means "reinterpret the whole of T" via VIEW_CONVERT.
   Boolean results are materialized as an integer bit-field compared
   against zero.  */
static inline tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
		  tree t, tree bitsize, tree bitpos)
{
  if (TREE_CODE (t) == SSA_NAME)
    {
      /* Look through a defining VECTOR_CST (or CONSTRUCTOR, when
	 extracting a piece) so the BIT_FIELD_REF can fold.  */
      gimple *def_stmt = SSA_NAME_DEF_STMT (t);
      if (is_gimple_assign (def_stmt)
	  && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
	      || (bitpos
		  && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR)))
	t = gimple_assign_rhs1 (def_stmt);
    }
  if (bitpos)
    {
      if (TREE_CODE (type) == BOOLEAN_TYPE)
	{
	  tree itype
	    = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 0);
	  tree field = gimplify_build3 (gsi, BIT_FIELD_REF, itype, t,
					bitsize, bitpos);
	  return gimplify_build2 (gsi, NE_EXPR, type, field,
				  build_zero_cst (itype));
	}
      else
	return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
    }
  else
    return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
}

/* Expand a unary operation CODE on one extracted element.  */
static tree
do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
	 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
	 enum tree_code code, tree type ATTRIBUTE_UNUSED)
{
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  return gimplify_build1 (gsi, code, inner_type, a);
}

/* Expand a binary operation CODE on one extracted element of A and B.
   Scalar operands (e.g. shift counts) are used as-is.  */
static tree
do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	  tree bitpos, tree bitsize, enum tree_code code,
	  tree type 
ATTRIBUTE_UNUSED)
{
  /* Only extract from operands that actually are vectors; a scalar
     operand (e.g. a shift amount) is passed straight through.  */
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  return gimplify_build2 (gsi, code, inner_type, a, b);
}

/* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0

   INNER_TYPE is the type of A and B elements

   returned expression is of signed integer type with the
   size equal to the size of INNER_TYPE.  */
static tree
do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	    tree bitpos, tree bitsize, enum tree_code code, tree type)
{
  tree stype = TREE_TYPE (type);
  tree cst_false = build_zero_cst (stype);
  tree cst_true = build_all_ones_cst (stype);	/* all-ones == -1 mask  */
  tree cmp;

  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);

  cmp = build2 (code, boolean_type_node, a, b);
  return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
}

/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
           (a ^ b) & 0x80808080

   a - b =  (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
            (a ^ ~b) & 0x80808080

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if 4 vector items or more
   fit into a word.
*/ 210 static tree 211 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b, 212 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED, 213 enum tree_code code, tree type ATTRIBUTE_UNUSED) 214 { 215 tree inner_type = TREE_TYPE (TREE_TYPE (a)); 216 unsigned HOST_WIDE_INT max; 217 tree low_bits, high_bits, a_low, b_low, result_low, signs; 218 219 max = GET_MODE_MASK (TYPE_MODE (inner_type)); 220 low_bits = build_replicated_const (word_type, inner_type, max >> 1); 221 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); 222 223 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos); 224 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos); 225 226 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b); 227 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits); 228 if (code == PLUS_EXPR) 229 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits); 230 else 231 { 232 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits); 233 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs); 234 } 235 236 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits); 237 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low); 238 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs); 239 } 240 241 static tree 242 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b, 243 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED, 244 tree bitsize ATTRIBUTE_UNUSED, 245 enum tree_code code ATTRIBUTE_UNUSED, 246 tree type ATTRIBUTE_UNUSED) 247 { 248 tree inner_type = TREE_TYPE (TREE_TYPE (b)); 249 HOST_WIDE_INT max; 250 tree low_bits, high_bits, b_low, result_low, signs; 251 252 max = GET_MODE_MASK (TYPE_MODE (inner_type)); 253 low_bits = build_replicated_const (word_type, inner_type, max >> 1); 254 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); 255 256 b = tree_vec_extract (gsi, word_type, b, 
			bitsize, bitpos);

  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}

/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  Calls F once per
   INNER_TYPE-sized piece and gathers the results in a CONSTRUCTOR
   of TYPE.  */
static tree
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
			 tree type, tree inner_type,
			 tree a, tree b, enum tree_code code)
{
  vec<constructor_elt, va_gc> *v;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = nunits_for_known_piecewise_op (type);
  /* DELTA is how many vector elements each piece covers (> 1 when
     INNER_TYPE is a whole word rather than the element type).  */
  int delta = tree_to_uhwi (part_width)
	      / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* "piecewise" when splitting element by element, "in parallel" when
     the pieces are wider than the elements (word-at-a-time tricks).  */
  if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded piecewise");
  else
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded in parallel");

  vec_alloc (v, (nunits + delta - 1) / delta);
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
    {
      tree result = f (gsi, inner_type, a, b, index, part_width, code, type);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
    }

  return build_constructor (type, v);
}

/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.
*/
static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
			tree a, tree b,
			enum tree_code code)
{
  tree result, compute_type;
  int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
     return expand_vector_piecewise (gsi, f,
				     type, TREE_TYPE (type),
				     a, b, code);
  else if (n_words > 1)
    {
      /* View the vector as N_WORDS word_mode elements and process each
	 word with F; force the CONSTRUCTOR into a gimple value.  */
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (gsi, f,
					word_type, TREE_TYPE (word_type),
					a, b, code);
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
					 GSI_SAME_STMT);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      scalar_int_mode mode
	= int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require ();
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type);
      warning_at (loc, OPT_Wvector_operation_performance,
		  "vector operation will be expanded with a "
		  "single scalar operation");
    }

  return result;
}

/* Expand a vector operation to scalars; for integer types we can use
   special bit twiddling tricks to do the sums a word at a time, using
   function F_PARALLEL instead of F.  These tricks are done only if
   they can process at least four items, that is, only if the vector
   holds at least four items and if a word can hold four items.
*/
static tree
expand_vector_addition (gimple_stmt_iterator *gsi,
			elem_op_func f, elem_op_func f_parallel,
			tree type, tree a, tree b, enum tree_code code)
{
  /* How many elements fit in one word (0 if an element is wider than
     a word, which also disables the parallel path below).  */
  int parts_per_word = UNITS_PER_WORD
		       / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));

  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
      && parts_per_word >= 4
      && nunits_for_known_piecewise_op (type) >= 4)
    return expand_vector_parallel (gsi, f_parallel,
				   type, a, b, code);
  else
    return expand_vector_piecewise (gsi, f,
				    type, TREE_TYPE (type),
				    a, b, code);
}

/* Try to expand vector comparison expression OP0 CODE OP1 by
   querying optab if the following expression:
	VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded.  Returns NULL_TREE when the target supports the
   comparison natively (nothing to do here).  */
static tree
expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
                          tree op1, enum tree_code code)
{
  tree t;
  if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
      && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
    t = expand_vector_piecewise (gsi, do_compare, type,
				 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
  else
    t = NULL_TREE;

  return t;
}

/* Helper function of expand_vector_divmod.  Gimplify a RSHIFT_EXPR in type
   of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
   the result if successful, otherwise return NULL_TREE.
*/
static tree
add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
{
  optab op;
  unsigned int i, nunits = nunits_for_known_piecewise_op (type);
  bool scalar_shift = true;

  /* A vector-by-scalar shift is usable only when all counts agree.  */
  for (i = 1; i < nunits; i++)
    {
      if (shiftcnts[i] != shiftcnts[0])
	scalar_shift = false;
    }

  /* Shifting everything by zero is a no-op.  */
  if (scalar_shift && shiftcnts[0] == 0)
    return op0;

  if (scalar_shift)
    {
      op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
      if (op != unknown_optab
	  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
				build_int_cst (NULL_TREE, shiftcnts[0]));
    }

  /* Fall back to a vector-by-vector shift with per-element counts.  */
  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op != unknown_optab
      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
    {
      tree_vector_builder vec (type, nunits, 1);
      for (i = 0; i < nunits; i++)
	vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
      return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
    }

  return NULL_TREE;
}

/* Try to expand integer vector division by constant using
   widening multiply, shifts and additions.
*/
static tree
expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
		      tree op1, enum tree_code code)
{
  bool use_pow2 = true;
  bool has_vector_shift = true;
  /* MODE encodes which multiply-highpart strategy fits all elements:
     -1 = not yet decided, -2 = give up on the highpart path,
     0/1 = unsigned (without/with the add-back correction),
     2..5 = signed variants (bit 0: negative divisor, bit 1: large ml).  */
  int mode = -1, this_mode;
  int pre_shift = -1, post_shift;
  unsigned int nunits = nunits_for_known_piecewise_op (type);
  int *shifts = XALLOCAVEC (int, nunits * 4);
  int *pre_shifts = shifts + nunits;
  int *post_shifts = pre_shifts + nunits;
  int *shift_temps = post_shifts + nunits;
  unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
  int prec = TYPE_PRECISION (TREE_TYPE (type));
  int dummy_int;
  unsigned int i;
  signop sign_p = TYPE_SIGN (TREE_TYPE (type));
  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
  tree cur_op, mulcst, tem;
  optab op;

  if (prec > HOST_BITS_PER_WIDE_INT)
    return NULL_TREE;

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    has_vector_shift = false;

  /* Analysis phase.  Determine if all op1 elements are either power
     of two and it is possible to expand it using shifts (or for remainder
     using masking).  Additionally compute the multiplicative constants
     and pre and post shifts if the division is to be expanded using
     widening or high part multiplication plus shifts.  */
  for (i = 0; i < nunits; i++)
    {
      tree cst = VECTOR_CST_ELT (op1, i);
      unsigned HOST_WIDE_INT ml;

      if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
	return NULL_TREE;
      pre_shifts[i] = 0;
      post_shifts[i] = 0;
      mulc[i] = 0;
      if (use_pow2
	  && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
	use_pow2 = false;
      if (use_pow2)
	{
	  shifts[i] = tree_log2 (cst);
	  if (shifts[i] != shifts[0]
	      && code == TRUNC_DIV_EXPR
	      && !has_vector_shift)
	    use_pow2 = false;
	}
      if (mode == -2)
	continue;
      if (sign_p == UNSIGNED)
	{
	  unsigned HOST_WIDE_INT mh;
	  unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;

	  if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
	    /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
	    return NULL_TREE;

	  if (d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  /* Find a suitable multiplier and right shift count
	     instead of multiplying with D.  */
	  mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

	  /* If the suggested multiplier is more than SIZE bits, we can
	     do better for even divisors, using an initial right shift.  */
	  if ((mh != 0 && (d & 1) == 0)
	      || (!has_vector_shift && pre_shift != -1))
	    {
	      if (has_vector_shift)
		pre_shift = ctz_or_zero (d);
	      else if (pre_shift == -1)
		{
		  /* Without a vector shift, all elements must share one
		     pre-shift; use the smallest usable one.  */
		  unsigned int j;
		  for (j = 0; j < nunits; j++)
		    {
		      tree cst2 = VECTOR_CST_ELT (op1, j);
		      unsigned HOST_WIDE_INT d2;
		      int this_pre_shift;

		      if (!tree_fits_uhwi_p (cst2))
			return NULL_TREE;
		      d2 = tree_to_uhwi (cst2) & mask;
		      if (d2 == 0)
			return NULL_TREE;
		      this_pre_shift = floor_log2 (d2 & -d2);
		      if (pre_shift == -1 || this_pre_shift < pre_shift)
			pre_shift = this_pre_shift;
		    }
		  if (i != 0 && pre_shift != 0)
		    {
		      /* Restart.  */
		      i = -1U;
		      mode = -1;
		      continue;
		    }
		}
	      if (pre_shift != 0)
		{
		  if ((d >> pre_shift) <= 1)
		    {
		      mode = -2;
		      continue;
		    }
		  mh = choose_multiplier (d >> pre_shift, prec,
					  prec - pre_shift,
					  &ml, &post_shift, &dummy_int);
		  gcc_assert (!mh);
		  pre_shifts[i] = pre_shift;
		}
	    }
	  if (!mh)
	    this_mode = 0;
	  else
	    this_mode = 1;
	}
      else
	{
	  HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
	  unsigned HOST_WIDE_INT abs_d;

	  if (d == -1)
	    return NULL_TREE;

	  /* Since d might be INT_MIN, we have to cast to
	     unsigned HOST_WIDE_INT before negating to avoid
	     undefined signed overflow.  */
	  abs_d = (d >= 0
		   ? (unsigned HOST_WIDE_INT) d
		   : - (unsigned HOST_WIDE_INT) d);

	  /* n rem d = n rem -d */
	  if (code == TRUNC_MOD_EXPR && d < 0)
	    d = abs_d;
	  else if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
	    {
	      /* This case is not handled correctly below.  */
	      mode = -2;
	      continue;
	    }
	  if (abs_d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  choose_multiplier (abs_d, prec, prec - 1, &ml,
			     &post_shift, &dummy_int);
	  if (ml >= HOST_WIDE_INT_1U << (prec - 1))
	    {
	      this_mode = 4 + (d < 0);
	      ml |= HOST_WIDE_INT_M1U << (prec - 1);
	    }
	  else
	    this_mode = 2 + (d < 0);
	}
      mulc[i] = ml;
      post_shifts[i] = post_shift;
      if ((i && !has_vector_shift && post_shifts[0] != post_shift)
	  || post_shift >= prec
	  || pre_shifts[i] >= prec)
	this_mode = -2;

      if (i == 0)
	mode = this_mode;
      else if (mode != this_mode)
	mode = -2;
    }

  if (use_pow2)
    {
      tree addend = NULL_TREE;
      if (sign_p == SIGNED)
	{
	  tree uns_type;

	  /* Both division and remainder sequences need
	     op0 < 0 ? mask : 0 computed.  It can be either computed as
	     (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
	     if none of the shifts is 0, or as the conditional.  */
	  for (i = 0; i < nunits; i++)
	    if (shifts[i] == 0)
	      break;
	  uns_type
	    = build_vector_type (build_nonstandard_integer_type (prec, 1),
				 nunits);
	  if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
	    {
	      for (i = 0; i < nunits; i++)
		shift_temps[i] = prec - 1;
	      cur_op = add_rshift (gsi, type, op0, shift_temps);
	      if (cur_op != NULL_TREE)
		{
		  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					    uns_type, cur_op);
		  for (i = 0; i < nunits; i++)
		    shift_temps[i] = prec - shifts[i];
		  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
		  if (cur_op != NULL_TREE)
		    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					      type, cur_op);
		}
	    }
	  if (addend == NULL_TREE
	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
	    {
	      /* Shift sequence not available; use a VEC_COND_EXPR to
		 select the per-element (2^shift - 1) correction.  */
	      tree zero, cst, cond, mask_type;
	      gimple *stmt;

	      mask_type = build_same_sized_truth_vector_type (type);
	      zero = build_zero_cst (type);
	      cond = build2 (LT_EXPR, mask_type, op0, zero);
	      tree_vector_builder vec (type, nunits, 1);
	      for (i = 0; i < nunits; i++)
		vec.quick_push (build_int_cst (TREE_TYPE (type),
					       (HOST_WIDE_INT_1U
						<< shifts[i]) - 1));
	      cst = vec.build ();
	      addend = make_ssa_name (type);
	      stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
					  cst, zero);
	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
	    }
	}
      if (code == TRUNC_DIV_EXPR)
	{
	  if (sign_p == UNSIGNED)
	    {
	      /* q = op0 >> shift;  */
	      cur_op = add_rshift (gsi, type, op0, shifts);
	      if (cur_op != NULL_TREE)
		return cur_op;
	    }
	  else if (addend != NULL_TREE)
	    {
	      /* t1 = op0 + addend;
		 q = t1 >> shift;  */
	      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	      if (op != unknown_optab
		  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
		{
		  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
		  cur_op = add_rshift (gsi, type, cur_op, shifts);
		  if (cur_op != NULL_TREE)
		    return cur_op;
		}
	    }
	}
      else
	{
	  /* TRUNC_MOD_EXPR by a power of two is a mask.  */
	  tree mask;
	  tree_vector_builder vec (type, nunits, 1);
	  for (i = 0; i < nunits; i++)
	    vec.quick_push (build_int_cst (TREE_TYPE (type),
					   (HOST_WIDE_INT_1U
					    << shifts[i]) - 1));
	  mask = vec.build ();
	  op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
	  if (op != unknown_optab
	      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	    {
	      if (sign_p == UNSIGNED)
		/* r = op0 & mask;  */
		return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
	      else if (addend != NULL_TREE)
		{
		  /* t1 = op0 + addend;
		     t2 = t1 & mask;
		     r = t2 - addend;  */
		  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
		  if (op != unknown_optab
		      && optab_handler (op, TYPE_MODE (type))
			 != CODE_FOR_nothing)
		    {
		      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
						addend);
		      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
						cur_op, mask);
		      op = optab_for_tree_code (MINUS_EXPR, type,
						optab_default);
		      if (op != unknown_optab
			  && optab_handler (op, TYPE_MODE (type))
			     != CODE_FOR_nothing)
			return gimplify_build2 (gsi, MINUS_EXPR, type,
						cur_op, addend);
		    }
		}
	    }
	}
    }

  if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return NULL_TREE;

  if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
    return NULL_TREE;

  cur_op = op0;

  /* First switch: pre-multiplication setup for each strategy.  */
  switch (mode)
    {
    case 0:
      gcc_assert (sign_p == UNSIGNED);
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 1:
      gcc_assert (sign_p == UNSIGNED);
      for (i = 0; i < nunits; i++)
	{
	  shift_temps[i] = 1;
	  post_shifts[i]--;
	}
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      gcc_assert (sign_p == SIGNED);
      for (i = 0; i < nunits; i++)
	shift_temps[i] = prec - 1;
      break;
    default:
      return NULL_TREE;
    }

  tree_vector_builder vec (type, nunits, 1);
  for (i = 0; i < nunits; i++)
    vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i]));
  mulcst = vec.build ();

  cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);

  /* Second switch: post-multiplication corrections.  */
  switch (mode)
    {
    case 0:
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      break;
    case 1:
      /* t1 = oprnd0 h* ml;
	 t2 = oprnd0 - t1;
	 t3 = t2 >> 1;
	 t4 = t1 + t3;
	 q = t4 >> (post_shift - 1);  */
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
      tem = add_rshift (gsi, type, tem, shift_temps);
      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
      cur_op = add_rshift (gsi, type, tem, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      /* t1 = oprnd0 h* ml;
	 t2 = t1; [ iff (mode & 2) != 0 ]
	 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
	 t3 = t2 >> post_shift;
	 t4 = oprnd0 >> (prec - 1);
	 q = t3 - t4; [ iff (mode & 1) == 0 ]
	 q = t4 - t3; [ iff (mode & 1) != 0 ]  */
      if ((mode & 2) == 0)
	{
	  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	  if (op == unknown_optab
	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	    return NULL_TREE;
	  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
	}
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      tem = add_rshift (gsi, type, op0, shift_temps);
      if (tem == NULL_TREE)
	return NULL_TREE;
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      if ((mode & 1) == 0)
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
      else
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
      break;
    default:
      gcc_unreachable ();
    }

  if (code == TRUNC_DIV_EXPR)
    return cur_op;

  /* We divided.  Now finish by:
     t1 = q * oprnd1;
     r = oprnd0 - t1;  */
  op = optab_for_tree_code (MULT_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
  op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
}

/* Expand a vector condition to scalars, by using many conditions
   on the vector's elements.
*/
static void
expand_vector_condition (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = gimple_expr_type (stmt);
  tree a = gimple_assign_rhs1 (stmt);
  tree a1 = a;
  tree a2 = NULL_TREE;
  bool a_is_comparison = false;
  tree b = gimple_assign_rhs2 (stmt);
  tree c = gimple_assign_rhs3 (stmt);
  vec<constructor_elt, va_gc> *v;
  tree constr;
  tree inner_type = TREE_TYPE (type);
  tree cond_type = TREE_TYPE (TREE_TYPE (a));
  tree comp_inner_type = cond_type;
  tree width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  tree comp_width = width;
  tree comp_index = index;
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* The condition may be an embedded comparison rather than an SSA
     name; in that case split it into its two operands, whose element
     width may differ from the result's element width.  */
  if (!is_gimple_val (a))
    {
      gcc_assert (COMPARISON_CLASS_P (a));
      a_is_comparison = true;
      a1 = TREE_OPERAND (a, 0);
      a2 = TREE_OPERAND (a, 1);
      comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
      comp_width = TYPE_SIZE (comp_inner_type);
    }

  /* Target supports the VEC_COND_EXPR natively: leave the stmt alone.  */
  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
    return;

  /* Handle vector boolean types with bitmasks.  If there is a comparison
     and we can expand the comparison into the vector boolean bitmask,
     or otherwise if it is compatible with type, we can transform
      vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
     into
      tmp_6 = x_2 < y_3;
      tmp_7 = tmp_6 & vbfld_4;
      tmp_8 = ~tmp_6;
      tmp_9 = tmp_8 & vbfld_5;
      vbfld_1 = tmp_7 | tmp_9;
     Similarly for vbfld_10 instead of x_2 < y_3.  */
  if (VECTOR_BOOLEAN_TYPE_P (type)
      && SCALAR_INT_MODE_P (TYPE_MODE (type))
      && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
		   TYPE_VECTOR_SUBPARTS (type)
		   * GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type))))
      && (a_is_comparison
	  ? useless_type_conversion_p (type, TREE_TYPE (a))
	  : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
    {
      if (a_is_comparison)
	a = gimplify_build2 (gsi, TREE_CODE (a), type, a1, a2);
      a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
      a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
      a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
      a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
      gimple_assign_set_rhs_from_tree (gsi, a);
      update_stmt (gsi_stmt (*gsi));
      return;
    }

  /* TODO: try and find a smaller vector type.  */

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector condition will be expanded piecewise");

  int nunits = nunits_for_known_piecewise_op (type);
  vec_alloc (v, nunits);
  for (i = 0; i < nunits; i++)
    {
      tree aa, result;
      tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
      tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
      if (a_is_comparison)
	{
	  tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
				       comp_width, comp_index);
	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
				       comp_width, comp_index);
	  aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
	}
      else
	aa = tree_vec_extract (gsi, cond_type, a, width, index);
      result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
      index = int_const_binop (PLUS_EXPR, index, width);
      /* NOTE(review): this is a pointer comparison of the two size
	 trees; it presumably relies on TYPE_SIZE node sharing for
	 equal sizes -- confirm before changing to tree_int_cst_equal.  */
      if (width == comp_width)
	comp_index = index;
      else
	comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
    }

  constr = build_constructor (type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}

/* Expand the vector operation CODE of ASSIGN piecewise using
   COMPUTE_TYPE-sized pieces; returns the replacement RHS tree.  */
static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
			 gassign *assign, enum tree_code code)
{
  machine_mode compute_mode = TYPE_MODE 
(compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (!VECTOR_MODE_P (compute_mode))
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
	/* Word-parallel bit tricks only when overflow cannot trap.  */
	if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
					 gimple_assign_rhs1 (assign),
					 gimple_assign_rhs2 (assign), code);
	break;

      case NEGATE_EXPR:
	if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_unop, do_negate, type,
					 gimple_assign_rhs1 (assign),
					 NULL_TREE, code);
	break;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
	/* Bitwise ops are trivially word-parallel.  */
	return expand_vector_parallel (gsi, do_binop, type,
				       gimple_assign_rhs1 (assign),
				       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
	return expand_vector_parallel (gsi, do_unop, type,
				       gimple_assign_rhs1 (assign),
				       NULL_TREE, code);
      case EQ_EXPR:
      case NE_EXPR:
      case GT_EXPR:
      case LT_EXPR:
      case GE_EXPR:
      case LE_EXPR:
      case UNEQ_EXPR:
      case UNGT_EXPR:
      case UNLT_EXPR:
      case UNGE_EXPR:
      case UNLE_EXPR:
      case LTGT_EXPR:
      case ORDERED_EXPR:
      case UNORDERED_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);

	  return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
	}

      case TRUNC_DIV_EXPR:
      case TRUNC_MOD_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);
	  tree ret;

	  /* The multiply-highpart trick only applies when optimizing
	     and the divisor is a constant vector.  */
	  if (!optimize
	      || !VECTOR_INTEGER_TYPE_P (type)
	      || TREE_CODE (rhs2) != VECTOR_CST
	      || !VECTOR_MODE_P (TYPE_MODE (type)))
	    break;

	  ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
	  if (ret != NULL_TREE)
	    return ret;
	  break;
	}

      default:
	break;
      }

  /* Fallback: expand one COMPUTE_TYPE-sized piece at a time.  */
  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    NULL_TREE, code);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    gimple_assign_rhs2 (assign), code);
}

/* Try to optimize
   a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
   style stmts into:
   _9 = { b_7, b_7, b_7, b_7 };
   a_5 = _9 + { 0, 3, 6, 9 };
   because vector splat operation is usually more efficient
   than piecewise initialization of the vector.  */

static void
optimize_vector_constructor (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  tree type = TREE_TYPE (rhs);
  unsigned int i, j;
  unsigned HOST_WIDE_INT nelts;
  bool all_same = true;
  constructor_elt *elt;
  gimple *g;
  tree base = NULL_TREE;
  optab op;

  if (!TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)
      || nelts <= 2
      || CONSTRUCTOR_NELTS (rhs) != nelts)
    return;
  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return;
  FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
    if (TREE_CODE (elt->value) != SSA_NAME
	|| TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
      return;
    else
      {
	tree this_base = elt->value;
	if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
	  all_same = false;
	/* Walk back through at most NELTS additions of constants to
	   find a common base.  */
	for (j = 0; j < nelts + 1; j++)
	  {
	    g = SSA_NAME_DEF_STMT (this_base);
	    if (is_gimple_assign (g)
		&& gimple_assign_rhs_code (g) == PLUS_EXPR
		&& TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
		&& TREE_CODE
(gimple_assign_rhs1 (g)) == SSA_NAME 1110 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g))) 1111 this_base = gimple_assign_rhs1 (g); 1112 else 1113 break; 1114 } 1115 if (i == 0) 1116 base = this_base; 1117 else if (this_base != base) 1118 return; 1119 } 1120 if (all_same) 1121 return; 1122 tree_vector_builder cst (type, nelts, 1); 1123 for (i = 0; i < nelts; i++) 1124 { 1125 tree this_base = CONSTRUCTOR_ELT (rhs, i)->value; 1126 tree elt = build_zero_cst (TREE_TYPE (base)); 1127 while (this_base != base) 1128 { 1129 g = SSA_NAME_DEF_STMT (this_base); 1130 elt = fold_binary (PLUS_EXPR, TREE_TYPE (base), 1131 elt, gimple_assign_rhs2 (g)); 1132 if (elt == NULL_TREE 1133 || TREE_CODE (elt) != INTEGER_CST 1134 || TREE_OVERFLOW (elt)) 1135 return; 1136 this_base = gimple_assign_rhs1 (g); 1137 } 1138 cst.quick_push (elt); 1139 } 1140 for (i = 0; i < nelts; i++) 1141 CONSTRUCTOR_ELT (rhs, i)->value = base; 1142 g = gimple_build_assign (make_ssa_name (type), rhs); 1143 gsi_insert_before (gsi, g, GSI_SAME_STMT); 1144 g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g), 1145 cst.build ()); 1146 gsi_replace (gsi, g, false); 1147 } 1148 1149 /* Return a type for the widest vector mode whose components are of type 1150 TYPE, or NULL_TREE if none is found. 
*/

static tree
type_for_widest_vector_mode (tree type, optab op)
{
  machine_mode inner_mode = TYPE_MODE (type);
  machine_mode best_mode = VOIDmode, mode;
  poly_int64 best_nunits = 0;

  /* Pick the vector mode class matching the scalar mode class of TYPE,
     starting from that class's smallest vector mode.  */
  if (SCALAR_FLOAT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FLOAT;
  else if (SCALAR_FRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FRACT;
  else if (SCALAR_UFRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UFRACT;
  else if (SCALAR_ACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_ACCUM;
  else if (SCALAR_UACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UACCUM;
  else if (inner_mode == BImode)
    mode = MIN_MODE_VECTOR_BOOL;
  else
    mode = MIN_MODE_VECTOR_INT;

  /* Scan all vector modes of that class; keep the candidate with the
     most elements for which the target actually implements OP.  */
  FOR_EACH_MODE_FROM (mode, mode)
    if (GET_MODE_INNER (mode) == inner_mode
	&& maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
	&& optab_handler (op, mode) != CODE_FOR_nothing)
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);

  if (best_mode == VOIDmode)
    return NULL_TREE;
  else
    return build_vector_type_for_mode (type, best_mode);
}


/* Build a reference to the element of the vector VECT.  Function
   returns either the element itself, either BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  In case when PTMPVEC is NULL new temporary variable
   will be created.
*/
static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple *asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = nunits_for_known_piecewise_op (vect_type);

  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
	 we don't care about the high bits of the value.  */
      index = TREE_INT_CST_LOW (idx);
      if (!tree_fits_uhwi_p (idx) || index >= elements)
	{
	  /* Out-of-range indices wrap modulo the element count
	     (ELEMENTS is a power of two here, so mask works).  */
	  index &= elements - 1;
	  idx = build_int_cst (TREE_TYPE (idx), index);
	}

      /* When lowering a vector statement sequence do some easy
         simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
	  if (is_gimple_assign (def_stmt)
	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
	    vect = gimple_assign_rhs1 (def_stmt);
	}

      if (TREE_CODE (vect) == VECTOR_CST)
	return VECTOR_CST_ELT (vect, index);
      else if (TREE_CODE (vect) == CONSTRUCTOR
	       && (CONSTRUCTOR_NELTS (vect) == 0
		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
		      != VECTOR_TYPE))
	{
	  /* Elements missing from the tail of a CONSTRUCTOR are
	     implicitly zero.  */
	  if (index < CONSTRUCTOR_NELTS (vect))
	    return CONSTRUCTOR_ELT (vect, index)->value;
	  return build_zero_cst (vect_elt_type);
	}
      else
	{
	  /* Constant index into a general vector value: use a
	     BIT_FIELD_REF at the element's bit position.  */
	  tree size = TYPE_SIZE (vect_elt_type);
	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
				  size);
	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
	}
    }

  /* Variable index: spill the vector into an addressable temporary and
     index it as an array.  The temporary is cached through *PTMPVEC so
     repeated extractions from the same vector reuse one store.  */
  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
		 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
		 idx, NULL_TREE, NULL_TREE);
}

/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands:
   VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0, V1 must have the
   same number of arguments.
*/

static void
lower_vec_perm (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned HOST_WIDE_INT elements;
  vec<constructor_elt, va_gc> *v;
  tree constr, t, si, i_val;
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  location_t loc = gimple_location (gsi_stmt (*gsi));
  unsigned i;

  /* Piecewise lowering only works for fixed-length vectors.  */
  if (!TYPE_VECTOR_SUBPARTS (vect_type).is_constant (&elements))
    return;

  /* Look through an SSA name defined by a constant mask.  */
  if (TREE_CODE (mask) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
      if (is_gimple_assign (def_stmt)
	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
	mask = gimple_assign_rhs1 (def_stmt);
    }

  vec_perm_builder sel_int;

  if (TREE_CODE (mask) == VECTOR_CST
      && tree_to_vec_perm_builder (&sel_int, mask))
    {
      vec_perm_indices indices (sel_int, 2, elements);
      /* The target can do this constant permutation directly; just make
	 sure the statement uses the constant mask.  */
      if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices))
	{
	  gimple_assign_set_rhs3 (stmt, mask);
	  update_stmt (stmt);
	  return;
	}
      /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
	 vector as VEC1 and a right element shift MASK.  */
      if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
	  != CODE_FOR_nothing
	  && TREE_CODE (vec1) == VECTOR_CST
	  && initializer_zerop (vec1)
	  && maybe_ne (indices[0], 0)
	  && known_lt (poly_uint64 (indices[0]), elements))
	{
	  /* A shift mask selects INDICES[0], INDICES[0]+1, ...; indices
	     that point into the (zero) second vector don't matter.  */
	  bool ok_p = indices.series_p (0, 1, indices[0], 1);
	  if (!ok_p)
	    {
	      for (i = 1; i < elements; ++i)
		{
		  poly_uint64 actual = indices[i];
		  poly_uint64 expected = i + indices[0];
		  /* Indices into the second vector are all equivalent.  */
		  if (maybe_lt (actual, elements)
		      ? maybe_ne (actual, expected)
		      : maybe_lt (expected, elements))
		    break;
		}
	      ok_p = i == elements;
	    }
	  if (ok_p)
	    {
	      gimple_assign_set_rhs3 (stmt, mask);
	      update_stmt (stmt);
	      return;
	    }
	}
    }
  else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
    /* Variable permutation supported in hardware; nothing to lower.  */
    return;

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector shuffling operation will be expanded piecewise");

  /* Build a CONSTRUCTOR selecting each result element one at a time.  */
  vec_alloc (v, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
	{
	  unsigned HOST_WIDE_INT index;

	  index = TREE_INT_CST_LOW (i_val);
	  if (!tree_fits_uhwi_p (i_val) || index >= elements)
	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));

	  /* Bit ELEMENTS of the index selects between VEC0 and VEC1.  */
	  if (two_operand_p && (index & elements) != 0)
	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
	  else
	    t = vector_element (gsi, vec0, i_val, &vec0tmp);

	  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
	}
      else
	{
	  tree cond = NULL_TREE, v0_val;

	  /* Non-constant index: select between the two source vectors
	     at runtime with a scalar COND_EXPR.  */
	  if (two_operand_p)
	    {
	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
				  build_int_cst (mask_elt_type, elements));
	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					       true, GSI_SAME_STMT);
	    }

	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			       build_int_cst (mask_elt_type, elements - 1));
	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
					    true, GSI_SAME_STMT);

	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
					     true, GSI_SAME_STMT);

	  if (two_operand_p)
	    {
	      tree v1_val;

	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
						 true, GSI_SAME_STMT);

	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
				  cond, build_zero_cst (mask_elt_type));
	      cond = fold_build3 (COND_EXPR, vect_elt_type,
				  cond, v0_val, v1_val);
	      t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	    }
	  else
	    t = v0_val;
	}

      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}

/* If OP is a uniform vector return the element it is a splat from.  */

static tree
ssa_uniform_vector_p (tree op)
{
  if (TREE_CODE (op) == VECTOR_CST
      || TREE_CODE (op) == VEC_DUPLICATE_EXPR
      || TREE_CODE (op) == CONSTRUCTOR)
    return uniform_vector_p (op);
  if (TREE_CODE (op) == SSA_NAME)
    {
      /* Look through a single-rhs defining statement.  */
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (gimple_assign_single_p (def_stmt))
	return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
    }
  return NULL_TREE;
}

/* Return type in which CODE operation with optab OP can be
   computed.  */

static tree
get_compute_type (enum tree_code code, optab op, tree type)
{
  /* For very wide vectors, try using a smaller vector mode.  */
  tree compute_type = type;
  if (op
      && (!VECTOR_MODE_P (TYPE_MODE (type))
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
    {
      tree vector_compute_type
	= type_for_widest_vector_mode (TREE_TYPE (type), op);
      /* Only use a narrower vector type if it is a genuine multi-element
	 vector smaller than TYPE and the target supports OP on it.  */
      if (vector_compute_type != NULL_TREE
	  && subparts_gt (compute_type, vector_compute_type)
	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vector_compute_type), 1U)
	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
	      != CODE_FOR_nothing))
	compute_type = vector_compute_type;
    }

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      machine_mode compute_mode = TYPE_MODE (compute_type);
      if (VECTOR_MODE_P (compute_mode))
	{
	  if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
	    return compute_type;
	  if (code == MULT_HIGHPART_EXPR
	      && can_mult_highpart_p (compute_mode,
				      TYPE_UNSIGNED (compute_type)))
	    return compute_type;
	}
      /* There is no operation in hardware, so fall back to scalars.  */
      compute_type = TREE_TYPE (type);
    }

  return compute_type;
}

/* Callback for piecewise expansion of a vector COND_EXPR: build the
   scalar COND_EXPR for the BITSIZE-wide piece of A and B at BITPOS,
   reusing (an unshared copy of) the scalar condition of the statement
   at GSI.  */

static tree
do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	 tree bitpos, tree bitsize, enum tree_code code,
	 tree type ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
  return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
}

/* Expand a vector COND_EXPR to scalars, piecewise.
*/
static void
expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = gimple_expr_type (stmt);
  tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
  machine_mode compute_mode = TYPE_MODE (compute_type);
  gcc_assert (compute_mode != BLKmode);
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree rhs3 = gimple_assign_rhs3 (stmt);
  tree new_rhs;

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (!VECTOR_MODE_P (compute_mode))
    new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
				      COND_EXPR);
  else
    new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
				       rhs2, rhs3, COND_EXPR);
  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree. One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}

/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
{
  tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
  enum tree_code code;
  optab op = unknown_optab;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  /* Only consider code == GIMPLE_ASSIGN.  */
  gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
  if (!stmt)
    return;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  /* Ternary vector codes get dedicated lowering paths.  */
  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (code == VEC_COND_EXPR)
    {
      expand_vector_condition (gsi);
      return;
    }

  if (code == COND_EXPR
      && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
      && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
    {
      expand_vector_scalar_condition (gsi);
      return;
    }

  if (code == CONSTRUCTOR
      && TREE_CODE (lhs) == SSA_NAME
      && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
      && !gimple_clobber_p (stmt)
      && optimize)
    {
      optimize_vector_constructor (gsi);
      return;
    }

  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (!VECTOR_TYPE_P (type)
      || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
    return;

  /* A scalar operation pretending to be a vector one.  */
  if (VECTOR_BOOLEAN_TYPE_P (type)
      && !VECTOR_MODE_P (TYPE_MODE (type))
      && TYPE_MODE (type) != BLKmode)
    return;

  /* If the vector operation is operating on all same vector elements
     implement it with a scalar operation and a splat if the target
     supports the scalar operation.  */
  tree srhs1, srhs2 = NULL_TREE;
  if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
      && (rhs2 == NULL_TREE
	  || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
	      && (srhs2 = rhs2))
	  || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
      /* As we query direct optabs restrict to non-convert operations.  */
      && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
    {
      op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
      if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
	  && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
	{
	  tree slhs = make_ssa_name (TREE_TYPE (srhs1));
	  gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2);
	  gsi_insert_before (gsi, repl, GSI_SAME_STMT);
	  gimple_assign_set_rhs_from_tree (gsi,
					   build_vector_from_val (type, slhs));
	  update_stmt (stmt);
	  return;
	}
    }

  /* Conversions are handled elsewhere; nothing to lower here.  */
  if (CONVERT_EXPR_CODE_P (code)
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  /* The signedness is determined from input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR)
    {
      type = TREE_TYPE (rhs1);
      /* We do not know how to scalarize those.  */
      return;
    }

  /* For widening/narrowing vector operations, the relevant type is of the
     arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     calculated in the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_WIDEN_MULT_EVEN_EXPR
      || code == VEC_WIDEN_MULT_ODD_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    {
      type = TREE_TYPE (rhs1);
      /* We do not know how to scalarize those.  */
      return;
    }

  /* Choose between vector shift/rotate by vector and vector shift/rotate by
     scalar */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
	 could be a scalar variable or a constant.  Transform
	 vector <op> {x,x,x,x} ==> vector <op> scalar.  */
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	{
	  tree first;

	  if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
	    {
	      gimple_assign_set_rhs2 (stmt, first);
	      update_stmt (stmt);
	      rhs2 = first;
	    }
	}

      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	op = opv;
      else
	{
	  op = optab_for_tree_code (code, type, optab_scalar);

	  compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* The rtl expander will expand vector/scalar as vector/vector
	     if necessary.  Pick one with wider vector type.  */
	  tree compute_vtype = get_compute_type (code, opv, type);
	  if (subparts_gt (compute_vtype, compute_type))
	    {
	      compute_type = compute_vtype;
	      op = opv;
	    }
	}

      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
	{
	  if (compute_type == NULL_TREE)
	    compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* Before splitting vector rotates into scalar rotates,
	     see if we can't use vector shifts and BIT_IOR_EXPR
	     instead.  For vector by vector rotates we'd also
	     need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
	     for now, fold doesn't seem to create such rotates anyway.  */
	  if (compute_type == TREE_TYPE (type)
	      && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	    {
	      optab oplv = vashl_optab, opl = ashl_optab;
	      optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
	      tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
	      tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
	      tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
	      tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
	      tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
	      /* The rtl expander will expand vector/scalar as vector/vector
		 if necessary.  Pick one with wider vector type.  */
	      if (subparts_gt (compute_lvtype, compute_ltype))
		{
		  compute_ltype = compute_lvtype;
		  opl = oplv;
		}
	      if (subparts_gt (compute_rvtype, compute_rtype))
		{
		  compute_rtype = compute_rvtype;
		  opr = oprv;
		}
	      /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
		 BIT_IOR_EXPR.  */
	      compute_type = compute_ltype;
	      if (subparts_gt (compute_type, compute_rtype))
		compute_type = compute_rtype;
	      if (subparts_gt (compute_type, compute_otype))
		compute_type = compute_otype;
	      /* Verify all 3 operations can be performed in that type.  */
	      if (compute_type != TREE_TYPE (type))
		{
		  if (optab_handler (opl, TYPE_MODE (compute_type))
		      == CODE_FOR_nothing
		      || optab_handler (opr, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing
		      || optab_handler (opo, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing)
		    compute_type = TREE_TYPE (type);
		}
	    }
	}
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == unknown_optab
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  if (compute_type == NULL_TREE)
    compute_type = get_compute_type (code, op, type);
  if (compute_type == type)
    return;

  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree. One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}

/* Use this to lower vector operations introduced by the vectorizer,
   if it may need the bit-twiddling tricks implemented in this file.  */

static unsigned int
expand_vector_operations (void)
{
  gimple_stmt_iterator gsi;
  basic_block bb;
  bool cfg_changed = false;

  FOR_EACH_BB_FN (bb, cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  expand_vector_operations_1 (&gsi);
	  /* ???  If we do not cleanup EH then we will ICE in
	     verification.  But in reality we have created wrong-code
	     as we did not properly transition EH info and edges to
	     the piecewise computations.  */
	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
	      && gimple_purge_dead_eh_edges (bb))
	    cfg_changed = true;
	}
    }

  return cfg_changed ?
TODO_cleanup_cfg : 0;
}

namespace {

const pass_data pass_data_lower_vector =
{
  GIMPLE_PASS, /* type */
  "veclower", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

/* Pre-SSA vector-lowering pass; skipped when lowering has already been
   performed (PROP_gimple_lvec is set).  */

class pass_lower_vector : public gimple_opt_pass
{
public:
  pass_lower_vector (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_lvec);
    }

  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector

} // anon namespace

gimple_opt_pass *
make_pass_lower_vector (gcc::context *ctxt)
{
  return new pass_lower_vector (ctxt);
}

namespace {

const pass_data pass_data_lower_vector_ssa =
{
  GIMPLE_PASS, /* type */
  "veclower2", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_update_ssa
    | TODO_cleanup_cfg ), /* todo_flags_finish */
};

/* SSA-form vector-lowering pass ("veclower2"); unlike pass_lower_vector
   it has no gate and is clonable so it can run more than once.  */

class pass_lower_vector_ssa : public gimple_opt_pass
{
public:
  pass_lower_vector_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector_ssa

} // anon namespace

gimple_opt_pass *
make_pass_lower_vector_ssa (gcc::context *ctxt)
{
  return new pass_lower_vector_ssa (ctxt);
}

#include "gt-tree-vect-generic.h"