/* Lower vector operations to scalar operations.
   Copyright (C) 2004-2018 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "langhooks.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimplify.h"
#include "tree-cfg.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"


static void expand_vector_operations_1 (gimple_stmt_iterator *);

/* Return the number of elements in a vector type TYPE that we have
   already decided needs to be expanded piecewise.  We don't support
   this kind of expansion for variable-length vectors, since we should
   always check for target support before introducing uses of those.  */
static unsigned int
nunits_for_known_piecewise_op (const_tree type)
{
  return TYPE_VECTOR_SUBPARTS (type).to_constant ();
}

/* Return true if TYPE1 has more elements than TYPE2, where either
   type may be a vector or a scalar.  */

static inline bool
subparts_gt (tree type1, tree type2)
{
  poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1;
  poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1;
  return known_gt (n1, n2);
}

/* Build a constant of type TYPE, made of VALUE's bits replicated
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
{
  int width = tree_to_uhwi (TYPE_SIZE (inner_type));
  int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
	  / HOST_BITS_PER_WIDE_INT;
  unsigned HOST_WIDE_INT low, mask;
  HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
  int i;

  gcc_assert (n && n <= WIDE_INT_MAX_ELTS);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  for (i = 0; i < n; i++)
    a[i] = low;

  gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
  return wide_int_to_tree
    (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
}
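
/* For illustration only (not used by the code): with an 8-bit
   INNER_TYPE and VALUE == 0x7f, MASK is 0xff, so
   ~0 / 0xff == 0x0101010101010101 on a 64-bit HOST_WIDE_INT, and
   multiplying by (VALUE & MASK) gives 0x7f7f7f7f7f7f7f7f, i.e. VALUE
   replicated into every 8-bit slot; wide_int_to_tree then truncates
   the result to TYPE's precision.  */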

static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;

/* Return a suitable vector type made of NUNITS units each of mode
   "word_mode" (the global variable).  */
static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  /* We build a new type, but we canonicalize it nevertheless,
     because it still saves some memory.  */
  vector_last_nunits = nunits;
  vector_last_type = type_hash_canon (nunits,
				      build_vector_type (vector_inner_type,
							 nunits));
  return vector_last_type;
}

typedef tree (*elem_op_func) (gimple_stmt_iterator *,
			      tree, tree, tree, tree, tree, enum tree_code,
			      tree);

static inline tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
		  tree t, tree bitsize, tree bitpos)
{
  if (TREE_CODE (t) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (t);
      if (is_gimple_assign (def_stmt)
	  && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
	      || (bitpos
		  && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR)))
	t = gimple_assign_rhs1 (def_stmt);
    }
  if (bitpos)
    {
      if (TREE_CODE (type) == BOOLEAN_TYPE)
	{
	  tree itype
	    = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 0);
	  tree field = gimplify_build3 (gsi, BIT_FIELD_REF, itype, t,
					bitsize, bitpos);
	  return gimplify_build2 (gsi, NE_EXPR, type, field,
				  build_zero_cst (itype));
	}
      else
	return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
    }
  else
    return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
}

static tree
do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
	 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
	 enum tree_code code, tree type ATTRIBUTE_UNUSED)
{
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  return gimplify_build1 (gsi, code, inner_type, a);
}

static tree
do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	  tree bitpos, tree bitsize, enum tree_code code,
	  tree type ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  return gimplify_build2 (gsi, code, inner_type, a, b);
}

/* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0

   INNER_TYPE is the type of A's and B's elements.

   The returned expression is of signed integer type with size equal
   to the size of INNER_TYPE.  */
static tree
do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	    tree bitpos, tree bitsize, enum tree_code code, tree type)
{
  tree stype = TREE_TYPE (type);
  tree cst_false = build_zero_cst (stype);
  tree cst_true = build_all_ones_cst (stype);
  tree cmp;

  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);

  cmp = build2 (code, boolean_type_node, a, b);
  return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
}

/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
           (a ^ b) & 0x80808080

   a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
           (a ^ ~b) & 0x80808080

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if 4 vector items or more
   fit into a word.  */
static tree
do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
	       tree bitpos, tree bitsize,
	       enum tree_code code, tree type ATTRIBUTE_UNUSED)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, a_low, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  if (code == PLUS_EXPR)
    a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
  else
    {
      a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
      signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
    }

  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}

static tree
do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
	   tree unused ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
	   enum tree_code code ATTRIBUTE_UNUSED,
	   tree type ATTRIBUTE_UNUSED)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (b));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}
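
/* A minimal worked instance of the bit twiddling above, assuming two
   8-bit elements packed in a 16-bit word: for a = 0x80ff, b = 0x0101,
   (a & 0x7f7f) + (b & 0x7f7f) == 0x007f + 0x0101 == 0x0180 and
   (a ^ b) & 0x8080 == 0x8080, so a + b == 0x0180 ^ 0x8080 == 0x8100,
   which is exactly the lane-wise sums 0x80 + 0x01 == 0x81 and
   0xff + 0x01 == 0x00 (mod 0x100), with no carry leaking between
   lanes.  */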

/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  */
static tree
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
			 tree type, tree inner_type,
			 tree a, tree b, enum tree_code code)
{
  vec<constructor_elt, va_gc> *v;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = nunits_for_known_piecewise_op (type);
  int delta = tree_to_uhwi (part_width)
	      / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded piecewise");
  else
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded in parallel");

  vec_alloc (v, (nunits + delta - 1) / delta);
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
    {
      tree result = f (gsi, inner_type, a, b, index, part_width, code, type);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
    }

  return build_constructor (type, v);
}

/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  */
static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
			tree a, tree b,
			enum tree_code code)
{
  tree result, compute_type;
  int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
    return expand_vector_piecewise (gsi, f,
				    type, TREE_TYPE (type),
				    a, b, code);
  else if (n_words > 1)
    {
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (gsi, f,
					word_type, TREE_TYPE (word_type),
					a, b, code);
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
					 GSI_SAME_STMT);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      scalar_int_mode mode
	= int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require ();
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type);
      warning_at (loc, OPT_Wvector_operation_performance,
		  "vector operation will be expanded with a "
		  "single scalar operation");
    }

  return result;
}
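
/* For illustration: on a 32-bit target without vector hardware, a
   bitwise AND of two V4QI values falls into the single-scalar branch
   above: both operands are view-converted to one 32-bit integer and
   combined with a single scalar BIT_AND_EXPR, which is safe because
   bitwise operations never cross element boundaries.  */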

/* Expand a vector operation to scalars; for integer types we can use
   special bit twiddling tricks to do the sums a word at a time, using
   function F_PARALLEL instead of F.  These tricks are done only if
   they can process at least four items, that is, only if the vector
   holds at least four items and if a word can hold four items.  */
static tree
expand_vector_addition (gimple_stmt_iterator *gsi,
			elem_op_func f, elem_op_func f_parallel,
			tree type, tree a, tree b, enum tree_code code)
{
  int parts_per_word = UNITS_PER_WORD
		       / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));

  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
      && parts_per_word >= 4
      && nunits_for_known_piecewise_op (type) >= 4)
    return expand_vector_parallel (gsi, f_parallel,
				   type, a, b, code);
  else
    return expand_vector_piecewise (gsi, f,
				    type, TREE_TYPE (type),
				    a, b, code);
}

/* Try to expand vector comparison expression OP0 CODE OP1 by
   querying optab if the following expression:
	VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded.  */
static tree
expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
			  tree op1, enum tree_code code)
{
  tree t;
  if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
      && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
    t = expand_vector_piecewise (gsi, do_compare, type,
				 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
  else
    t = NULL_TREE;

  return t;
}

/* Helper function of expand_vector_divmod.  Gimplify a RSHIFT_EXPR in type
   of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
   the result if successful, otherwise return NULL_TREE.  */
static tree
add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
{
  optab op;
  unsigned int i, nunits = nunits_for_known_piecewise_op (type);
  bool scalar_shift = true;

  for (i = 1; i < nunits; i++)
    {
      if (shiftcnts[i] != shiftcnts[0])
	scalar_shift = false;
    }

  if (scalar_shift && shiftcnts[0] == 0)
    return op0;

  if (scalar_shift)
    {
      op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
      if (op != unknown_optab
	  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
				build_int_cst (NULL_TREE, shiftcnts[0]));
    }

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op != unknown_optab
      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
    {
      tree_vector_builder vec (type, nunits, 1);
      for (i = 0; i < nunits; i++)
	vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
      return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
    }

  return NULL_TREE;
}
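
/* For illustration: an unsigned V4SI division by { 8, 8, 8, 8 } takes
   the power-of-two path below and becomes a single shift,
   op0 >> { 3, 3, 3, 3 } (or op0 >> 3 if the target only has
   shift-by-scalar).  The signed variant must first add
   op0 < 0 ? 7 : 0 to op0 so that the shift rounds toward zero.  */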

/* Try to expand integer vector division by constant using
   widening multiply, shifts and additions.  */
static tree
expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
		      tree op1, enum tree_code code)
{
  bool use_pow2 = true;
  bool has_vector_shift = true;
  int mode = -1, this_mode;
  int pre_shift = -1, post_shift;
  unsigned int nunits = nunits_for_known_piecewise_op (type);
  int *shifts = XALLOCAVEC (int, nunits * 4);
  int *pre_shifts = shifts + nunits;
  int *post_shifts = pre_shifts + nunits;
  int *shift_temps = post_shifts + nunits;
  unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
  int prec = TYPE_PRECISION (TREE_TYPE (type));
  int dummy_int;
  unsigned int i;
  signop sign_p = TYPE_SIGN (TREE_TYPE (type));
  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
  tree cur_op, mulcst, tem;
  optab op;

  if (prec > HOST_BITS_PER_WIDE_INT)
    return NULL_TREE;

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    has_vector_shift = false;

  /* Analysis phase.  Determine whether all op1 elements are powers of
     two, in which case the division can be expanded using shifts (or,
     for remainder, masking).  Additionally compute the multiplicative
     constants and pre and post shifts if the division is to be
     expanded using widening or high part multiplication plus shifts.  */
  for (i = 0; i < nunits; i++)
    {
      tree cst = VECTOR_CST_ELT (op1, i);
      unsigned HOST_WIDE_INT ml;

      if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
	return NULL_TREE;
      pre_shifts[i] = 0;
      post_shifts[i] = 0;
      mulc[i] = 0;
      if (use_pow2
	  && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
	use_pow2 = false;
      if (use_pow2)
	{
	  shifts[i] = tree_log2 (cst);
	  if (shifts[i] != shifts[0]
	      && code == TRUNC_DIV_EXPR
	      && !has_vector_shift)
	    use_pow2 = false;
	}
      if (mode == -2)
	continue;
      if (sign_p == UNSIGNED)
	{
	  unsigned HOST_WIDE_INT mh;
	  unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;

	  if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
	    /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
	    return NULL_TREE;

	  if (d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  /* Find a suitable multiplier and right shift count
	     instead of multiplying with D.  */
	  mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

	  /* If the suggested multiplier is more than SIZE bits, we can
	     do better for even divisors, using an initial right shift.  */
	  if ((mh != 0 && (d & 1) == 0)
	      || (!has_vector_shift && pre_shift != -1))
	    {
	      if (has_vector_shift)
		pre_shift = ctz_or_zero (d);
	      else if (pre_shift == -1)
		{
		  unsigned int j;
		  for (j = 0; j < nunits; j++)
		    {
		      tree cst2 = VECTOR_CST_ELT (op1, j);
		      unsigned HOST_WIDE_INT d2;
		      int this_pre_shift;

		      if (!tree_fits_uhwi_p (cst2))
			return NULL_TREE;
		      d2 = tree_to_uhwi (cst2) & mask;
		      if (d2 == 0)
			return NULL_TREE;
		      this_pre_shift = floor_log2 (d2 & -d2);
		      if (pre_shift == -1 || this_pre_shift < pre_shift)
			pre_shift = this_pre_shift;
		    }
		  if (i != 0 && pre_shift != 0)
		    {
		      /* Restart.  */
		      i = -1U;
		      mode = -1;
		      continue;
		    }
		}
	      if (pre_shift != 0)
		{
		  if ((d >> pre_shift) <= 1)
		    {
		      mode = -2;
		      continue;
		    }
		  mh = choose_multiplier (d >> pre_shift, prec,
					  prec - pre_shift,
					  &ml, &post_shift, &dummy_int);
		  gcc_assert (!mh);
		  pre_shifts[i] = pre_shift;
		}
	    }
	  if (!mh)
	    this_mode = 0;
	  else
	    this_mode = 1;
	}
      else
	{
	  HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
	  unsigned HOST_WIDE_INT abs_d;

	  if (d == -1)
	    return NULL_TREE;

	  /* Since d might be INT_MIN, we have to cast to
	     unsigned HOST_WIDE_INT before negating to avoid
	     undefined signed overflow.  */
	  abs_d = (d >= 0
		   ? (unsigned HOST_WIDE_INT) d
		   : - (unsigned HOST_WIDE_INT) d);

	  /* n rem d = n rem -d */
	  if (code == TRUNC_MOD_EXPR && d < 0)
	    d = abs_d;
	  else if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
	    {
	      /* This case is not handled correctly below.  */
	      mode = -2;
	      continue;
	    }
	  if (abs_d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  choose_multiplier (abs_d, prec, prec - 1, &ml,
			     &post_shift, &dummy_int);
	  if (ml >= HOST_WIDE_INT_1U << (prec - 1))
	    {
	      this_mode = 4 + (d < 0);
	      ml |= HOST_WIDE_INT_M1U << (prec - 1);
	    }
	  else
	    this_mode = 2 + (d < 0);
	}
      mulc[i] = ml;
      post_shifts[i] = post_shift;
      if ((i && !has_vector_shift && post_shifts[0] != post_shift)
	  || post_shift >= prec
	  || pre_shifts[i] >= prec)
	this_mode = -2;

      if (i == 0)
	mode = this_mode;
      else if (mode != this_mode)
	mode = -2;
    }

  if (use_pow2)
    {
      tree addend = NULL_TREE;
      if (sign_p == SIGNED)
	{
	  tree uns_type;

	  /* Both division and remainder sequences need
	     op0 < 0 ? mask : 0 computed.  It can be either computed as
	     (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
	     if none of the shifts is 0, or as the conditional.  */
	  for (i = 0; i < nunits; i++)
	    if (shifts[i] == 0)
	      break;
	  uns_type
	    = build_vector_type (build_nonstandard_integer_type (prec, 1),
				 nunits);
	  if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
	    {
	      for (i = 0; i < nunits; i++)
		shift_temps[i] = prec - 1;
	      cur_op = add_rshift (gsi, type, op0, shift_temps);
	      if (cur_op != NULL_TREE)
		{
		  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					    uns_type, cur_op);
		  for (i = 0; i < nunits; i++)
		    shift_temps[i] = prec - shifts[i];
		  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
		  if (cur_op != NULL_TREE)
		    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					      type, cur_op);
		}
	    }
	  if (addend == NULL_TREE
	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
	    {
	      tree zero, cst, cond, mask_type;
	      gimple *stmt;

	      mask_type = build_same_sized_truth_vector_type (type);
	      zero = build_zero_cst (type);
	      cond = build2 (LT_EXPR, mask_type, op0, zero);
	      tree_vector_builder vec (type, nunits, 1);
	      for (i = 0; i < nunits; i++)
		vec.quick_push (build_int_cst (TREE_TYPE (type),
					       (HOST_WIDE_INT_1U
						<< shifts[i]) - 1));
	      cst = vec.build ();
	      addend = make_ssa_name (type);
	      stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
					  cst, zero);
	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
	    }
	}
      if (code == TRUNC_DIV_EXPR)
	{
	  if (sign_p == UNSIGNED)
	    {
	      /* q = op0 >> shift;  */
	      cur_op = add_rshift (gsi, type, op0, shifts);
	      if (cur_op != NULL_TREE)
		return cur_op;
	    }
	  else if (addend != NULL_TREE)
	    {
	      /* t1 = op0 + addend;
		 q = t1 >> shift;  */
	      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	      if (op != unknown_optab
		  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
		{
		  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
		  cur_op = add_rshift (gsi, type, cur_op, shifts);
		  if (cur_op != NULL_TREE)
		    return cur_op;
		}
	    }
	}
      else
	{
	  tree mask;
	  tree_vector_builder vec (type, nunits, 1);
	  for (i = 0; i < nunits; i++)
	    vec.quick_push (build_int_cst (TREE_TYPE (type),
					   (HOST_WIDE_INT_1U
					    << shifts[i]) - 1));
	  mask = vec.build ();
	  op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
	  if (op != unknown_optab
	      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	    {
	      if (sign_p == UNSIGNED)
		/* r = op0 & mask;  */
		return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
	      else if (addend != NULL_TREE)
		{
		  /* t1 = op0 + addend;
		     t2 = t1 & mask;
		     r = t2 - addend;  */
		  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
		  if (op != unknown_optab
		      && optab_handler (op, TYPE_MODE (type))
			 != CODE_FOR_nothing)
		    {
		      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
						addend);
		      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
						cur_op, mask);
		      op = optab_for_tree_code (MINUS_EXPR, type,
						optab_default);
		      if (op != unknown_optab
			  && optab_handler (op, TYPE_MODE (type))
			     != CODE_FOR_nothing)
			return gimplify_build2 (gsi, MINUS_EXPR, type,
						cur_op, addend);
		    }
		}
	    }
	}
    }

  if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return NULL_TREE;

  if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
    return NULL_TREE;

  cur_op = op0;

  switch (mode)
    {
    case 0:
      gcc_assert (sign_p == UNSIGNED);
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 1:
      gcc_assert (sign_p == UNSIGNED);
      for (i = 0; i < nunits; i++)
	{
	  shift_temps[i] = 1;
	  post_shifts[i]--;
	}
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      gcc_assert (sign_p == SIGNED);
      for (i = 0; i < nunits; i++)
	shift_temps[i] = prec - 1;
      break;
    default:
      return NULL_TREE;
    }

  tree_vector_builder vec (type, nunits, 1);
  for (i = 0; i < nunits; i++)
    vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i]));
  mulcst = vec.build ();

  cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);

  switch (mode)
    {
    case 0:
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      break;
    case 1:
      /* t1 = oprnd0 h* ml;
	 t2 = oprnd0 - t1;
	 t3 = t2 >> 1;
	 t4 = t1 + t3;
	 q = t4 >> (post_shift - 1);  */
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
      tem = add_rshift (gsi, type, tem, shift_temps);
      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
      cur_op = add_rshift (gsi, type, tem, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      /* t1 = oprnd0 h* ml;
	 t2 = t1; [ iff (mode & 2) != 0 ]
	 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
	 t3 = t2 >> post_shift;
	 t4 = oprnd0 >> (prec - 1);
	 q = t3 - t4; [ iff (mode & 1) == 0 ]
	 q = t4 - t3; [ iff (mode & 1) != 0 ]  */
      if ((mode & 2) == 0)
	{
	  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	  if (op == unknown_optab
	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	    return NULL_TREE;
	  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
	}
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      tem = add_rshift (gsi, type, op0, shift_temps);
      if (tem == NULL_TREE)
	return NULL_TREE;
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      if ((mode & 1) == 0)
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
      else
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
      break;
    default:
      gcc_unreachable ();
    }

  if (code == TRUNC_DIV_EXPR)
    return cur_op;

  /* We divided.  Now finish by:
     t1 = q * oprnd1;
     r = oprnd0 - t1;  */
  op = optab_for_tree_code (MULT_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
  op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
}
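
/* A worked instance of the signed high-part sequence above (mode 2,
   prec == 32): for division by 3, choose_multiplier yields
   ml == 0x55555556 with post_shift == 0, so the emitted code is
   q = (oprnd0 h* 0x55555556) - (oprnd0 >> 31), where subtracting the
   sign bit corrects the quotient for negative dividends.  */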

/* Expand a vector condition to scalars, by using many conditions
   on the vector's elements.  */
static void
expand_vector_condition (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = gimple_expr_type (stmt);
  tree a = gimple_assign_rhs1 (stmt);
  tree a1 = a;
  tree a2 = NULL_TREE;
  bool a_is_comparison = false;
  tree b = gimple_assign_rhs2 (stmt);
  tree c = gimple_assign_rhs3 (stmt);
  vec<constructor_elt, va_gc> *v;
  tree constr;
  tree inner_type = TREE_TYPE (type);
  tree cond_type = TREE_TYPE (TREE_TYPE (a));
  tree comp_inner_type = cond_type;
  tree width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  tree comp_width = width;
  tree comp_index = index;
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (!is_gimple_val (a))
    {
      gcc_assert (COMPARISON_CLASS_P (a));
      a_is_comparison = true;
      a1 = TREE_OPERAND (a, 0);
      a2 = TREE_OPERAND (a, 1);
      comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
      comp_width = TYPE_SIZE (comp_inner_type);
    }

  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
    return;

  /* Handle vector boolean types with bitmasks.  If there is a comparison
     and we can expand the comparison into the vector boolean bitmask,
     or otherwise if it is compatible with type, we can transform
      vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
     into
      tmp_6 = x_2 < y_3;
      tmp_7 = tmp_6 & vbfld_4;
      tmp_8 = ~tmp_6;
      tmp_9 = tmp_8 & vbfld_5;
      vbfld_1 = tmp_7 | tmp_9;
     Similarly for vbfld_10 instead of x_2 < y_3.  */
  if (VECTOR_BOOLEAN_TYPE_P (type)
      && SCALAR_INT_MODE_P (TYPE_MODE (type))
      && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
		   TYPE_VECTOR_SUBPARTS (type)
		   * GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type))))
      && (a_is_comparison
	  ? useless_type_conversion_p (type, TREE_TYPE (a))
	  : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
    {
      if (a_is_comparison)
	a = gimplify_build2 (gsi, TREE_CODE (a), type, a1, a2);
      a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
      a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
      a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
      a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
      gimple_assign_set_rhs_from_tree (gsi, a);
      update_stmt (gsi_stmt (*gsi));
      return;
    }

  /* TODO: try to find a smaller vector type.  */

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector condition will be expanded piecewise");

  int nunits = nunits_for_known_piecewise_op (type);
  vec_alloc (v, nunits);
  for (i = 0; i < nunits; i++)
    {
      tree aa, result;
      tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
      tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
      if (a_is_comparison)
	{
	  tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
				       comp_width, comp_index);
	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
				       comp_width, comp_index);
	  aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
	}
      else
	aa = tree_vec_extract (gsi, cond_type, a, width, index);
      result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
      index = int_const_binop (PLUS_EXPR, index, width);
      if (width == comp_width)
	comp_index = index;
      else
	comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
    }

  constr = build_constructor (type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}

static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type,
			 tree compute_type, gassign *assign,
			 enum tree_code code)
{
  machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (!VECTOR_MODE_P (compute_mode))
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
	if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
					 gimple_assign_rhs1 (assign),
					 gimple_assign_rhs2 (assign), code);
	break;

      case NEGATE_EXPR:
	if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_unop, do_negate, type,
					 gimple_assign_rhs1 (assign),
					 NULL_TREE, code);
	break;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
	return expand_vector_parallel (gsi, do_binop, type,
				       gimple_assign_rhs1 (assign),
				       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
	return expand_vector_parallel (gsi, do_unop, type,
				       gimple_assign_rhs1 (assign),
				       NULL_TREE, code);
      case EQ_EXPR:
      case NE_EXPR:
      case GT_EXPR:
      case LT_EXPR:
      case GE_EXPR:
      case LE_EXPR:
      case UNEQ_EXPR:
      case UNGT_EXPR:
      case UNLT_EXPR:
      case UNGE_EXPR:
      case UNLE_EXPR:
      case LTGT_EXPR:
      case ORDERED_EXPR:
      case UNORDERED_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);

	  return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
	}

      case TRUNC_DIV_EXPR:
      case TRUNC_MOD_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);
	  tree ret;

	  if (!optimize
	      || !VECTOR_INTEGER_TYPE_P (type)
	      || TREE_CODE (rhs2) != VECTOR_CST
	      || !VECTOR_MODE_P (TYPE_MODE (type)))
	    break;

	  ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
	  if (ret != NULL_TREE)
	    return ret;
	  break;
	}

      default:
	break;
      }

  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    NULL_TREE, code);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    gimple_assign_rhs2 (assign), code);
}

/* Try to optimize
   a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
   style stmts into:
   _9 = { b_7, b_7, b_7, b_7 };
   a_5 = _9 + { 0, 3, 6, 9 };
   because a vector splat operation is usually more efficient
   than piecewise initialization of the vector.  */

static void
optimize_vector_constructor (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  tree type = TREE_TYPE (rhs);
  unsigned int i, j;
  unsigned HOST_WIDE_INT nelts;
  bool all_same = true;
  constructor_elt *elt;
  gimple *g;
  tree base = NULL_TREE;
  optab op;

  if (!TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)
      || nelts <= 2
      || CONSTRUCTOR_NELTS (rhs) != nelts)
    return;
  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return;
  FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
    if (TREE_CODE (elt->value) != SSA_NAME
	|| TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
      return;
    else
      {
	tree this_base = elt->value;
	if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
	  all_same = false;
	for (j = 0; j < nelts + 1; j++)
	  {
	    g = SSA_NAME_DEF_STMT (this_base);
	    if (is_gimple_assign (g)
		&& gimple_assign_rhs_code (g) == PLUS_EXPR
		&& TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
		&& TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
		&& !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
	      this_base = gimple_assign_rhs1 (g);
	    else
	      break;
	  }
	if (i == 0)
	  base = this_base;
	else if (this_base != base)
	  return;
      }
  if (all_same)
    return;
  tree_vector_builder cst (type, nelts, 1);
  for (i = 0; i < nelts; i++)
    {
      tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
      tree elt = build_zero_cst (TREE_TYPE (base));
      while (this_base != base)
	{
	  g = SSA_NAME_DEF_STMT (this_base);
	  elt = fold_binary (PLUS_EXPR, TREE_TYPE (base),
			     elt, gimple_assign_rhs2 (g));
	  if (elt == NULL_TREE
	      || TREE_CODE (elt) != INTEGER_CST
	      || TREE_OVERFLOW (elt))
	    return;
	  this_base = gimple_assign_rhs1 (g);
	}
      cst.quick_push (elt);
    }
  for (i = 0; i < nelts; i++)
    CONSTRUCTOR_ELT (rhs, i)->value = base;
  g = gimple_build_assign (make_ssa_name (type), rhs);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
			   cst.build ());
  gsi_replace (gsi, g, false);
}

/* Return a type for the widest vector mode whose components are of type
   TYPE, or NULL_TREE if none is found.  */

static tree
type_for_widest_vector_mode (tree type, optab op)
{
  machine_mode inner_mode = TYPE_MODE (type);
  machine_mode best_mode = VOIDmode, mode;
  poly_int64 best_nunits = 0;

  if (SCALAR_FLOAT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FLOAT;
  else if (SCALAR_FRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FRACT;
  else if (SCALAR_UFRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UFRACT;
  else if (SCALAR_ACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_ACCUM;
  else if (SCALAR_UACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UACCUM;
  else if (inner_mode == BImode)
    mode = MIN_MODE_VECTOR_BOOL;
  else
    mode = MIN_MODE_VECTOR_INT;

  FOR_EACH_MODE_FROM (mode, mode)
    if (GET_MODE_INNER (mode) == inner_mode
	&& maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
	&& optab_handler (op, mode) != CODE_FOR_nothing)
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);

  if (best_mode == VOIDmode)
    return NULL_TREE;
  else
    return build_vector_type_for_mode (type, best_mode);
}
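
/* For illustration, assuming a hypothetical target whose widest
   supported integer vectors are 128 bits: for a 32-bit int inner type
   and a supported optab this returns a 4-element vector type, so a
   256-bit vector operation whose own mode is BLKmode can be carried
   out in two 128-bit pieces.  */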

/* Build a reference to the element of the vector VECT.  Function
   returns the element itself, a BIT_FIELD_REF, or an ARRAY_REF
   expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  If PTMPVEC is NULL, a new temporary variable will
   be created.  */
static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple *asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = nunits_for_known_piecewise_op (vect_type);

  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
	 we don't care about the high bits of the value.  */
      index = TREE_INT_CST_LOW (idx);
      if (!tree_fits_uhwi_p (idx) || index >= elements)
	{
	  index &= elements - 1;
	  idx = build_int_cst (TREE_TYPE (idx), index);
	}

      /* When lowering a vector statement sequence do some easy
	 simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
	  if (is_gimple_assign (def_stmt)
	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
	    vect = gimple_assign_rhs1 (def_stmt);
	}

      if (TREE_CODE (vect) == VECTOR_CST)
	return VECTOR_CST_ELT (vect, index);
      else if (TREE_CODE (vect) == CONSTRUCTOR
	       && (CONSTRUCTOR_NELTS (vect) == 0
		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
		      != VECTOR_TYPE))
	{
	  if (index < CONSTRUCTOR_NELTS (vect))
	    return CONSTRUCTOR_ELT (vect, index)->value;
	  return build_zero_cst (vect_elt_type);
	}
      else
	{
	  tree size = TYPE_SIZE (vect_elt_type);
	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
				  size);
	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
	}
    }

  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
		 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
		 idx, NULL_TREE, NULL_TREE);
}
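
/* For illustration: on a V4SI vector V, vector_element for the
   constant index 2 folds to BIT_FIELD_REF <V, 32, 64>, while a
   variable index I forces V into an addressable temporary and yields
   VIEW_CONVERT_EXPR<int[4]>(vectmp)[I].  */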

/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands:
   VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0, V1 must have the
   same number of elements.  */

static void
lower_vec_perm (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned HOST_WIDE_INT elements;
  vec<constructor_elt, va_gc> *v;
  tree constr, t, si, i_val;
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  location_t loc = gimple_location (gsi_stmt (*gsi));
  unsigned i;

  if (!TYPE_VECTOR_SUBPARTS (vect_type).is_constant (&elements))
    return;

  if (TREE_CODE (mask) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
      if (is_gimple_assign (def_stmt)
	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
	mask = gimple_assign_rhs1 (def_stmt);
    }

  vec_perm_builder sel_int;

  if (TREE_CODE (mask) == VECTOR_CST
      && tree_to_vec_perm_builder (&sel_int, mask))
    {
      vec_perm_indices indices (sel_int, 2, elements);
      if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices))
	{
	  gimple_assign_set_rhs3 (stmt, mask);
	  update_stmt (stmt);
	  return;
	}
      /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
	 vector as VEC1 and a right element shift MASK.  */
      if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
	  != CODE_FOR_nothing
	  && TREE_CODE (vec1) == VECTOR_CST
	  && initializer_zerop (vec1)
	  && maybe_ne (indices[0], 0)
	  && known_lt (poly_uint64 (indices[0]), elements))
	{
	  bool ok_p = indices.series_p (0, 1, indices[0], 1);
	  if (!ok_p)
	    {
	      for (i = 1; i < elements; ++i)
		{
		  poly_uint64 actual = indices[i];
		  poly_uint64 expected = i + indices[0];
		  /* Indices into the second vector are all equivalent.  */
		  if (maybe_lt (actual, elements)
		      ? maybe_ne (actual, expected)
		      : maybe_lt (expected, elements))
		    break;
		}
	      ok_p = i == elements;
	    }
	  if (ok_p)
	    {
	      gimple_assign_set_rhs3 (stmt, mask);
	      update_stmt (stmt);
	      return;
	    }
	}
    }
  else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
    return;

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector shuffling operation will be expanded piecewise");

  vec_alloc (v, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
	{
	  unsigned HOST_WIDE_INT index;

	  index = TREE_INT_CST_LOW (i_val);
	  if (!tree_fits_uhwi_p (i_val) || index >= elements)
	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));

	  if (two_operand_p && (index & elements) != 0)
	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
	  else
	    t = vector_element (gsi, vec0, i_val, &vec0tmp);

	  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
	}
      else
	{
	  tree cond = NULL_TREE, v0_val;

	  if (two_operand_p)
	    {
	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
				  build_int_cst (mask_elt_type, elements));
	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					       true, GSI_SAME_STMT);
	    }

	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			       build_int_cst (mask_elt_type, elements - 1));
	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
					    true, GSI_SAME_STMT);

	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
					     true, GSI_SAME_STMT);

	  if (two_operand_p)
	    {
	      tree v1_val;

	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
						 true, GSI_SAME_STMT);

	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
				  cond, build_zero_cst (mask_elt_type));
	      cond = fold_build3 (COND_EXPR, vect_elt_type,
				  cond, v0_val, v1_val);
	      t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	    }
	  else
	    t = v0_val;
	}

      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}

/* If OP is a uniform vector return the element it is a splat from.  */

static tree
ssa_uniform_vector_p (tree op)
{
  if (TREE_CODE (op) == VECTOR_CST
      || TREE_CODE (op) == VEC_DUPLICATE_EXPR
      || TREE_CODE (op) == CONSTRUCTOR)
    return uniform_vector_p (op);
  if (TREE_CODE (op) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (gimple_assign_single_p (def_stmt))
	return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
    }
  return NULL_TREE;
}
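
/* For illustration: given the GIMPLE sequence
     v_2 = { x_1, x_1, x_1, x_1 };
   ssa_uniform_vector_p looks through the SSA definition of v_2 and
   returns x_1, identifying the vector as a splat.  */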

/* Return type in which CODE operation with optab OP can be
   computed.  */

static tree
get_compute_type (enum tree_code code, optab op, tree type)
{
  /* For very wide vectors, try using a smaller vector mode.  */
  tree compute_type = type;
  if (op
      && (!VECTOR_MODE_P (TYPE_MODE (type))
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
    {
      tree vector_compute_type
	= type_for_widest_vector_mode (TREE_TYPE (type), op);
      if (vector_compute_type != NULL_TREE
	  && subparts_gt (compute_type, vector_compute_type)
	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vector_compute_type), 1U)
	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
	      != CODE_FOR_nothing))
	compute_type = vector_compute_type;
    }

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      machine_mode compute_mode = TYPE_MODE (compute_type);
      if (VECTOR_MODE_P (compute_mode))
	{
	  if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
	    return compute_type;
	  if (code == MULT_HIGHPART_EXPR
	      && can_mult_highpart_p (compute_mode,
				      TYPE_UNSIGNED (compute_type)))
	    return compute_type;
	}
      /* There is no operation in hardware, so fall back to scalars.  */
      compute_type = TREE_TYPE (type);
    }

  return compute_type;
}

static tree
do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	 tree bitpos, tree bitsize, enum tree_code code,
	 tree type ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
  return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
}

/* Expand a vector COND_EXPR to scalars, piecewise.  */
static void
expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = gimple_expr_type (stmt);
  tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
  machine_mode compute_mode = TYPE_MODE (compute_type);
  gcc_assert (compute_mode != BLKmode);
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree rhs3 = gimple_assign_rhs3 (stmt);
  tree new_rhs;

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (!VECTOR_MODE_P (compute_mode))
    new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
				      COND_EXPR);
  else
    new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
				       rhs2, rhs3, COND_EXPR);
  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}

/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
{
  tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
  enum tree_code code;
  optab op = unknown_optab;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  /* Only consider code == GIMPLE_ASSIGN.  */
  gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
  if (!stmt)
    return;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (code == VEC_COND_EXPR)
    {
      expand_vector_condition (gsi);
      return;
    }

  if (code == COND_EXPR
      && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
      && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
    {
      expand_vector_scalar_condition (gsi);
      return;
    }

  if (code == CONSTRUCTOR
      && TREE_CODE (lhs) == SSA_NAME
      && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
      && !gimple_clobber_p (stmt)
      && optimize)
    {
      optimize_vector_constructor (gsi);
      return;
    }

  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (!VECTOR_TYPE_P (type)
      || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
    return;

  /* If the vector operation operates on all-identical vector elements,
     implement it with a scalar operation and a splat if the target
     supports the scalar operation.  */
  tree srhs1, srhs2 = NULL_TREE;
  if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
      && (rhs2 == NULL_TREE
	  || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
	      && (srhs2 = rhs2))
	  || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
      /* As we query direct optabs restrict to non-convert operations.  */
      && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
    {
      op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
      if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
	  && optab_handler (op, TYPE_MODE (TREE_TYPE (type)))
	     != CODE_FOR_nothing)
	{
	  tree slhs = make_ssa_name (TREE_TYPE (srhs1));
	  gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2);
	  gsi_insert_before (gsi, repl, GSI_SAME_STMT);
	  gimple_assign_set_rhs_from_tree (gsi,
					   build_vector_from_val (type, slhs));
	  update_stmt (stmt);
	  return;
	}
    }
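
  /* For illustration: with uniform operands, e.g.
       v_4 = { x_1, x_1, x_1, x_1 } + { y_2, y_2, y_2, y_2 };
     the block above emits the scalar statement s_5 = x_1 + y_2 and
     rewrites the assignment as v_4 = { s_5, s_5, s_5, s_5 }.  */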

  /* A scalar operation pretending to be a vector one.  */
  if (VECTOR_BOOLEAN_TYPE_P (type)
      && !VECTOR_MODE_P (TYPE_MODE (type))
      && TYPE_MODE (type) != BLKmode)
    return;

  if (CONVERT_EXPR_CODE_P (code)
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  /* The signedness is determined from input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR)
    {
      type = TREE_TYPE (rhs1);
      /* We do not know how to scalarize those.  */
      return;
    }

  /* For widening/narrowing vector operations, the relevant type is of the
     arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     calculated in the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_WIDEN_MULT_EVEN_EXPR
      || code == VEC_WIDEN_MULT_ODD_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    {
      type = TREE_TYPE (rhs1);
      /* We do not know how to scalarize those.  */
      return;
    }

  /* Choose between vector shift/rotate by vector and vector shift/rotate
     by scalar.  */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
	 could be a scalar variable or a constant.  Transform
	 vector <op> {x,x,x,x} ==> vector <op> scalar.  */
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	{
	  tree first;

	  if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
	    {
	      gimple_assign_set_rhs2 (stmt, first);
	      update_stmt (stmt);
	      rhs2 = first;
	    }
	}

      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	op = opv;
      else
	{
	  op = optab_for_tree_code (code, type, optab_scalar);

	  compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* The rtl expander will expand vector/scalar as vector/vector
	     if necessary.  Pick one with wider vector type.  */
	  tree compute_vtype = get_compute_type (code, opv, type);
	  if (subparts_gt (compute_vtype, compute_type))
	    {
	      compute_type = compute_vtype;
	      op = opv;
	    }
	}

      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
	{
	  if (compute_type == NULL_TREE)
	    compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* Before splitting vector rotates into scalar rotates,
	     see if we can't use vector shifts and BIT_IOR_EXPR
	     instead.  For vector by vector rotates we'd also
	     need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
	     for now, fold doesn't seem to create such rotates anyway.  */
	  if (compute_type == TREE_TYPE (type)
	      && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	    {
	      optab oplv = vashl_optab, opl = ashl_optab;
	      optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
	      tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
	      tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
	      tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
	      tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
	      tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
	      /* The rtl expander will expand vector/scalar as vector/vector
		 if necessary.  Pick one with wider vector type.  */
	      if (subparts_gt (compute_lvtype, compute_ltype))
		{
		  compute_ltype = compute_lvtype;
		  opl = oplv;
		}
	      if (subparts_gt (compute_rvtype, compute_rtype))
		{
		  compute_rtype = compute_rvtype;
		  opr = oprv;
		}
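
	      /* For illustration: a rotate such as x r<< 3 on 32-bit
		 lanes can be carried out as (x << 3) | (x >> 29), so
		 picking a common vector type here in which LSHIFT_EXPR,
		 RSHIFT_EXPR and BIT_IOR_EXPR are all supported avoids
		 splitting the rotate into scalar rotates.  */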
	      /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
		 BIT_IOR_EXPR.  */
	      compute_type = compute_ltype;
	      if (subparts_gt (compute_type, compute_rtype))
		compute_type = compute_rtype;
	      if (subparts_gt (compute_type, compute_otype))
		compute_type = compute_otype;
	      /* Verify all 3 operations can be performed in that type.  */
	      if (compute_type != TREE_TYPE (type))
		{
		  if (optab_handler (opl, TYPE_MODE (compute_type))
		      == CODE_FOR_nothing
		      || optab_handler (opr, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing
		      || optab_handler (opo, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing)
		    compute_type = TREE_TYPE (type);
		}
	    }
	}
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == unknown_optab
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  if (compute_type == NULL_TREE)
    compute_type = get_compute_type (code, op, type);
  if (compute_type == type)
    return;

  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}

/* Use this to lower vector operations introduced by the vectorizer,
   if it may need the bit-twiddling tricks implemented in this file.  */

static unsigned int
expand_vector_operations (void)
{
  gimple_stmt_iterator gsi;
  basic_block bb;
  bool cfg_changed = false;

  FOR_EACH_BB_FN (bb, cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  expand_vector_operations_1 (&gsi);
	  /* ???  If we do not cleanup EH then we will ICE in
	     verification.  But in reality we have created wrong-code
	     as we did not properly transition EH info and edges to
	     the piecewise computations.  */
	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
	      && gimple_purge_dead_eh_edges (bb))
	    cfg_changed = true;
	}
    }

  return cfg_changed ? TODO_cleanup_cfg : 0;
}

namespace {

const pass_data pass_data_lower_vector =
{
  GIMPLE_PASS, /* type */
  "veclower", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_lower_vector : public gimple_opt_pass
{
public:
  pass_lower_vector (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_lvec);
    }

  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector

} // anon namespace

gimple_opt_pass *
make_pass_lower_vector (gcc::context *ctxt)
{
  return new pass_lower_vector (ctxt);
}

namespace {

const pass_data pass_data_lower_vector_ssa =
{
  GIMPLE_PASS, /* type */
  "veclower2", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_update_ssa
    | TODO_cleanup_cfg ), /* todo_flags_finish */
};

class pass_lower_vector_ssa : public gimple_opt_pass
{
public:
  pass_lower_vector_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector_ssa

} // anon namespace

gimple_opt_pass *
make_pass_lower_vector_ssa (gcc::context *ctxt)
{
  return new pass_lower_vector_ssa (ctxt);
}

#include "gt-tree-vect-generic.h"