1 /* Lower vector operations to scalar operations. 2 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 3 Free Software Foundation, Inc. 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify it 8 under the terms of the GNU General Public License as published by the 9 Free Software Foundation; either version 3, or (at your option) any 10 later version. 11 12 GCC is distributed in the hope that it will be useful, but WITHOUT 13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 #include "config.h" 22 #include "system.h" 23 #include "coretypes.h" 24 #include "tree.h" 25 #include "tm.h" 26 #include "langhooks.h" 27 #include "tree-flow.h" 28 #include "gimple.h" 29 #include "tree-iterator.h" 30 #include "tree-pass.h" 31 #include "flags.h" 32 #include "ggc.h" 33 #include "diagnostic.h" 34 35 /* Need to include rtl.h, expr.h, etc. for optabs. */ 36 #include "expr.h" 37 #include "optabs.h" 38 39 40 static void expand_vector_operations_1 (gimple_stmt_iterator *); 41 42 43 /* Build a constant of type TYPE, made of VALUE's bits replicated 44 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. 
*/
static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
{
  /* Width in bits of one vector element.  */
  int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
  /* Number of elements that fit in one HOST_WIDE_INT.  */
  int n = HOST_BITS_PER_WIDE_INT / width;
  unsigned HOST_WIDE_INT low, high, mask;
  tree ret;

  gcc_assert (n);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      /* Broadcast the low WIDTH bits of VALUE into every element
	 position: ~0 / mask yields 0x00010001... (one 1 per element),
	 which multiplied by the element value replicates it.  */
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  /* Split the replicated bits into the low/high words expected by
     build_int_cst_wide, truncating to TYPE's precision.  */
  if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
    low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
  else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
    high = 0;
  else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
    high = low;
  else
    gcc_unreachable ();

  ret = build_int_cst_wide (type, low, high);
  return ret;
}

/* Cache for build_word_mode_vector_type: the word_mode scalar type,
   plus the most recently built vector type and its element count.  */
static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;

/* Return a suitable vector type made of SUBPARTS units each of mode
   "word_mode" (the global variable).  */
static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  /* We build a new type, but we canonicalize it nevertheless,
     because it still saves some memory.  */
  vector_last_nunits = nunits;
  vector_last_type = type_hash_canon (nunits,
				      build_vector_type (vector_inner_type,
							 nunits));
  return vector_last_type;
}

/* Callback type for the per-piece expansion helpers below: operate on
   the piece of the operand(s) at BITPOS/BITSIZE, in INNER_TYPE.  */
typedef tree (*elem_op_func) (gimple_stmt_iterator *,
			      tree, tree, tree, tree, tree, enum tree_code);

/* Extract the piece of T at BITPOS of width BITSIZE, as TYPE.  A NULL
   BITPOS means "the whole of T", reinterpreted via VIEW_CONVERT_EXPR.  */
static inline tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
		  tree t, tree bitsize, tree bitpos)
{
  if (bitpos)
    return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
  else
    return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
}

/* Apply unary CODE to the piece of A at BITPOS/BITSIZE.  */
static tree
do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
	 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
	 enum tree_code code)
{
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  return gimplify_build1 (gsi, code, inner_type, a);
}

/* Apply binary CODE to the pieces of A and B at BITPOS/BITSIZE.
   An operand that is already scalar is used as-is.  */
static tree
do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	  tree bitpos, tree bitsize, enum tree_code code)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  return gimplify_build2 (gsi, code, inner_type, a, b);
}

/* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0

   INNER_TYPE is the type of A and B elements

   returned expression is of signed integer type with the
   size equal to the size of INNER_TYPE.  */
static tree
do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	    tree bitpos, tree bitsize, enum tree_code code)
{
  tree comp_type;

  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);

  /* Signed integer of the same size, so -1 is an all-ones mask.  */
  comp_type = build_nonstandard_integer_type
		      (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);

  return gimplify_build3 (gsi, COND_EXPR, comp_type,
			  fold_build2 (code, boolean_type_node, a, b),
			  build_int_cst (comp_type, -1),
			  build_int_cst (comp_type, 0));
}

/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
           (a ^ b) & 0x80808080

   a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
           (a ^ ~b) & 0x80808080

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if 4 vector items or more
   fit into a word.
*/ 172 static tree 173 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b, 174 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED, 175 enum tree_code code) 176 { 177 tree inner_type = TREE_TYPE (TREE_TYPE (a)); 178 unsigned HOST_WIDE_INT max; 179 tree low_bits, high_bits, a_low, b_low, result_low, signs; 180 181 max = GET_MODE_MASK (TYPE_MODE (inner_type)); 182 low_bits = build_replicated_const (word_type, inner_type, max >> 1); 183 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); 184 185 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos); 186 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos); 187 188 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b); 189 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits); 190 if (code == PLUS_EXPR) 191 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits); 192 else 193 { 194 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits); 195 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs); 196 } 197 198 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits); 199 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low); 200 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs); 201 } 202 203 static tree 204 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b, 205 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED, 206 tree bitsize ATTRIBUTE_UNUSED, 207 enum tree_code code ATTRIBUTE_UNUSED) 208 { 209 tree inner_type = TREE_TYPE (TREE_TYPE (b)); 210 HOST_WIDE_INT max; 211 tree low_bits, high_bits, b_low, result_low, signs; 212 213 max = GET_MODE_MASK (TYPE_MODE (inner_type)); 214 low_bits = build_replicated_const (word_type, inner_type, max >> 1); 215 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); 216 217 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos); 218 219 b_low = gimplify_build2 (gsi, 
BIT_AND_EXPR, word_type, b, low_bits); 220 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b); 221 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits); 222 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low); 223 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs); 224 } 225 226 /* Expand a vector operation to scalars, by using many operations 227 whose type is the vector type's inner type. */ 228 static tree 229 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f, 230 tree type, tree inner_type, 231 tree a, tree b, enum tree_code code) 232 { 233 VEC(constructor_elt,gc) *v; 234 tree part_width = TYPE_SIZE (inner_type); 235 tree index = bitsize_int (0); 236 int nunits = TYPE_VECTOR_SUBPARTS (type); 237 int delta = tree_low_cst (part_width, 1) 238 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1); 239 int i; 240 location_t loc = gimple_location (gsi_stmt (*gsi)); 241 242 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type)) 243 warning_at (loc, OPT_Wvector_operation_performance, 244 "vector operation will be expanded piecewise"); 245 else 246 warning_at (loc, OPT_Wvector_operation_performance, 247 "vector operation will be expanded in parallel"); 248 249 v = VEC_alloc(constructor_elt, gc, (nunits + delta - 1) / delta); 250 for (i = 0; i < nunits; 251 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width)) 252 { 253 tree result = f (gsi, inner_type, a, b, index, part_width, code); 254 constructor_elt *ce = VEC_quick_push (constructor_elt, v, NULL); 255 ce->index = NULL_TREE; 256 ce->value = result; 257 } 258 259 return build_constructor (type, v); 260 } 261 262 /* Expand a vector operation to scalars with the freedom to use 263 a scalar integer type, or to use a different size for the items 264 in the vector type. 
*/
static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
			tree a, tree b,
			enum tree_code code)
{
  tree result, compute_type;
  enum machine_mode mode;
  int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
     return expand_vector_piecewise (gsi, f,
				     type, TREE_TYPE (type),
				     a, b, code);
  else if (n_words > 1)
    {
      /* Operate on the vector one word at a time, via a synthetic
	 vector-of-words type.  */
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (gsi, f,
				        word_type, TREE_TYPE (word_type),
					a, b, code);
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
					 GSI_SAME_STMT);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      /* NULL BITPOS/BITSIZE make F operate on the whole value at once.  */
      result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
      warning_at (loc, OPT_Wvector_operation_performance,
		  "vector operation will be expanded with a "
		  "single scalar operation");
    }

  return result;
}

/* Expand a vector operation to scalars; for integer types we can use
   special bit twiddling tricks to do the sums a word at a time, using
   function F_PARALLEL instead of F.  These tricks are done only if
   they can process at least four items, that is, only if the vector
   holds at least four items and if a word can hold four items.
*/
static tree
expand_vector_addition (gimple_stmt_iterator *gsi,
			elem_op_func f, elem_op_func f_parallel,
			tree type, tree a, tree b, enum tree_code code)
{
  int parts_per_word = UNITS_PER_WORD
	  	       / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);

  /* Use the word-at-a-time trick (F_PARALLEL) only when it pays off;
     otherwise fall back to one scalar operation per element.  */
  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
      && parts_per_word >= 4
      && TYPE_VECTOR_SUBPARTS (type) >= 4)
    return expand_vector_parallel (gsi, f_parallel,
				   type, a, b, code);
  else
    return expand_vector_piecewise (gsi, f,
				    type, TREE_TYPE (type),
				    a, b, code);
}

/* Check if vector VEC consists of all the equal elements and
   that the number of elements corresponds to the type of VEC.
   The function returns first element of the vector
   or NULL_TREE if the vector is not uniform.  */
static tree
uniform_vector_p (tree vec)
{
  tree first, t, els;
  unsigned i;

  if (vec == NULL_TREE)
    return NULL_TREE;

  if (TREE_CODE (vec) == VECTOR_CST)
    {
      /* VECTOR_CST elements are a TREE_LIST chain; compare each
	 against the first.  */
      els = TREE_VECTOR_CST_ELTS (vec);
      first = TREE_VALUE (els);
      els = TREE_CHAIN (els);

      for (t = els; t; t = TREE_CHAIN (t))
	if (!operand_equal_p (first, TREE_VALUE (t), 0))
	  return NULL_TREE;

      return first;
    }

  else if (TREE_CODE (vec) == CONSTRUCTOR)
    {
      first = error_mark_node;

      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (vec), i, t)
        {
          if (i == 0)
            {
              first = t;
              continue;
            }
	  if (!operand_equal_p (first, t, 0))
	    return NULL_TREE;
        }
      /* A partial CONSTRUCTOR (trailing elements implicitly zero)
	 does not count as uniform.  */
      if (i != TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec)))
	return NULL_TREE;

      return first;
    }

  return NULL_TREE;
}

/* Try to expand vector comparison expression OP0 CODE OP1 by
   querying optab if the following expression:
	VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded.
*/
static tree
expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
                          tree op1, enum tree_code code)
{
  tree t;
  /* NULL_TREE means "leave the comparison for the expander"; only
     lower piecewise when no vcond pattern exists.  */
  if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
    t = expand_vector_piecewise (gsi, do_compare, type,
				 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
  else
    t = NULL_TREE;

  return t;
}

/* Lower the vector operation CODE of statement ASSIGN to scalar
   (or narrower-vector) operations of COMPUTE_TYPE, dispatching to
   the specialized expanders where profitable.  */
static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
			 gimple assign, enum tree_code code)
{
  enum machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
	/* The word-parallel trick changes where overflow happens, so it
	   is only valid when overflow does not trap.  */
        if (!TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
					 gimple_assign_rhs1 (assign),
					 gimple_assign_rhs2 (assign), code);
	break;

      case NEGATE_EXPR:
        if (!TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_unop, do_negate, type,
					 gimple_assign_rhs1 (assign),
					 NULL_TREE, code);
	break;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
	/* Bitwise operations never carry between elements, so whole
	   words can always be processed at once.  */
        return expand_vector_parallel (gsi, do_binop, type,
				       gimple_assign_rhs1 (assign),
				       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
        return expand_vector_parallel (gsi, do_unop, type,
				       gimple_assign_rhs1 (assign),
				       NULL_TREE, code);
      case EQ_EXPR:
      case NE_EXPR:
      case GT_EXPR:
      case LT_EXPR:
      case GE_EXPR:
      case LE_EXPR:
      case UNEQ_EXPR:
      case UNGT_EXPR:
      case UNLT_EXPR:
      case UNGE_EXPR:
      case UNLE_EXPR:
      case LTGT_EXPR:
      case ORDERED_EXPR:
      case UNORDERED_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);

	  return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
	}
      default:
	break;
      }

  /* Generic fallback: one scalar (or narrower-vector) operation per
     piece of COMPUTE_TYPE.  */
  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    NULL_TREE, code);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    gimple_assign_rhs2 (assign), code);
}

/* Return a type for the widest vector mode whose components are of type
   TYPE, or NULL_TREE if none is found.  */

static tree
type_for_widest_vector_mode (tree type, optab op)
{
  enum machine_mode inner_mode = TYPE_MODE (type);
  enum machine_mode best_mode = VOIDmode, mode;
  int best_nunits = 0;

  /* Pick the class of vector modes matching the scalar mode class.  */
  if (SCALAR_FLOAT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FLOAT;
  else if (SCALAR_FRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FRACT;
  else if (SCALAR_UFRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UFRACT;
  else if (SCALAR_ACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_ACCUM;
  else if (SCALAR_UACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UACCUM;
  else
    mode = MIN_MODE_VECTOR_INT;

  /* Among modes with the right element, keep the widest one the
     target actually implements OP for.  */
  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_INNER (mode) == inner_mode
	&& GET_MODE_NUNITS (mode) > best_nunits
	&& optab_handler (op, mode) != CODE_FOR_nothing)
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);

  if (best_mode == VOIDmode)
    return NULL_TREE;
  else
    return build_vector_type_for_mode (type, best_mode);
}


/* Build a reference to the element of the vector VECT.
Function returns either the element itself, a BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  In case when PTMPVEC is NULL new temporary variable
   will be created.  */
static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = TYPE_VECTOR_SUBPARTS (vect_type);

  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
	 we don't care about the high bits of the value.  */
      index = TREE_INT_CST_LOW (idx);
      if (!host_integerp (idx, 1) || index >= elements)
	{
	  /* Wrap an out-of-range index; assumes ELEMENTS is a power
	     of two (vector lane counts are).  */
	  index &= elements - 1;
	  idx = build_int_cst (TREE_TYPE (idx), index);
	}

      /* When lowering a vector statement sequence do some easy
         simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
	{
	  gimple def_stmt = SSA_NAME_DEF_STMT (vect);
	  if (is_gimple_assign (def_stmt)
	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
	    vect = gimple_assign_rhs1 (def_stmt);
	}

      if (TREE_CODE (vect) == VECTOR_CST)
        {
	  /* Constant vector: walk the element chain to the index.  */
	  unsigned i;
	  tree vals = TREE_VECTOR_CST_ELTS (vect);
	  for (i = 0; vals; vals = TREE_CHAIN (vals), ++i)
	    if (i == index)
	      return TREE_VALUE (vals);
	  return build_zero_cst (vect_elt_type);
        }
      else if (TREE_CODE (vect) == CONSTRUCTOR)
        {
          unsigned i;
          tree elt_i, elt_v;

	  /* Elements not mentioned in the CONSTRUCTOR are zero.  */
	  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (vect), i, elt_i, elt_v)
            if (operand_equal_p (elt_i, idx, 0))
              return elt_v;
          return build_zero_cst (vect_elt_type);
        }
      else
	{
	  /* Constant index into a non-constant vector: a simple
	     BIT_FIELD_REF suffices.  */
	  tree size = TYPE_SIZE (vect_elt_type);
	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
				  size);
	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
	}
    }

  /* Variable index: spill the vector to an addressable temporary and
     index it as an array.  Reuse the cached temporary if provided.  */
  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
                 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
                 idx, NULL_TREE, NULL_TREE);
}

/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands:
   VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.
Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0, V1 must have the
   same number of arguments.  */

static void
lower_vec_perm (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
  VEC(constructor_elt,gc) *v;
  tree constr, t, si, i_val;
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  location_t loc = gimple_location (gsi_stmt (*gsi));
  unsigned i;

  /* First see whether the target can do this permutation directly;
     if so, leave the statement alone.  */
  if (TREE_CODE (mask) == VECTOR_CST)
    {
      unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
      tree vals = TREE_VECTOR_CST_ELTS (mask);

      /* Selector values are taken modulo 2*ELEMENTS: bit ELEMENTS
	 chooses between VEC0 and VEC1.  */
      for (i = 0; i < elements; ++i, vals = TREE_CHAIN (vals))
	sel_int[i] = TREE_INT_CST_LOW (TREE_VALUE (vals)) & (2 * elements - 1);

      if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
	return;
    }
  else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
    return;

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector shuffling operation will be expanded piecewise");

  /* Lower element by element, building a CONSTRUCTOR.  */
  v = VEC_alloc (constructor_elt, gc, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
        {
	  unsigned HOST_WIDE_INT index;

	  index = TREE_INT_CST_LOW (i_val);
	  if (!host_integerp (i_val, 1) || index >= elements)
	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));

	  /* Bit ELEMENTS of the selector picks the second operand.  */
          if (two_operand_p && (index & elements) != 0)
	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
	  else
	    t = vector_element (gsi, vec0, i_val, &vec0tmp);

	  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
        }
      else
        {
	  /* Non-constant selector element: emit runtime code that
	     masks the index and, if needed, selects between the two
	     input vectors with a COND_EXPR.  */
	  tree cond = NULL_TREE, v0_val;

	  if (two_operand_p)
	    {
	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			          build_int_cst (mask_elt_type, elements));
	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					       true, GSI_SAME_STMT);
	    }

	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			       build_int_cst (mask_elt_type, elements - 1));
	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
					    true, GSI_SAME_STMT);

	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
					     true, GSI_SAME_STMT);

	  if (two_operand_p)
	    {
	      tree v1_val;

	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
						 true, GSI_SAME_STMT);

	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
				  cond, build_zero_cst (mask_elt_type));
	      cond = fold_build3 (COND_EXPR, vect_elt_type,
				  cond, v0_val, v1_val);
	      t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	    }
	  else
	    t = v0_val;
        }

      CONSTRUCTOR_APPEND_ELT (v, si, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}

/* Process one statement.  If we identify a vector operation, expand it.
*/

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree lhs, rhs1, rhs2 = NULL, type, compute_type;
  enum tree_code code;
  enum machine_mode compute_mode;
  optab op = NULL;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (TREE_CODE (type) != VECTOR_TYPE)
    return;

  /* Conversions are not lowered here.  */
  if (code == NOP_EXPR
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  gcc_assert (code != CONVERT_EXPR);

  /* The signedness is determined from input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* Choose between vector shift/rotate by vector and vector shift/rotate by
     scalar */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
         could be a scalar variable or a constant.  Transform
         vector <op> {x,x,x,x} ==> vector <op> scalar.  */
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
        {
          tree first;
          gimple def_stmt;

          if ((TREE_CODE (rhs2) == VECTOR_CST
	       && (first = uniform_vector_p (rhs2)) != NULL_TREE)
	      || (TREE_CODE (rhs2) == SSA_NAME
		  && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
		  && gimple_assign_single_p (def_stmt)
		  && (first = uniform_vector_p
		      (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
            {
              gimple_assign_set_rhs2 (stmt, first);
              update_stmt (stmt);
              rhs2 = first;
            }
        }

      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	op = opv;
      else
	{
          op = optab_for_tree_code (code, type, optab_scalar);

	  /* The rtl expander will expand vector/scalar as vector/vector
	     if necessary.  Don't bother converting the stmt here.  */
	  if (optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing
	      && optab_handler (opv, TYPE_MODE (type)) != CODE_FOR_nothing)
	    return;
	}
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* For widening/narrowing vector operations, the relevant type is of the
     arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     calculated in the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == NULL
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  /* For very wide vectors, try using a smaller vector mode.  */
  compute_type = type;
  if (!VECTOR_MODE_P (TYPE_MODE (type)) && op)
    {
      tree vector_compute_type
        = type_for_widest_vector_mode (TREE_TYPE (type), op);
      if (vector_compute_type != NULL_TREE
	  && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
	      < TYPE_VECTOR_SUBPARTS (compute_type))
	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
	      != CODE_FOR_nothing))
	compute_type = vector_compute_type;
    }

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      compute_mode = TYPE_MODE (compute_type);
      if (VECTOR_MODE_P (compute_mode)
	  && op != NULL
	  && optab_handler (op, compute_mode) != CODE_FOR_nothing)
	return;
      else
	/* There is no operation in hardware, so fall back to scalars.  */
	compute_type = TREE_TYPE (type);
    }

  gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
                               new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree. One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree. */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}

/* Use this to lower vector operations introduced by the vectorizer,
   if it may need the bit-twiddling tricks implemented in this file.  */

static bool
gate_expand_vector_operations_ssa (void)
{
  /* The "veclower" pass runs only at -O0; when optimizing, the
     ungated "veclower2" pass below does the lowering instead.  */
  return optimize == 0;
}

/* Main pass body: lower every vector statement in the function.  */
static unsigned int
expand_vector_operations (void)
{
  gimple_stmt_iterator gsi;
  basic_block bb;
  bool cfg_changed = false;

  FOR_EACH_BB (bb)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  expand_vector_operations_1 (&gsi);
	  /* ???  If we do not cleanup EH then we will ICE in
	     verification.  But in reality we have created wrong-code
	     as we did not properly transition EH info and edges to
	     the piecewise computations.  */
	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
	      && gimple_purge_dead_eh_edges (bb))
	    cfg_changed = true;
	}
    }

  return cfg_changed ? TODO_cleanup_cfg : 0;
}

struct gimple_opt_pass pass_lower_vector =
{
 {
  GIMPLE_PASS,
  "veclower",				/* name */
  gate_expand_vector_operations_ssa,	/* gate */
  expand_vector_operations,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_NONE,				/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_update_ssa			/* todo_flags_finish */
    | TODO_verify_ssa
    | TODO_verify_stmts | TODO_verify_flow
    | TODO_cleanup_cfg
 }
};

struct gimple_opt_pass pass_lower_vector_ssa =
{
 {
  GIMPLE_PASS,
  "veclower2",				/* name */
  0,					/* gate */
  expand_vector_operations,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_NONE,				/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_update_ssa			/* todo_flags_finish */
    | TODO_verify_ssa
    | TODO_verify_stmts | TODO_verify_flow
    | TODO_cleanup_cfg
 }
};

#include "gt-tree-vect-generic.h"