138fd1498Szrj /* Lower vector operations to scalar operations.
238fd1498Szrj Copyright (C) 2004-2018 Free Software Foundation, Inc.
338fd1498Szrj
438fd1498Szrj This file is part of GCC.
538fd1498Szrj
638fd1498Szrj GCC is free software; you can redistribute it and/or modify it
738fd1498Szrj under the terms of the GNU General Public License as published by the
838fd1498Szrj Free Software Foundation; either version 3, or (at your option) any
938fd1498Szrj later version.
1038fd1498Szrj
1138fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT
1238fd1498Szrj ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1338fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
1438fd1498Szrj for more details.
1538fd1498Szrj
1638fd1498Szrj You should have received a copy of the GNU General Public License
1738fd1498Szrj along with GCC; see the file COPYING3. If not see
1838fd1498Szrj <http://www.gnu.org/licenses/>. */
1938fd1498Szrj
2038fd1498Szrj #include "config.h"
2138fd1498Szrj #include "system.h"
2238fd1498Szrj #include "coretypes.h"
2338fd1498Szrj #include "backend.h"
2438fd1498Szrj #include "rtl.h"
2538fd1498Szrj #include "tree.h"
2638fd1498Szrj #include "gimple.h"
2738fd1498Szrj #include "tree-pass.h"
2838fd1498Szrj #include "ssa.h"
2938fd1498Szrj #include "expmed.h"
3038fd1498Szrj #include "optabs-tree.h"
3138fd1498Szrj #include "diagnostic.h"
3238fd1498Szrj #include "fold-const.h"
3338fd1498Szrj #include "stor-layout.h"
3438fd1498Szrj #include "langhooks.h"
3538fd1498Szrj #include "tree-eh.h"
3638fd1498Szrj #include "gimple-iterator.h"
3738fd1498Szrj #include "gimplify-me.h"
3838fd1498Szrj #include "gimplify.h"
3938fd1498Szrj #include "tree-cfg.h"
4038fd1498Szrj #include "tree-vector-builder.h"
4138fd1498Szrj #include "vec-perm-indices.h"
4238fd1498Szrj
4338fd1498Szrj
4438fd1498Szrj static void expand_vector_operations_1 (gimple_stmt_iterator *);
4538fd1498Szrj
4638fd1498Szrj /* Return the number of elements in a vector type TYPE that we have
4738fd1498Szrj already decided needs to be expanded piecewise. We don't support
4838fd1498Szrj this kind of expansion for variable-length vectors, since we should
4938fd1498Szrj always check for target support before introducing uses of those. */
5038fd1498Szrj static unsigned int
nunits_for_known_piecewise_op(const_tree type)5138fd1498Szrj nunits_for_known_piecewise_op (const_tree type)
5238fd1498Szrj {
5338fd1498Szrj return TYPE_VECTOR_SUBPARTS (type).to_constant ();
5438fd1498Szrj }
5538fd1498Szrj
5638fd1498Szrj /* Return true if TYPE1 has more elements than TYPE2, where either
5738fd1498Szrj type may be a vector or a scalar. */
5838fd1498Szrj
5938fd1498Szrj static inline bool
subparts_gt(tree type1,tree type2)6038fd1498Szrj subparts_gt (tree type1, tree type2)
6138fd1498Szrj {
6238fd1498Szrj poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1;
6338fd1498Szrj poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1;
6438fd1498Szrj return known_gt (n1, n2);
6538fd1498Szrj }
6638fd1498Szrj
6738fd1498Szrj /* Build a constant of type TYPE, made of VALUE's bits replicated
6838fd1498Szrj every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
6938fd1498Szrj static tree
build_replicated_const(tree type,tree inner_type,HOST_WIDE_INT value)7038fd1498Szrj build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
7138fd1498Szrj {
7238fd1498Szrj int width = tree_to_uhwi (TYPE_SIZE (inner_type));
7338fd1498Szrj int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
7438fd1498Szrj / HOST_BITS_PER_WIDE_INT;
7538fd1498Szrj unsigned HOST_WIDE_INT low, mask;
7638fd1498Szrj HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
7738fd1498Szrj int i;
7838fd1498Szrj
7938fd1498Szrj gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
8038fd1498Szrj
8138fd1498Szrj if (width == HOST_BITS_PER_WIDE_INT)
8238fd1498Szrj low = value;
8338fd1498Szrj else
8438fd1498Szrj {
8538fd1498Szrj mask = ((HOST_WIDE_INT)1 << width) - 1;
8638fd1498Szrj low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
8738fd1498Szrj }
8838fd1498Szrj
8938fd1498Szrj for (i = 0; i < n; i++)
9038fd1498Szrj a[i] = low;
9138fd1498Szrj
9238fd1498Szrj gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
9338fd1498Szrj return wide_int_to_tree
9438fd1498Szrj (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
9538fd1498Szrj }
9638fd1498Szrj
/* Cache used by build_word_mode_vector_type: the word_mode element
   type, plus the most recently built word-mode vector type and its
   element count, so repeated requests for the same size reuse it.  */
static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;
10038fd1498Szrj
10138fd1498Szrj /* Return a suitable vector types made of SUBPARTS units each of mode
10238fd1498Szrj "word_mode" (the global variable). */
10338fd1498Szrj static tree
build_word_mode_vector_type(int nunits)10438fd1498Szrj build_word_mode_vector_type (int nunits)
10538fd1498Szrj {
10638fd1498Szrj if (!vector_inner_type)
10738fd1498Szrj vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
10838fd1498Szrj else if (vector_last_nunits == nunits)
10938fd1498Szrj {
11038fd1498Szrj gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
11138fd1498Szrj return vector_last_type;
11238fd1498Szrj }
11338fd1498Szrj
11438fd1498Szrj vector_last_nunits = nunits;
115*58e805e6Szrj vector_last_type = build_vector_type (vector_inner_type, nunits);
11638fd1498Szrj return vector_last_type;
11738fd1498Szrj }
11838fd1498Szrj
/* Callback used by the piecewise expanders below to emit one
   element's (or one word's) worth of a vector operation.  Arguments:
   (gsi, inner_type, a, b, bitpos, bitsize, code, ret_type); a null
   BITPOS/BITSIZE pair means operate on the whole value at once.  */
typedef tree (*elem_op_func) (gimple_stmt_iterator *,
			      tree, tree, tree, tree, tree, enum tree_code,
			      tree);
12238fd1498Szrj
12338fd1498Szrj static inline tree
tree_vec_extract(gimple_stmt_iterator * gsi,tree type,tree t,tree bitsize,tree bitpos)12438fd1498Szrj tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
12538fd1498Szrj tree t, tree bitsize, tree bitpos)
12638fd1498Szrj {
12738fd1498Szrj if (TREE_CODE (t) == SSA_NAME)
12838fd1498Szrj {
12938fd1498Szrj gimple *def_stmt = SSA_NAME_DEF_STMT (t);
13038fd1498Szrj if (is_gimple_assign (def_stmt)
13138fd1498Szrj && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
13238fd1498Szrj || (bitpos
13338fd1498Szrj && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR)))
13438fd1498Szrj t = gimple_assign_rhs1 (def_stmt);
13538fd1498Szrj }
13638fd1498Szrj if (bitpos)
13738fd1498Szrj {
13838fd1498Szrj if (TREE_CODE (type) == BOOLEAN_TYPE)
13938fd1498Szrj {
14038fd1498Szrj tree itype
14138fd1498Szrj = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 0);
14238fd1498Szrj tree field = gimplify_build3 (gsi, BIT_FIELD_REF, itype, t,
14338fd1498Szrj bitsize, bitpos);
14438fd1498Szrj return gimplify_build2 (gsi, NE_EXPR, type, field,
14538fd1498Szrj build_zero_cst (itype));
14638fd1498Szrj }
14738fd1498Szrj else
14838fd1498Szrj return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
14938fd1498Szrj }
15038fd1498Szrj else
15138fd1498Szrj return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
15238fd1498Szrj }
15338fd1498Szrj
15438fd1498Szrj static tree
do_unop(gimple_stmt_iterator * gsi,tree inner_type,tree a,tree b ATTRIBUTE_UNUSED,tree bitpos,tree bitsize,enum tree_code code,tree type ATTRIBUTE_UNUSED)15538fd1498Szrj do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
15638fd1498Szrj tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
15738fd1498Szrj enum tree_code code, tree type ATTRIBUTE_UNUSED)
15838fd1498Szrj {
15938fd1498Szrj a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
16038fd1498Szrj return gimplify_build1 (gsi, code, inner_type, a);
16138fd1498Szrj }
16238fd1498Szrj
16338fd1498Szrj static tree
do_binop(gimple_stmt_iterator * gsi,tree inner_type,tree a,tree b,tree bitpos,tree bitsize,enum tree_code code,tree type ATTRIBUTE_UNUSED)16438fd1498Szrj do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
16538fd1498Szrj tree bitpos, tree bitsize, enum tree_code code,
16638fd1498Szrj tree type ATTRIBUTE_UNUSED)
16738fd1498Szrj {
16838fd1498Szrj if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
16938fd1498Szrj a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
17038fd1498Szrj if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
17138fd1498Szrj b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
17238fd1498Szrj return gimplify_build2 (gsi, code, inner_type, a, b);
17338fd1498Szrj }
17438fd1498Szrj
17538fd1498Szrj /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
17638fd1498Szrj
17738fd1498Szrj INNER_TYPE is the type of A and B elements
17838fd1498Szrj
17938fd1498Szrj returned expression is of signed integer type with the
18038fd1498Szrj size equal to the size of INNER_TYPE. */
18138fd1498Szrj static tree
do_compare(gimple_stmt_iterator * gsi,tree inner_type,tree a,tree b,tree bitpos,tree bitsize,enum tree_code code,tree type)18238fd1498Szrj do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
18338fd1498Szrj tree bitpos, tree bitsize, enum tree_code code, tree type)
18438fd1498Szrj {
18538fd1498Szrj tree stype = TREE_TYPE (type);
18638fd1498Szrj tree cst_false = build_zero_cst (stype);
18738fd1498Szrj tree cst_true = build_all_ones_cst (stype);
18838fd1498Szrj tree cmp;
18938fd1498Szrj
19038fd1498Szrj a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
19138fd1498Szrj b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
19238fd1498Szrj
19338fd1498Szrj cmp = build2 (code, boolean_type_node, a, b);
19438fd1498Szrj return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
19538fd1498Szrj }
19638fd1498Szrj
19738fd1498Szrj /* Expand vector addition to scalars. This does bit twiddling
19838fd1498Szrj in order to increase parallelism:
19938fd1498Szrj
20038fd1498Szrj a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
20138fd1498Szrj (a ^ b) & 0x80808080
20238fd1498Szrj
20338fd1498Szrj a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
20438fd1498Szrj (a ^ ~b) & 0x80808080
20538fd1498Szrj
20638fd1498Szrj -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
20738fd1498Szrj
20838fd1498Szrj This optimization should be done only if 4 vector items or more
20938fd1498Szrj fit into a word. */
21038fd1498Szrj static tree
do_plus_minus(gimple_stmt_iterator * gsi,tree word_type,tree a,tree b,tree bitpos ATTRIBUTE_UNUSED,tree bitsize ATTRIBUTE_UNUSED,enum tree_code code,tree type ATTRIBUTE_UNUSED)21138fd1498Szrj do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
21238fd1498Szrj tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
21338fd1498Szrj enum tree_code code, tree type ATTRIBUTE_UNUSED)
21438fd1498Szrj {
21538fd1498Szrj tree inner_type = TREE_TYPE (TREE_TYPE (a));
21638fd1498Szrj unsigned HOST_WIDE_INT max;
21738fd1498Szrj tree low_bits, high_bits, a_low, b_low, result_low, signs;
21838fd1498Szrj
21938fd1498Szrj max = GET_MODE_MASK (TYPE_MODE (inner_type));
22038fd1498Szrj low_bits = build_replicated_const (word_type, inner_type, max >> 1);
22138fd1498Szrj high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
22238fd1498Szrj
22338fd1498Szrj a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
22438fd1498Szrj b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
22538fd1498Szrj
22638fd1498Szrj signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
22738fd1498Szrj b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
22838fd1498Szrj if (code == PLUS_EXPR)
22938fd1498Szrj a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
23038fd1498Szrj else
23138fd1498Szrj {
23238fd1498Szrj a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
23338fd1498Szrj signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
23438fd1498Szrj }
23538fd1498Szrj
23638fd1498Szrj signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
23738fd1498Szrj result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
23838fd1498Szrj return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
23938fd1498Szrj }
24038fd1498Szrj
24138fd1498Szrj static tree
do_negate(gimple_stmt_iterator * gsi,tree word_type,tree b,tree unused ATTRIBUTE_UNUSED,tree bitpos ATTRIBUTE_UNUSED,tree bitsize ATTRIBUTE_UNUSED,enum tree_code code ATTRIBUTE_UNUSED,tree type ATTRIBUTE_UNUSED)24238fd1498Szrj do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
24338fd1498Szrj tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
24438fd1498Szrj tree bitsize ATTRIBUTE_UNUSED,
24538fd1498Szrj enum tree_code code ATTRIBUTE_UNUSED,
24638fd1498Szrj tree type ATTRIBUTE_UNUSED)
24738fd1498Szrj {
24838fd1498Szrj tree inner_type = TREE_TYPE (TREE_TYPE (b));
24938fd1498Szrj HOST_WIDE_INT max;
25038fd1498Szrj tree low_bits, high_bits, b_low, result_low, signs;
25138fd1498Szrj
25238fd1498Szrj max = GET_MODE_MASK (TYPE_MODE (inner_type));
25338fd1498Szrj low_bits = build_replicated_const (word_type, inner_type, max >> 1);
25438fd1498Szrj high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
25538fd1498Szrj
25638fd1498Szrj b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
25738fd1498Szrj
25838fd1498Szrj b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
25938fd1498Szrj signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
26038fd1498Szrj signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
26138fd1498Szrj result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
26238fd1498Szrj return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
26338fd1498Szrj }
26438fd1498Szrj
26538fd1498Szrj /* Expand a vector operation to scalars, by using many operations
26638fd1498Szrj whose type is the vector type's inner type. */
26738fd1498Szrj static tree
expand_vector_piecewise(gimple_stmt_iterator * gsi,elem_op_func f,tree type,tree inner_type,tree a,tree b,enum tree_code code)26838fd1498Szrj expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
26938fd1498Szrj tree type, tree inner_type,
27038fd1498Szrj tree a, tree b, enum tree_code code)
27138fd1498Szrj {
27238fd1498Szrj vec<constructor_elt, va_gc> *v;
27338fd1498Szrj tree part_width = TYPE_SIZE (inner_type);
27438fd1498Szrj tree index = bitsize_int (0);
27538fd1498Szrj int nunits = nunits_for_known_piecewise_op (type);
27638fd1498Szrj int delta = tree_to_uhwi (part_width)
27738fd1498Szrj / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
27838fd1498Szrj int i;
27938fd1498Szrj location_t loc = gimple_location (gsi_stmt (*gsi));
28038fd1498Szrj
28138fd1498Szrj if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
28238fd1498Szrj warning_at (loc, OPT_Wvector_operation_performance,
28338fd1498Szrj "vector operation will be expanded piecewise");
28438fd1498Szrj else
28538fd1498Szrj warning_at (loc, OPT_Wvector_operation_performance,
28638fd1498Szrj "vector operation will be expanded in parallel");
28738fd1498Szrj
28838fd1498Szrj vec_alloc (v, (nunits + delta - 1) / delta);
28938fd1498Szrj for (i = 0; i < nunits;
29038fd1498Szrj i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
29138fd1498Szrj {
29238fd1498Szrj tree result = f (gsi, inner_type, a, b, index, part_width, code, type);
29338fd1498Szrj constructor_elt ce = {NULL_TREE, result};
29438fd1498Szrj v->quick_push (ce);
29538fd1498Szrj }
29638fd1498Szrj
29738fd1498Szrj return build_constructor (type, v);
29838fd1498Szrj }
29938fd1498Szrj
30038fd1498Szrj /* Expand a vector operation to scalars with the freedom to use
30138fd1498Szrj a scalar integer type, or to use a different size for the items
30238fd1498Szrj in the vector type. */
30338fd1498Szrj static tree
expand_vector_parallel(gimple_stmt_iterator * gsi,elem_op_func f,tree type,tree a,tree b,enum tree_code code)30438fd1498Szrj expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
30538fd1498Szrj tree a, tree b,
30638fd1498Szrj enum tree_code code)
30738fd1498Szrj {
30838fd1498Szrj tree result, compute_type;
30938fd1498Szrj int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
31038fd1498Szrj location_t loc = gimple_location (gsi_stmt (*gsi));
31138fd1498Szrj
31238fd1498Szrj /* We have three strategies. If the type is already correct, just do
31338fd1498Szrj the operation an element at a time. Else, if the vector is wider than
31438fd1498Szrj one word, do it a word at a time; finally, if the vector is smaller
31538fd1498Szrj than one word, do it as a scalar. */
31638fd1498Szrj if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
31738fd1498Szrj return expand_vector_piecewise (gsi, f,
31838fd1498Szrj type, TREE_TYPE (type),
31938fd1498Szrj a, b, code);
32038fd1498Szrj else if (n_words > 1)
32138fd1498Szrj {
32238fd1498Szrj tree word_type = build_word_mode_vector_type (n_words);
32338fd1498Szrj result = expand_vector_piecewise (gsi, f,
32438fd1498Szrj word_type, TREE_TYPE (word_type),
32538fd1498Szrj a, b, code);
32638fd1498Szrj result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
32738fd1498Szrj GSI_SAME_STMT);
32838fd1498Szrj }
32938fd1498Szrj else
33038fd1498Szrj {
33138fd1498Szrj /* Use a single scalar operation with a mode no wider than word_mode. */
33238fd1498Szrj scalar_int_mode mode
33338fd1498Szrj = int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require ();
33438fd1498Szrj compute_type = lang_hooks.types.type_for_mode (mode, 1);
33538fd1498Szrj result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type);
33638fd1498Szrj warning_at (loc, OPT_Wvector_operation_performance,
33738fd1498Szrj "vector operation will be expanded with a "
33838fd1498Szrj "single scalar operation");
33938fd1498Szrj }
34038fd1498Szrj
34138fd1498Szrj return result;
34238fd1498Szrj }
34338fd1498Szrj
34438fd1498Szrj /* Expand a vector operation to scalars; for integer types we can use
34538fd1498Szrj special bit twiddling tricks to do the sums a word at a time, using
34638fd1498Szrj function F_PARALLEL instead of F. These tricks are done only if
34738fd1498Szrj they can process at least four items, that is, only if the vector
34838fd1498Szrj holds at least four items and if a word can hold four items. */
34938fd1498Szrj static tree
expand_vector_addition(gimple_stmt_iterator * gsi,elem_op_func f,elem_op_func f_parallel,tree type,tree a,tree b,enum tree_code code)35038fd1498Szrj expand_vector_addition (gimple_stmt_iterator *gsi,
35138fd1498Szrj elem_op_func f, elem_op_func f_parallel,
35238fd1498Szrj tree type, tree a, tree b, enum tree_code code)
35338fd1498Szrj {
35438fd1498Szrj int parts_per_word = UNITS_PER_WORD
35538fd1498Szrj / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
35638fd1498Szrj
35738fd1498Szrj if (INTEGRAL_TYPE_P (TREE_TYPE (type))
35838fd1498Szrj && parts_per_word >= 4
35938fd1498Szrj && nunits_for_known_piecewise_op (type) >= 4)
36038fd1498Szrj return expand_vector_parallel (gsi, f_parallel,
36138fd1498Szrj type, a, b, code);
36238fd1498Szrj else
36338fd1498Szrj return expand_vector_piecewise (gsi, f,
36438fd1498Szrj type, TREE_TYPE (type),
36538fd1498Szrj a, b, code);
36638fd1498Szrj }
36738fd1498Szrj
36838fd1498Szrj /* Try to expand vector comparison expression OP0 CODE OP1 by
36938fd1498Szrj querying optab if the following expression:
37038fd1498Szrj VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
37138fd1498Szrj can be expanded. */
37238fd1498Szrj static tree
expand_vector_comparison(gimple_stmt_iterator * gsi,tree type,tree op0,tree op1,enum tree_code code)37338fd1498Szrj expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
37438fd1498Szrj tree op1, enum tree_code code)
37538fd1498Szrj {
37638fd1498Szrj tree t;
37738fd1498Szrj if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
37838fd1498Szrj && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
37938fd1498Szrj t = expand_vector_piecewise (gsi, do_compare, type,
38038fd1498Szrj TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
38138fd1498Szrj else
38238fd1498Szrj t = NULL_TREE;
38338fd1498Szrj
38438fd1498Szrj return t;
38538fd1498Szrj }
38638fd1498Szrj
38738fd1498Szrj /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
38838fd1498Szrj of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
38938fd1498Szrj the result if successful, otherwise return NULL_TREE. */
39038fd1498Szrj static tree
add_rshift(gimple_stmt_iterator * gsi,tree type,tree op0,int * shiftcnts)39138fd1498Szrj add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
39238fd1498Szrj {
39338fd1498Szrj optab op;
39438fd1498Szrj unsigned int i, nunits = nunits_for_known_piecewise_op (type);
39538fd1498Szrj bool scalar_shift = true;
39638fd1498Szrj
39738fd1498Szrj for (i = 1; i < nunits; i++)
39838fd1498Szrj {
39938fd1498Szrj if (shiftcnts[i] != shiftcnts[0])
40038fd1498Szrj scalar_shift = false;
40138fd1498Szrj }
40238fd1498Szrj
40338fd1498Szrj if (scalar_shift && shiftcnts[0] == 0)
40438fd1498Szrj return op0;
40538fd1498Szrj
40638fd1498Szrj if (scalar_shift)
40738fd1498Szrj {
40838fd1498Szrj op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
40938fd1498Szrj if (op != unknown_optab
41038fd1498Szrj && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
41138fd1498Szrj return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
41238fd1498Szrj build_int_cst (NULL_TREE, shiftcnts[0]));
41338fd1498Szrj }
41438fd1498Szrj
41538fd1498Szrj op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
41638fd1498Szrj if (op != unknown_optab
41738fd1498Szrj && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
41838fd1498Szrj {
41938fd1498Szrj tree_vector_builder vec (type, nunits, 1);
42038fd1498Szrj for (i = 0; i < nunits; i++)
42138fd1498Szrj vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
42238fd1498Szrj return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
42338fd1498Szrj }
42438fd1498Szrj
42538fd1498Szrj return NULL_TREE;
42638fd1498Szrj }
42738fd1498Szrj
42838fd1498Szrj /* Try to expand integer vector division by constant using
42938fd1498Szrj widening multiply, shifts and additions. */
43038fd1498Szrj static tree
expand_vector_divmod(gimple_stmt_iterator * gsi,tree type,tree op0,tree op1,enum tree_code code)43138fd1498Szrj expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
43238fd1498Szrj tree op1, enum tree_code code)
43338fd1498Szrj {
43438fd1498Szrj bool use_pow2 = true;
43538fd1498Szrj bool has_vector_shift = true;
43638fd1498Szrj int mode = -1, this_mode;
43738fd1498Szrj int pre_shift = -1, post_shift;
43838fd1498Szrj unsigned int nunits = nunits_for_known_piecewise_op (type);
43938fd1498Szrj int *shifts = XALLOCAVEC (int, nunits * 4);
44038fd1498Szrj int *pre_shifts = shifts + nunits;
44138fd1498Szrj int *post_shifts = pre_shifts + nunits;
44238fd1498Szrj int *shift_temps = post_shifts + nunits;
44338fd1498Szrj unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
44438fd1498Szrj int prec = TYPE_PRECISION (TREE_TYPE (type));
44538fd1498Szrj int dummy_int;
44638fd1498Szrj unsigned int i;
44738fd1498Szrj signop sign_p = TYPE_SIGN (TREE_TYPE (type));
44838fd1498Szrj unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
44938fd1498Szrj tree cur_op, mulcst, tem;
45038fd1498Szrj optab op;
45138fd1498Szrj
45238fd1498Szrj if (prec > HOST_BITS_PER_WIDE_INT)
45338fd1498Szrj return NULL_TREE;
45438fd1498Szrj
45538fd1498Szrj op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
45638fd1498Szrj if (op == unknown_optab
45738fd1498Szrj || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
45838fd1498Szrj has_vector_shift = false;
45938fd1498Szrj
46038fd1498Szrj /* Analysis phase. Determine if all op1 elements are either power
46138fd1498Szrj of two and it is possible to expand it using shifts (or for remainder
46238fd1498Szrj using masking). Additionally compute the multiplicative constants
46338fd1498Szrj and pre and post shifts if the division is to be expanded using
46438fd1498Szrj widening or high part multiplication plus shifts. */
46538fd1498Szrj for (i = 0; i < nunits; i++)
46638fd1498Szrj {
46738fd1498Szrj tree cst = VECTOR_CST_ELT (op1, i);
46838fd1498Szrj unsigned HOST_WIDE_INT ml;
46938fd1498Szrj
47038fd1498Szrj if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
47138fd1498Szrj return NULL_TREE;
47238fd1498Szrj pre_shifts[i] = 0;
47338fd1498Szrj post_shifts[i] = 0;
47438fd1498Szrj mulc[i] = 0;
47538fd1498Szrj if (use_pow2
47638fd1498Szrj && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
47738fd1498Szrj use_pow2 = false;
47838fd1498Szrj if (use_pow2)
47938fd1498Szrj {
48038fd1498Szrj shifts[i] = tree_log2 (cst);
48138fd1498Szrj if (shifts[i] != shifts[0]
48238fd1498Szrj && code == TRUNC_DIV_EXPR
48338fd1498Szrj && !has_vector_shift)
48438fd1498Szrj use_pow2 = false;
48538fd1498Szrj }
48638fd1498Szrj if (mode == -2)
48738fd1498Szrj continue;
48838fd1498Szrj if (sign_p == UNSIGNED)
48938fd1498Szrj {
49038fd1498Szrj unsigned HOST_WIDE_INT mh;
49138fd1498Szrj unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
49238fd1498Szrj
49338fd1498Szrj if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
49438fd1498Szrj /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
49538fd1498Szrj return NULL_TREE;
49638fd1498Szrj
49738fd1498Szrj if (d <= 1)
49838fd1498Szrj {
49938fd1498Szrj mode = -2;
50038fd1498Szrj continue;
50138fd1498Szrj }
50238fd1498Szrj
50338fd1498Szrj /* Find a suitable multiplier and right shift count
50438fd1498Szrj instead of multiplying with D. */
50538fd1498Szrj mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
50638fd1498Szrj
50738fd1498Szrj /* If the suggested multiplier is more than SIZE bits, we can
50838fd1498Szrj do better for even divisors, using an initial right shift. */
50938fd1498Szrj if ((mh != 0 && (d & 1) == 0)
51038fd1498Szrj || (!has_vector_shift && pre_shift != -1))
51138fd1498Szrj {
51238fd1498Szrj if (has_vector_shift)
51338fd1498Szrj pre_shift = ctz_or_zero (d);
51438fd1498Szrj else if (pre_shift == -1)
51538fd1498Szrj {
51638fd1498Szrj unsigned int j;
51738fd1498Szrj for (j = 0; j < nunits; j++)
51838fd1498Szrj {
51938fd1498Szrj tree cst2 = VECTOR_CST_ELT (op1, j);
52038fd1498Szrj unsigned HOST_WIDE_INT d2;
52138fd1498Szrj int this_pre_shift;
52238fd1498Szrj
52338fd1498Szrj if (!tree_fits_uhwi_p (cst2))
52438fd1498Szrj return NULL_TREE;
52538fd1498Szrj d2 = tree_to_uhwi (cst2) & mask;
52638fd1498Szrj if (d2 == 0)
52738fd1498Szrj return NULL_TREE;
52838fd1498Szrj this_pre_shift = floor_log2 (d2 & -d2);
52938fd1498Szrj if (pre_shift == -1 || this_pre_shift < pre_shift)
53038fd1498Szrj pre_shift = this_pre_shift;
53138fd1498Szrj }
53238fd1498Szrj if (i != 0 && pre_shift != 0)
53338fd1498Szrj {
53438fd1498Szrj /* Restart. */
53538fd1498Szrj i = -1U;
53638fd1498Szrj mode = -1;
53738fd1498Szrj continue;
53838fd1498Szrj }
53938fd1498Szrj }
54038fd1498Szrj if (pre_shift != 0)
54138fd1498Szrj {
54238fd1498Szrj if ((d >> pre_shift) <= 1)
54338fd1498Szrj {
54438fd1498Szrj mode = -2;
54538fd1498Szrj continue;
54638fd1498Szrj }
54738fd1498Szrj mh = choose_multiplier (d >> pre_shift, prec,
54838fd1498Szrj prec - pre_shift,
54938fd1498Szrj &ml, &post_shift, &dummy_int);
55038fd1498Szrj gcc_assert (!mh);
55138fd1498Szrj pre_shifts[i] = pre_shift;
55238fd1498Szrj }
55338fd1498Szrj }
55438fd1498Szrj if (!mh)
55538fd1498Szrj this_mode = 0;
55638fd1498Szrj else
55738fd1498Szrj this_mode = 1;
55838fd1498Szrj }
55938fd1498Szrj else
56038fd1498Szrj {
56138fd1498Szrj HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
56238fd1498Szrj unsigned HOST_WIDE_INT abs_d;
56338fd1498Szrj
56438fd1498Szrj if (d == -1)
56538fd1498Szrj return NULL_TREE;
56638fd1498Szrj
56738fd1498Szrj /* Since d might be INT_MIN, we have to cast to
56838fd1498Szrj unsigned HOST_WIDE_INT before negating to avoid
56938fd1498Szrj undefined signed overflow. */
57038fd1498Szrj abs_d = (d >= 0
57138fd1498Szrj ? (unsigned HOST_WIDE_INT) d
57238fd1498Szrj : - (unsigned HOST_WIDE_INT) d);
57338fd1498Szrj
57438fd1498Szrj /* n rem d = n rem -d */
57538fd1498Szrj if (code == TRUNC_MOD_EXPR && d < 0)
57638fd1498Szrj d = abs_d;
57738fd1498Szrj else if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
57838fd1498Szrj {
57938fd1498Szrj /* This case is not handled correctly below. */
58038fd1498Szrj mode = -2;
58138fd1498Szrj continue;
58238fd1498Szrj }
58338fd1498Szrj if (abs_d <= 1)
58438fd1498Szrj {
58538fd1498Szrj mode = -2;
58638fd1498Szrj continue;
58738fd1498Szrj }
58838fd1498Szrj
58938fd1498Szrj choose_multiplier (abs_d, prec, prec - 1, &ml,
59038fd1498Szrj &post_shift, &dummy_int);
59138fd1498Szrj if (ml >= HOST_WIDE_INT_1U << (prec - 1))
59238fd1498Szrj {
59338fd1498Szrj this_mode = 4 + (d < 0);
59438fd1498Szrj ml |= HOST_WIDE_INT_M1U << (prec - 1);
59538fd1498Szrj }
59638fd1498Szrj else
59738fd1498Szrj this_mode = 2 + (d < 0);
59838fd1498Szrj }
59938fd1498Szrj mulc[i] = ml;
60038fd1498Szrj post_shifts[i] = post_shift;
60138fd1498Szrj if ((i && !has_vector_shift && post_shifts[0] != post_shift)
60238fd1498Szrj || post_shift >= prec
60338fd1498Szrj || pre_shifts[i] >= prec)
60438fd1498Szrj this_mode = -2;
60538fd1498Szrj
60638fd1498Szrj if (i == 0)
60738fd1498Szrj mode = this_mode;
60838fd1498Szrj else if (mode != this_mode)
60938fd1498Szrj mode = -2;
61038fd1498Szrj }
61138fd1498Szrj
61238fd1498Szrj if (use_pow2)
61338fd1498Szrj {
61438fd1498Szrj tree addend = NULL_TREE;
61538fd1498Szrj if (sign_p == SIGNED)
61638fd1498Szrj {
61738fd1498Szrj tree uns_type;
61838fd1498Szrj
61938fd1498Szrj /* Both division and remainder sequences need
62038fd1498Szrj op0 < 0 ? mask : 0 computed. It can be either computed as
62138fd1498Szrj (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
62238fd1498Szrj if none of the shifts is 0, or as the conditional. */
62338fd1498Szrj for (i = 0; i < nunits; i++)
62438fd1498Szrj if (shifts[i] == 0)
62538fd1498Szrj break;
62638fd1498Szrj uns_type
62738fd1498Szrj = build_vector_type (build_nonstandard_integer_type (prec, 1),
62838fd1498Szrj nunits);
62938fd1498Szrj if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
63038fd1498Szrj {
63138fd1498Szrj for (i = 0; i < nunits; i++)
63238fd1498Szrj shift_temps[i] = prec - 1;
63338fd1498Szrj cur_op = add_rshift (gsi, type, op0, shift_temps);
63438fd1498Szrj if (cur_op != NULL_TREE)
63538fd1498Szrj {
63638fd1498Szrj cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
63738fd1498Szrj uns_type, cur_op);
63838fd1498Szrj for (i = 0; i < nunits; i++)
63938fd1498Szrj shift_temps[i] = prec - shifts[i];
64038fd1498Szrj cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
64138fd1498Szrj if (cur_op != NULL_TREE)
64238fd1498Szrj addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
64338fd1498Szrj type, cur_op);
64438fd1498Szrj }
64538fd1498Szrj }
64638fd1498Szrj if (addend == NULL_TREE
64738fd1498Szrj && expand_vec_cond_expr_p (type, type, LT_EXPR))
64838fd1498Szrj {
64938fd1498Szrj tree zero, cst, cond, mask_type;
65038fd1498Szrj gimple *stmt;
65138fd1498Szrj
65238fd1498Szrj mask_type = build_same_sized_truth_vector_type (type);
65338fd1498Szrj zero = build_zero_cst (type);
65438fd1498Szrj cond = build2 (LT_EXPR, mask_type, op0, zero);
65538fd1498Szrj tree_vector_builder vec (type, nunits, 1);
65638fd1498Szrj for (i = 0; i < nunits; i++)
65738fd1498Szrj vec.quick_push (build_int_cst (TREE_TYPE (type),
65838fd1498Szrj (HOST_WIDE_INT_1U
65938fd1498Szrj << shifts[i]) - 1));
66038fd1498Szrj cst = vec.build ();
66138fd1498Szrj addend = make_ssa_name (type);
66238fd1498Szrj stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
66338fd1498Szrj cst, zero);
66438fd1498Szrj gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
66538fd1498Szrj }
66638fd1498Szrj }
66738fd1498Szrj if (code == TRUNC_DIV_EXPR)
66838fd1498Szrj {
66938fd1498Szrj if (sign_p == UNSIGNED)
67038fd1498Szrj {
67138fd1498Szrj /* q = op0 >> shift; */
67238fd1498Szrj cur_op = add_rshift (gsi, type, op0, shifts);
67338fd1498Szrj if (cur_op != NULL_TREE)
67438fd1498Szrj return cur_op;
67538fd1498Szrj }
67638fd1498Szrj else if (addend != NULL_TREE)
67738fd1498Szrj {
67838fd1498Szrj /* t1 = op0 + addend;
67938fd1498Szrj q = t1 >> shift; */
68038fd1498Szrj op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
68138fd1498Szrj if (op != unknown_optab
68238fd1498Szrj && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
68338fd1498Szrj {
68438fd1498Szrj cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
68538fd1498Szrj cur_op = add_rshift (gsi, type, cur_op, shifts);
68638fd1498Szrj if (cur_op != NULL_TREE)
68738fd1498Szrj return cur_op;
68838fd1498Szrj }
68938fd1498Szrj }
69038fd1498Szrj }
69138fd1498Szrj else
69238fd1498Szrj {
69338fd1498Szrj tree mask;
69438fd1498Szrj tree_vector_builder vec (type, nunits, 1);
69538fd1498Szrj for (i = 0; i < nunits; i++)
69638fd1498Szrj vec.quick_push (build_int_cst (TREE_TYPE (type),
69738fd1498Szrj (HOST_WIDE_INT_1U
69838fd1498Szrj << shifts[i]) - 1));
69938fd1498Szrj mask = vec.build ();
70038fd1498Szrj op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
70138fd1498Szrj if (op != unknown_optab
70238fd1498Szrj && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
70338fd1498Szrj {
70438fd1498Szrj if (sign_p == UNSIGNED)
70538fd1498Szrj /* r = op0 & mask; */
70638fd1498Szrj return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
70738fd1498Szrj else if (addend != NULL_TREE)
70838fd1498Szrj {
70938fd1498Szrj /* t1 = op0 + addend;
71038fd1498Szrj t2 = t1 & mask;
71138fd1498Szrj r = t2 - addend; */
71238fd1498Szrj op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
71338fd1498Szrj if (op != unknown_optab
71438fd1498Szrj && optab_handler (op, TYPE_MODE (type))
71538fd1498Szrj != CODE_FOR_nothing)
71638fd1498Szrj {
71738fd1498Szrj cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
71838fd1498Szrj addend);
71938fd1498Szrj cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
72038fd1498Szrj cur_op, mask);
72138fd1498Szrj op = optab_for_tree_code (MINUS_EXPR, type,
72238fd1498Szrj optab_default);
72338fd1498Szrj if (op != unknown_optab
72438fd1498Szrj && optab_handler (op, TYPE_MODE (type))
72538fd1498Szrj != CODE_FOR_nothing)
72638fd1498Szrj return gimplify_build2 (gsi, MINUS_EXPR, type,
72738fd1498Szrj cur_op, addend);
72838fd1498Szrj }
72938fd1498Szrj }
73038fd1498Szrj }
73138fd1498Szrj }
73238fd1498Szrj }
73338fd1498Szrj
73438fd1498Szrj if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
73538fd1498Szrj return NULL_TREE;
73638fd1498Szrj
73738fd1498Szrj if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
73838fd1498Szrj return NULL_TREE;
73938fd1498Szrj
74038fd1498Szrj cur_op = op0;
74138fd1498Szrj
74238fd1498Szrj switch (mode)
74338fd1498Szrj {
74438fd1498Szrj case 0:
74538fd1498Szrj gcc_assert (sign_p == UNSIGNED);
74638fd1498Szrj /* t1 = oprnd0 >> pre_shift;
74738fd1498Szrj t2 = t1 h* ml;
74838fd1498Szrj q = t2 >> post_shift; */
74938fd1498Szrj cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
75038fd1498Szrj if (cur_op == NULL_TREE)
75138fd1498Szrj return NULL_TREE;
75238fd1498Szrj break;
75338fd1498Szrj case 1:
75438fd1498Szrj gcc_assert (sign_p == UNSIGNED);
75538fd1498Szrj for (i = 0; i < nunits; i++)
75638fd1498Szrj {
75738fd1498Szrj shift_temps[i] = 1;
75838fd1498Szrj post_shifts[i]--;
75938fd1498Szrj }
76038fd1498Szrj break;
76138fd1498Szrj case 2:
76238fd1498Szrj case 3:
76338fd1498Szrj case 4:
76438fd1498Szrj case 5:
76538fd1498Szrj gcc_assert (sign_p == SIGNED);
76638fd1498Szrj for (i = 0; i < nunits; i++)
76738fd1498Szrj shift_temps[i] = prec - 1;
76838fd1498Szrj break;
76938fd1498Szrj default:
77038fd1498Szrj return NULL_TREE;
77138fd1498Szrj }
77238fd1498Szrj
77338fd1498Szrj tree_vector_builder vec (type, nunits, 1);
77438fd1498Szrj for (i = 0; i < nunits; i++)
77538fd1498Szrj vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i]));
77638fd1498Szrj mulcst = vec.build ();
77738fd1498Szrj
77838fd1498Szrj cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
77938fd1498Szrj
78038fd1498Szrj switch (mode)
78138fd1498Szrj {
78238fd1498Szrj case 0:
78338fd1498Szrj /* t1 = oprnd0 >> pre_shift;
78438fd1498Szrj t2 = t1 h* ml;
78538fd1498Szrj q = t2 >> post_shift; */
78638fd1498Szrj cur_op = add_rshift (gsi, type, cur_op, post_shifts);
78738fd1498Szrj break;
78838fd1498Szrj case 1:
78938fd1498Szrj /* t1 = oprnd0 h* ml;
79038fd1498Szrj t2 = oprnd0 - t1;
79138fd1498Szrj t3 = t2 >> 1;
79238fd1498Szrj t4 = t1 + t3;
79338fd1498Szrj q = t4 >> (post_shift - 1); */
79438fd1498Szrj op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
79538fd1498Szrj if (op == unknown_optab
79638fd1498Szrj || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
79738fd1498Szrj return NULL_TREE;
79838fd1498Szrj tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
79938fd1498Szrj tem = add_rshift (gsi, type, tem, shift_temps);
80038fd1498Szrj op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
80138fd1498Szrj if (op == unknown_optab
80238fd1498Szrj || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
80338fd1498Szrj return NULL_TREE;
80438fd1498Szrj tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
80538fd1498Szrj cur_op = add_rshift (gsi, type, tem, post_shifts);
80638fd1498Szrj if (cur_op == NULL_TREE)
80738fd1498Szrj return NULL_TREE;
80838fd1498Szrj break;
80938fd1498Szrj case 2:
81038fd1498Szrj case 3:
81138fd1498Szrj case 4:
81238fd1498Szrj case 5:
81338fd1498Szrj /* t1 = oprnd0 h* ml;
81438fd1498Szrj t2 = t1; [ iff (mode & 2) != 0 ]
81538fd1498Szrj t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
81638fd1498Szrj t3 = t2 >> post_shift;
81738fd1498Szrj t4 = oprnd0 >> (prec - 1);
81838fd1498Szrj q = t3 - t4; [ iff (mode & 1) == 0 ]
81938fd1498Szrj q = t4 - t3; [ iff (mode & 1) != 0 ] */
82038fd1498Szrj if ((mode & 2) == 0)
82138fd1498Szrj {
82238fd1498Szrj op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
82338fd1498Szrj if (op == unknown_optab
82438fd1498Szrj || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
82538fd1498Szrj return NULL_TREE;
82638fd1498Szrj cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
82738fd1498Szrj }
82838fd1498Szrj cur_op = add_rshift (gsi, type, cur_op, post_shifts);
82938fd1498Szrj if (cur_op == NULL_TREE)
83038fd1498Szrj return NULL_TREE;
83138fd1498Szrj tem = add_rshift (gsi, type, op0, shift_temps);
83238fd1498Szrj if (tem == NULL_TREE)
83338fd1498Szrj return NULL_TREE;
83438fd1498Szrj op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
83538fd1498Szrj if (op == unknown_optab
83638fd1498Szrj || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
83738fd1498Szrj return NULL_TREE;
83838fd1498Szrj if ((mode & 1) == 0)
83938fd1498Szrj cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
84038fd1498Szrj else
84138fd1498Szrj cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
84238fd1498Szrj break;
84338fd1498Szrj default:
84438fd1498Szrj gcc_unreachable ();
84538fd1498Szrj }
84638fd1498Szrj
84738fd1498Szrj if (code == TRUNC_DIV_EXPR)
84838fd1498Szrj return cur_op;
84938fd1498Szrj
85038fd1498Szrj /* We divided. Now finish by:
85138fd1498Szrj t1 = q * oprnd1;
85238fd1498Szrj r = oprnd0 - t1; */
85338fd1498Szrj op = optab_for_tree_code (MULT_EXPR, type, optab_default);
85438fd1498Szrj if (op == unknown_optab
85538fd1498Szrj || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
85638fd1498Szrj return NULL_TREE;
85738fd1498Szrj tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
85838fd1498Szrj op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
85938fd1498Szrj if (op == unknown_optab
86038fd1498Szrj || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
86138fd1498Szrj return NULL_TREE;
86238fd1498Szrj return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
86338fd1498Szrj }
86438fd1498Szrj
/* Expand a vector condition to scalars, by using many conditions
   on the vector's elements.

   The statement at GSI is a VEC_COND_EXPR assignment.  If the target
   supports it directly (expand_vec_cond_expr_p), leave it alone.
   Otherwise, for integer-mode vector boolean types, rewrite it with
   bitwise AND/NOT/IOR on the mask; as a last resort, extract every
   element and emit one scalar COND_EXPR per element.  */
static void
expand_vector_condition (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = gimple_expr_type (stmt);
  /* A is the condition operand: either an SSA name of vector type or
     an embedded comparison A1 <op> A2.  */
  tree a = gimple_assign_rhs1 (stmt);
  tree a1 = a;
  tree a2 = NULL_TREE;
  bool a_is_comparison = false;
  tree b = gimple_assign_rhs2 (stmt);
  tree c = gimple_assign_rhs3 (stmt);
  vec<constructor_elt, va_gc> *v;
  tree constr;
  tree inner_type = TREE_TYPE (type);
  tree cond_type = TREE_TYPE (TREE_TYPE (a));
  tree comp_inner_type = cond_type;
  /* WIDTH/INDEX walk the result elements; COMP_WIDTH/COMP_INDEX walk
     the (possibly differently sized) comparison elements.  */
  tree width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  tree comp_width = width;
  tree comp_index = index;
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (!is_gimple_val (a))
    {
      /* The condition is an embedded comparison; remember its operands
	 and the element size of the compared vectors.  */
      gcc_assert (COMPARISON_CLASS_P (a));
      a_is_comparison = true;
      a1 = TREE_OPERAND (a, 0);
      a2 = TREE_OPERAND (a, 1);
      comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
      comp_width = TYPE_SIZE (comp_inner_type);
    }

  /* Nothing to do if the target can expand this VEC_COND_EXPR.  */
  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
    return;

  /* Handle vector boolean types with bitmasks.  If there is a comparison
     and we can expand the comparison into the vector boolean bitmask,
     or otherwise if it is compatible with type, we can transform
      vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
     into
      tmp_6 = x_2 < y_3;
      tmp_7 = tmp_6 & vbfld_4;
      tmp_8 = ~tmp_6;
      tmp_9 = tmp_8 & vbfld_5;
      vbfld_1 = tmp_7 | tmp_9;
     Similarly for vbfld_10 instead of x_2 < y_3.  */
  if (VECTOR_BOOLEAN_TYPE_P (type)
      && SCALAR_INT_MODE_P (TYPE_MODE (type))
      && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
		   TYPE_VECTOR_SUBPARTS (type)
		   * GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type))))
      && (a_is_comparison
	  ? useless_type_conversion_p (type, TREE_TYPE (a))
	  : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
    {
      if (a_is_comparison)
	a = gimplify_build2 (gsi, TREE_CODE (a), type, a1, a2);
      a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
      a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
      a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
      a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
      gimple_assign_set_rhs_from_tree (gsi, a);
      update_stmt (gsi_stmt (*gsi));
      return;
    }

  /* TODO: try and find a smaller vector type.  */

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector condition will be expanded piecewise");

  /* Piecewise fallback: one scalar COND_EXPR per element, gathered
     back into a CONSTRUCTOR of the original vector type.  */
  int nunits = nunits_for_known_piecewise_op (type);
  vec_alloc (v, nunits);
  for (i = 0; i < nunits; i++)
    {
      tree aa, result;
      tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
      tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
      if (a_is_comparison)
	{
	  /* Rebuild the scalar comparison from the extracted operands.  */
	  tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
				       comp_width, comp_index);
	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
				       comp_width, comp_index);
	  aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
	}
      else
	aa = tree_vec_extract (gsi, cond_type, a, width, index);
      result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
      index = int_const_binop (PLUS_EXPR, index, width);
      /* Advance the comparison index; reuse INDEX when the element
	 sizes agree to avoid a second addition.  */
      if (width == comp_width)
	comp_index = index;
      else
	comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
    }

  constr = build_constructor (type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}
97038fd1498Szrj
97138fd1498Szrj static tree
expand_vector_operation(gimple_stmt_iterator * gsi,tree type,tree compute_type,gassign * assign,enum tree_code code)97238fd1498Szrj expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
97338fd1498Szrj gassign *assign, enum tree_code code)
97438fd1498Szrj {
97538fd1498Szrj machine_mode compute_mode = TYPE_MODE (compute_type);
97638fd1498Szrj
97738fd1498Szrj /* If the compute mode is not a vector mode (hence we are not decomposing
97838fd1498Szrj a BLKmode vector to smaller, hardware-supported vectors), we may want
97938fd1498Szrj to expand the operations in parallel. */
98038fd1498Szrj if (!VECTOR_MODE_P (compute_mode))
98138fd1498Szrj switch (code)
98238fd1498Szrj {
98338fd1498Szrj case PLUS_EXPR:
98438fd1498Szrj case MINUS_EXPR:
98538fd1498Szrj if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
98638fd1498Szrj return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
98738fd1498Szrj gimple_assign_rhs1 (assign),
98838fd1498Szrj gimple_assign_rhs2 (assign), code);
98938fd1498Szrj break;
99038fd1498Szrj
99138fd1498Szrj case NEGATE_EXPR:
99238fd1498Szrj if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
99338fd1498Szrj return expand_vector_addition (gsi, do_unop, do_negate, type,
99438fd1498Szrj gimple_assign_rhs1 (assign),
99538fd1498Szrj NULL_TREE, code);
99638fd1498Szrj break;
99738fd1498Szrj
99838fd1498Szrj case BIT_AND_EXPR:
99938fd1498Szrj case BIT_IOR_EXPR:
100038fd1498Szrj case BIT_XOR_EXPR:
100138fd1498Szrj return expand_vector_parallel (gsi, do_binop, type,
100238fd1498Szrj gimple_assign_rhs1 (assign),
100338fd1498Szrj gimple_assign_rhs2 (assign), code);
100438fd1498Szrj
100538fd1498Szrj case BIT_NOT_EXPR:
100638fd1498Szrj return expand_vector_parallel (gsi, do_unop, type,
100738fd1498Szrj gimple_assign_rhs1 (assign),
100838fd1498Szrj NULL_TREE, code);
100938fd1498Szrj case EQ_EXPR:
101038fd1498Szrj case NE_EXPR:
101138fd1498Szrj case GT_EXPR:
101238fd1498Szrj case LT_EXPR:
101338fd1498Szrj case GE_EXPR:
101438fd1498Szrj case LE_EXPR:
101538fd1498Szrj case UNEQ_EXPR:
101638fd1498Szrj case UNGT_EXPR:
101738fd1498Szrj case UNLT_EXPR:
101838fd1498Szrj case UNGE_EXPR:
101938fd1498Szrj case UNLE_EXPR:
102038fd1498Szrj case LTGT_EXPR:
102138fd1498Szrj case ORDERED_EXPR:
102238fd1498Szrj case UNORDERED_EXPR:
102338fd1498Szrj {
102438fd1498Szrj tree rhs1 = gimple_assign_rhs1 (assign);
102538fd1498Szrj tree rhs2 = gimple_assign_rhs2 (assign);
102638fd1498Szrj
102738fd1498Szrj return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
102838fd1498Szrj }
102938fd1498Szrj
103038fd1498Szrj case TRUNC_DIV_EXPR:
103138fd1498Szrj case TRUNC_MOD_EXPR:
103238fd1498Szrj {
103338fd1498Szrj tree rhs1 = gimple_assign_rhs1 (assign);
103438fd1498Szrj tree rhs2 = gimple_assign_rhs2 (assign);
103538fd1498Szrj tree ret;
103638fd1498Szrj
103738fd1498Szrj if (!optimize
103838fd1498Szrj || !VECTOR_INTEGER_TYPE_P (type)
103938fd1498Szrj || TREE_CODE (rhs2) != VECTOR_CST
104038fd1498Szrj || !VECTOR_MODE_P (TYPE_MODE (type)))
104138fd1498Szrj break;
104238fd1498Szrj
104338fd1498Szrj ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
104438fd1498Szrj if (ret != NULL_TREE)
104538fd1498Szrj return ret;
104638fd1498Szrj break;
104738fd1498Szrj }
104838fd1498Szrj
104938fd1498Szrj default:
105038fd1498Szrj break;
105138fd1498Szrj }
105238fd1498Szrj
105338fd1498Szrj if (TREE_CODE_CLASS (code) == tcc_unary)
105438fd1498Szrj return expand_vector_piecewise (gsi, do_unop, type, compute_type,
105538fd1498Szrj gimple_assign_rhs1 (assign),
105638fd1498Szrj NULL_TREE, code);
105738fd1498Szrj else
105838fd1498Szrj return expand_vector_piecewise (gsi, do_binop, type, compute_type,
105938fd1498Szrj gimple_assign_rhs1 (assign),
106038fd1498Szrj gimple_assign_rhs2 (assign), code);
106138fd1498Szrj }
106238fd1498Szrj
/* Try to optimize
   a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
   style stmts into:
   _9 = { b_7, b_7, b_7, b_7 };
   a_5 = _9 + { 0, 3, 6, 9 };
   because vector splat operation is usually more efficient
   than piecewise initialization of the vector.  */

static void
optimize_vector_constructor (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  tree type = TREE_TYPE (rhs);
  unsigned int i, j;
  unsigned HOST_WIDE_INT nelts;
  bool all_same = true;
  constructor_elt *elt;
  gimple *g;
  /* The common SSA name that every element is BASE + constant of.  */
  tree base = NULL_TREE;
  optab op;

  /* Only worth doing for a fully-populated constructor with a
     compile-time-constant element count greater than 2.  */
  if (!TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)
      || nelts <= 2
      || CONSTRUCTOR_NELTS (rhs) != nelts)
    return;
  /* The transformation needs a hardware vector addition.  */
  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return;
  /* Verify that each element is an SSA name of scalar type and that,
     after peeling off "x = y + CST" definitions, all elements reach the
     same base SSA name.  */
  FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
    if (TREE_CODE (elt->value) != SSA_NAME
	|| TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
      return;
    else
      {
	tree this_base = elt->value;
	if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
	  all_same = false;
	/* Follow at most NELTS + 1 links of PLUS_EXPR-with-constant
	   definitions to bound the walk.  */
	for (j = 0; j < nelts + 1; j++)
	  {
	    g = SSA_NAME_DEF_STMT (this_base);
	    if (is_gimple_assign (g)
		&& gimple_assign_rhs_code (g) == PLUS_EXPR
		&& TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
		&& TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
		&& !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
	      this_base = gimple_assign_rhs1 (g);
	    else
	      break;
	  }
	if (i == 0)
	  base = this_base;
	else if (this_base != base)
	  return;
      }
  /* A uniform constructor is already a splat; nothing to gain.  */
  if (all_same)
    return;
  /* Recompute each element's constant offset from BASE, giving up on
     overflow or non-constant folds.  */
  tree_vector_builder cst (type, nelts, 1);
  for (i = 0; i < nelts; i++)
    {
      tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
      tree elt = build_zero_cst (TREE_TYPE (base));
      while (this_base != base)
	{
	  g = SSA_NAME_DEF_STMT (this_base);
	  elt = fold_binary (PLUS_EXPR, TREE_TYPE (base),
			     elt, gimple_assign_rhs2 (g));
	  if (elt == NULL_TREE
	      || TREE_CODE (elt) != INTEGER_CST
	      || TREE_OVERFLOW (elt))
	    return;
	  this_base = gimple_assign_rhs1 (g);
	}
      cst.quick_push (elt);
    }
  /* Turn the constructor into a splat of BASE, then add the vector of
     constant offsets.  */
  for (i = 0; i < nelts; i++)
    CONSTRUCTOR_ELT (rhs, i)->value = base;
  g = gimple_build_assign (make_ssa_name (type), rhs);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
			   cst.build ());
  gsi_replace (gsi, g, false);
}
114838fd1498Szrj
114938fd1498Szrj /* Return a type for the widest vector mode whose components are of type
115038fd1498Szrj TYPE, or NULL_TREE if none is found. */
115138fd1498Szrj
115238fd1498Szrj static tree
type_for_widest_vector_mode(tree type,optab op)115338fd1498Szrj type_for_widest_vector_mode (tree type, optab op)
115438fd1498Szrj {
115538fd1498Szrj machine_mode inner_mode = TYPE_MODE (type);
115638fd1498Szrj machine_mode best_mode = VOIDmode, mode;
115738fd1498Szrj poly_int64 best_nunits = 0;
115838fd1498Szrj
115938fd1498Szrj if (SCALAR_FLOAT_MODE_P (inner_mode))
116038fd1498Szrj mode = MIN_MODE_VECTOR_FLOAT;
116138fd1498Szrj else if (SCALAR_FRACT_MODE_P (inner_mode))
116238fd1498Szrj mode = MIN_MODE_VECTOR_FRACT;
116338fd1498Szrj else if (SCALAR_UFRACT_MODE_P (inner_mode))
116438fd1498Szrj mode = MIN_MODE_VECTOR_UFRACT;
116538fd1498Szrj else if (SCALAR_ACCUM_MODE_P (inner_mode))
116638fd1498Szrj mode = MIN_MODE_VECTOR_ACCUM;
116738fd1498Szrj else if (SCALAR_UACCUM_MODE_P (inner_mode))
116838fd1498Szrj mode = MIN_MODE_VECTOR_UACCUM;
116938fd1498Szrj else if (inner_mode == BImode)
117038fd1498Szrj mode = MIN_MODE_VECTOR_BOOL;
117138fd1498Szrj else
117238fd1498Szrj mode = MIN_MODE_VECTOR_INT;
117338fd1498Szrj
117438fd1498Szrj FOR_EACH_MODE_FROM (mode, mode)
117538fd1498Szrj if (GET_MODE_INNER (mode) == inner_mode
117638fd1498Szrj && maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
117738fd1498Szrj && optab_handler (op, mode) != CODE_FOR_nothing)
117838fd1498Szrj best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
117938fd1498Szrj
118038fd1498Szrj if (best_mode == VOIDmode)
118138fd1498Szrj return NULL_TREE;
118238fd1498Szrj else
118338fd1498Szrj return build_vector_type_for_mode (type, best_mode);
118438fd1498Szrj }
118538fd1498Szrj
118638fd1498Szrj
/* Build a reference to the element of the vector VECT.  Function
   returns either the element itself, either BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   refernece to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  In case when PTMPVEC is NULL new temporary variable
   will be created.  */
static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple *asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = nunits_for_known_piecewise_op (vect_type);

  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
	 we don't care about the high bits of the value.  */
      index = TREE_INT_CST_LOW (idx);
      if (!tree_fits_uhwi_p (idx) || index >= elements)
	{
	  /* Reduce an out-of-range index modulo ELEMENTS (lane counts
	     are powers of two, so the AND is a modulus).  */
	  index &= elements - 1;
	  idx = build_int_cst (TREE_TYPE (idx), index);
	}

      /* When lowering a vector statement sequence do some easy
         simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
	  if (is_gimple_assign (def_stmt)
	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
	    vect = gimple_assign_rhs1 (def_stmt);
	}

      /* Constant vector: the element is known outright.  */
      if (TREE_CODE (vect) == VECTOR_CST)
	return VECTOR_CST_ELT (vect, index);
      /* A constructor of scalar elements: pick the element directly,
	 or zero if the constructor's trailing elements are elided.  */
      else if (TREE_CODE (vect) == CONSTRUCTOR
	       && (CONSTRUCTOR_NELTS (vect) == 0
		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
		      != VECTOR_TYPE))
        {
	  if (index < CONSTRUCTOR_NELTS (vect))
	    return CONSTRUCTOR_ELT (vect, index)->value;
          return build_zero_cst (vect_elt_type);
        }
      else
        {
	  /* Otherwise extract the constant-position element as a
	     BIT_FIELD_REF at bit offset INDEX * element size.  */
	  tree size = TYPE_SIZE (vect_elt_type);
	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
				  size);
	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
        }
    }

  /* Variable index: spill the vector to an addressable temporary and
     index it as an array.  Reuse the cached temporary when available.  */
  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
                 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
                 idx, NULL_TREE, NULL_TREE);
}
127738fd1498Szrj
/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands:
   VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0, V1 must have the
   same number of arguments.  */

static void
lower_vec_perm (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned HOST_WIDE_INT elements;
  vec<constructor_elt, va_gc> *v;
  tree constr, t, si, i_val;
  /* Cached spill temporaries for vector_element, one per operand.  */
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  location_t loc = gimple_location (gsi_stmt (*gsi));
  unsigned i;

  /* Piecewise lowering needs a compile-time-constant element count.  */
  if (!TYPE_VECTOR_SUBPARTS (vect_type).is_constant (&elements))
    return;

  /* Look through an SSA name defined by a constant to expose a
     VECTOR_CST mask.  */
  if (TREE_CODE (mask) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
      if (is_gimple_assign (def_stmt)
	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
	mask = gimple_assign_rhs1 (def_stmt);
    }

  vec_perm_builder sel_int;

  if (TREE_CODE (mask) == VECTOR_CST
      && tree_to_vec_perm_builder (&sel_int, mask))
    {
      vec_perm_indices indices (sel_int, 2, elements);
      /* If the target supports this exact constant permutation, just
	 canonicalize the mask operand and keep the VEC_PERM_EXPR.  */
      if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices))
	{
	  gimple_assign_set_rhs3 (stmt, mask);
	  update_stmt (stmt);
	  return;
	}
      /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
	 vector as VEC1 and a right element shift MASK.  */
      if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
	  != CODE_FOR_nothing
	  && TREE_CODE (vec1) == VECTOR_CST
	  && initializer_zerop (vec1)
	  && maybe_ne (indices[0], 0)
	  && known_lt (poly_uint64 (indices[0]), elements))
	{
	  /* A shift mask is the series indices[0], indices[0]+1, ...  */
	  bool ok_p = indices.series_p (0, 1, indices[0], 1);
	  if (!ok_p)
	    {
	      for (i = 1; i < elements; ++i)
		{
		  poly_uint64 actual = indices[i];
		  poly_uint64 expected = i + indices[0];
		  /* Indices into the second vector are all equivalent.  */
		  if (maybe_lt (actual, elements)
		      ? maybe_ne (actual, expected)
		      : maybe_lt (expected, elements))
		    break;
		}
	      ok_p = i == elements;
	    }
	  if (ok_p)
	    {
	      gimple_assign_set_rhs3 (stmt, mask);
	      update_stmt (stmt);
	      return;
	    }
	}
    }
  /* A variable mask that the target can handle needs no lowering.  */
  else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
    return;

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector shuffling operation will be expanded piecewise");

  /* Piecewise expansion: build each result element individually and
     collect them into a CONSTRUCTOR.  */
  vec_alloc (v, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
	{
	  unsigned HOST_WIDE_INT index;

	  /* Constant selector: decide at compile time which operand
	     and which lane to read.  */
	  index = TREE_INT_CST_LOW (i_val);
	  if (!tree_fits_uhwi_p (i_val) || index >= elements)
	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));

	  /* Bit ELEMENTS of the index selects the second operand.  */
	  if (two_operand_p && (index & elements) != 0)
	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
	  else
	    t = vector_element (gsi, vec0, i_val, &vec0tmp);

	  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
	}
      else
	{
	  tree cond = NULL_TREE, v0_val;

	  /* Variable selector: extract from both operands and pick at
	     runtime with a COND_EXPR on the operand-select bit.  */
	  if (two_operand_p)
	    {
	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			          build_int_cst (mask_elt_type, elements));
	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					       true, GSI_SAME_STMT);
	    }

	  /* Reduce the lane number modulo ELEMENTS.  */
	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			       build_int_cst (mask_elt_type, elements - 1));
	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
					    true, GSI_SAME_STMT);

	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
					     true, GSI_SAME_STMT);

	  if (two_operand_p)
	    {
	      tree v1_val;

	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
						 true, GSI_SAME_STMT);

	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
				  cond, build_zero_cst (mask_elt_type));
	      cond = fold_build3 (COND_EXPR, vect_elt_type,
				  cond, v0_val, v1_val);
	      t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	    }
	  else
	    t = v0_val;
	}

      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}
144038fd1498Szrj
144138fd1498Szrj /* If OP is a uniform vector return the element it is a splat from. */
144238fd1498Szrj
144338fd1498Szrj static tree
ssa_uniform_vector_p(tree op)144438fd1498Szrj ssa_uniform_vector_p (tree op)
144538fd1498Szrj {
144638fd1498Szrj if (TREE_CODE (op) == VECTOR_CST
144738fd1498Szrj || TREE_CODE (op) == VEC_DUPLICATE_EXPR
144838fd1498Szrj || TREE_CODE (op) == CONSTRUCTOR)
144938fd1498Szrj return uniform_vector_p (op);
145038fd1498Szrj if (TREE_CODE (op) == SSA_NAME)
145138fd1498Szrj {
145238fd1498Szrj gimple *def_stmt = SSA_NAME_DEF_STMT (op);
145338fd1498Szrj if (gimple_assign_single_p (def_stmt))
145438fd1498Szrj return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
145538fd1498Szrj }
145638fd1498Szrj return NULL_TREE;
145738fd1498Szrj }
145838fd1498Szrj
145938fd1498Szrj /* Return type in which CODE operation with optab OP can be
146038fd1498Szrj computed. */
146138fd1498Szrj
146238fd1498Szrj static tree
get_compute_type(enum tree_code code,optab op,tree type)146338fd1498Szrj get_compute_type (enum tree_code code, optab op, tree type)
146438fd1498Szrj {
146538fd1498Szrj /* For very wide vectors, try using a smaller vector mode. */
146638fd1498Szrj tree compute_type = type;
146738fd1498Szrj if (op
146838fd1498Szrj && (!VECTOR_MODE_P (TYPE_MODE (type))
146938fd1498Szrj || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
147038fd1498Szrj {
147138fd1498Szrj tree vector_compute_type
147238fd1498Szrj = type_for_widest_vector_mode (TREE_TYPE (type), op);
147338fd1498Szrj if (vector_compute_type != NULL_TREE
147438fd1498Szrj && subparts_gt (compute_type, vector_compute_type)
147538fd1498Szrj && maybe_ne (TYPE_VECTOR_SUBPARTS (vector_compute_type), 1U)
147638fd1498Szrj && (optab_handler (op, TYPE_MODE (vector_compute_type))
147738fd1498Szrj != CODE_FOR_nothing))
147838fd1498Szrj compute_type = vector_compute_type;
147938fd1498Szrj }
148038fd1498Szrj
148138fd1498Szrj /* If we are breaking a BLKmode vector into smaller pieces,
148238fd1498Szrj type_for_widest_vector_mode has already looked into the optab,
148338fd1498Szrj so skip these checks. */
148438fd1498Szrj if (compute_type == type)
148538fd1498Szrj {
148638fd1498Szrj machine_mode compute_mode = TYPE_MODE (compute_type);
148738fd1498Szrj if (VECTOR_MODE_P (compute_mode))
148838fd1498Szrj {
148938fd1498Szrj if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
149038fd1498Szrj return compute_type;
149138fd1498Szrj if (code == MULT_HIGHPART_EXPR
149238fd1498Szrj && can_mult_highpart_p (compute_mode,
149338fd1498Szrj TYPE_UNSIGNED (compute_type)))
149438fd1498Szrj return compute_type;
149538fd1498Szrj }
149638fd1498Szrj /* There is no operation in hardware, so fall back to scalars. */
149738fd1498Szrj compute_type = TREE_TYPE (type);
149838fd1498Szrj }
149938fd1498Szrj
150038fd1498Szrj return compute_type;
150138fd1498Szrj }
150238fd1498Szrj
150338fd1498Szrj static tree
do_cond(gimple_stmt_iterator * gsi,tree inner_type,tree a,tree b,tree bitpos,tree bitsize,enum tree_code code,tree type ATTRIBUTE_UNUSED)150438fd1498Szrj do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
150538fd1498Szrj tree bitpos, tree bitsize, enum tree_code code,
150638fd1498Szrj tree type ATTRIBUTE_UNUSED)
150738fd1498Szrj {
150838fd1498Szrj if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
150938fd1498Szrj a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
151038fd1498Szrj if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
151138fd1498Szrj b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
151238fd1498Szrj tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
151338fd1498Szrj return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
151438fd1498Szrj }
151538fd1498Szrj
151638fd1498Szrj /* Expand a vector COND_EXPR to scalars, piecewise. */
151738fd1498Szrj static void
expand_vector_scalar_condition(gimple_stmt_iterator * gsi)151838fd1498Szrj expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
151938fd1498Szrj {
152038fd1498Szrj gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
152138fd1498Szrj tree type = gimple_expr_type (stmt);
152238fd1498Szrj tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
152338fd1498Szrj machine_mode compute_mode = TYPE_MODE (compute_type);
152438fd1498Szrj gcc_assert (compute_mode != BLKmode);
152538fd1498Szrj tree lhs = gimple_assign_lhs (stmt);
152638fd1498Szrj tree rhs2 = gimple_assign_rhs2 (stmt);
152738fd1498Szrj tree rhs3 = gimple_assign_rhs3 (stmt);
152838fd1498Szrj tree new_rhs;
152938fd1498Szrj
153038fd1498Szrj /* If the compute mode is not a vector mode (hence we are not decomposing
153138fd1498Szrj a BLKmode vector to smaller, hardware-supported vectors), we may want
153238fd1498Szrj to expand the operations in parallel. */
153338fd1498Szrj if (!VECTOR_MODE_P (compute_mode))
153438fd1498Szrj new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
153538fd1498Szrj COND_EXPR);
153638fd1498Szrj else
153738fd1498Szrj new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
153838fd1498Szrj rhs2, rhs3, COND_EXPR);
153938fd1498Szrj if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
154038fd1498Szrj new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
154138fd1498Szrj new_rhs);
154238fd1498Szrj
154338fd1498Szrj /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
154438fd1498Szrj way to do it is change expand_vector_operation and its callees to
154538fd1498Szrj return a tree_code, RHS1 and RHS2 instead of a tree. */
154638fd1498Szrj gimple_assign_set_rhs_from_tree (gsi, new_rhs);
154738fd1498Szrj update_stmt (gsi_stmt (*gsi));
154838fd1498Szrj }
154938fd1498Szrj
155038fd1498Szrj /* Process one statement. If we identify a vector operation, expand it. */
155138fd1498Szrj
static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
{
  tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
  enum tree_code code;
  optab op = unknown_optab;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  /* Only consider code == GIMPLE_ASSIGN. */
  gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
  if (!stmt)
    return;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  /* Permutations and vector conditions have dedicated lowering
     routines; dispatch to them and be done.  */
  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (code == VEC_COND_EXPR)
    {
      expand_vector_condition (gsi);
      return;
    }

  /* A scalar condition selecting between whole vectors of BLKmode
     (no hardware support) must also be lowered piecewise.  */
  if (code == COND_EXPR
      && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
      && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
    {
      expand_vector_scalar_condition (gsi);
      return;
    }

  /* Non-clobber vector CONSTRUCTORs get a cheaper build when
     optimizing.  */
  if (code == CONSTRUCTOR
      && TREE_CODE (lhs) == SSA_NAME
      && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
      && !gimple_clobber_p (stmt)
      && optimize)
    {
      optimize_vector_constructor (gsi);
      return;
    }

  /* From here on only plain unary/binary operations are handled.  */
  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (!VECTOR_TYPE_P (type)
      || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
    return;

  /* A scalar operation pretending to be a vector one.  */
  if (VECTOR_BOOLEAN_TYPE_P (type)
      && !VECTOR_MODE_P (TYPE_MODE (type))
      && TYPE_MODE (type) != BLKmode)
    return;

  /* If the vector operation is operating on all same vector elements
     implement it with a scalar operation and a splat if the target
     supports the scalar operation.  */
  tree srhs1, srhs2 = NULL_TREE;
  if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
      && (rhs2 == NULL_TREE
	  || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
	      && (srhs2 = rhs2))
	  || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
      /* As we query direct optabs restrict to non-convert operations.  */
      && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
    {
      op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
      if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
	  && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
	{
	  /* Compute the scalar result once, then broadcast it.  */
	  tree slhs = make_ssa_name (TREE_TYPE (srhs1));
	  gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2);
	  gsi_insert_before (gsi, repl, GSI_SAME_STMT);
	  gimple_assign_set_rhs_from_tree (gsi,
					   build_vector_from_val (type, slhs));
	  update_stmt (stmt);
	  return;
	}
    }

  /* Conversions are never scalarized here.  */
  if (CONVERT_EXPR_CODE_P (code)
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  /* The signedness is determined from input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR)
    {
      /* NOTE(review): this assignment is dead — the function returns
	 on the next statement without using TYPE again.  */
      type = TREE_TYPE (rhs1);
      /* We do not know how to scalarize those.  */
      return;
    }

  /* For widening/narrowing vector operations, the relevant type is of the
     arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     calculated in the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_WIDEN_MULT_EVEN_EXPR
      || code == VEC_WIDEN_MULT_ODD_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    {
      /* NOTE(review): dead assignment, as above.  */
      type = TREE_TYPE (rhs1);
      /* We do not know how to scalarize those.  */
      return;
    }

  /* Choose between vector shift/rotate by vector and vector shift/rotate by
     scalar */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
         could be a scalar variable or a constant.  Transform
         vector <op> {x,x,x,x} ==> vector <op> scalar.  */
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	{
	  tree first;

	  if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
	    {
	      gimple_assign_set_rhs2 (stmt, first);
	      update_stmt (stmt);
	      rhs2 = first;
	    }
	}

      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	op = opv;
      else
	{
	  op = optab_for_tree_code (code, type, optab_scalar);

	  compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* The rtl expander will expand vector/scalar as vector/vector
	     if necessary.  Pick one with wider vector type.  */
	  tree compute_vtype = get_compute_type (code, opv, type);
	  if (subparts_gt (compute_vtype, compute_type))
	    {
	      compute_type = compute_vtype;
	      op = opv;
	    }
	}

      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
	{
	  if (compute_type == NULL_TREE)
	    compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* Before splitting vector rotates into scalar rotates,
	     see if we can't use vector shifts and BIT_IOR_EXPR
	     instead.  For vector by vector rotates we'd also
	     need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
	     for now, fold doesn't seem to create such rotates anyway.  */
	  if (compute_type == TREE_TYPE (type)
	      && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	    {
	      optab oplv = vashl_optab, opl = ashl_optab;
	      optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
	      tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
	      tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
	      tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
	      tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
	      tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
	      /* The rtl expander will expand vector/scalar as vector/vector
		 if necessary.  Pick one with wider vector type.  */
	      if (subparts_gt (compute_lvtype, compute_ltype))
		{
		  compute_ltype = compute_lvtype;
		  opl = oplv;
		}
	      if (subparts_gt (compute_rvtype, compute_rtype))
		{
		  compute_rtype = compute_rvtype;
		  opr = oprv;
		}
	      /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
		 BIT_IOR_EXPR.  */
	      compute_type = compute_ltype;
	      if (subparts_gt (compute_type, compute_rtype))
		compute_type = compute_rtype;
	      if (subparts_gt (compute_type, compute_otype))
		compute_type = compute_otype;
	      /* Verify all 3 operations can be performed in that type.  */
	      if (compute_type != TREE_TYPE (type))
		{
		  if (optab_handler (opl, TYPE_MODE (compute_type))
		      == CODE_FOR_nothing
		      || optab_handler (opr, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing
		      || optab_handler (opo, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing)
		    compute_type = TREE_TYPE (type);
		}
	    }
	}
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == unknown_optab
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  if (compute_type == NULL_TREE)
    compute_type = get_compute_type (code, op, type);
  if (compute_type == type)
    return;

  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}
181038fd1498Szrj
181138fd1498Szrj /* Use this to lower vector operations introduced by the vectorizer,
181238fd1498Szrj if it may need the bit-twiddling tricks implemented in this file. */
181338fd1498Szrj
181438fd1498Szrj static unsigned int
expand_vector_operations(void)181538fd1498Szrj expand_vector_operations (void)
181638fd1498Szrj {
181738fd1498Szrj gimple_stmt_iterator gsi;
181838fd1498Szrj basic_block bb;
181938fd1498Szrj bool cfg_changed = false;
182038fd1498Szrj
182138fd1498Szrj FOR_EACH_BB_FN (bb, cfun)
182238fd1498Szrj {
182338fd1498Szrj for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
182438fd1498Szrj {
182538fd1498Szrj expand_vector_operations_1 (&gsi);
182638fd1498Szrj /* ??? If we do not cleanup EH then we will ICE in
182738fd1498Szrj verification. But in reality we have created wrong-code
182838fd1498Szrj as we did not properly transition EH info and edges to
182938fd1498Szrj the piecewise computations. */
183038fd1498Szrj if (maybe_clean_eh_stmt (gsi_stmt (gsi))
183138fd1498Szrj && gimple_purge_dead_eh_edges (bb))
183238fd1498Szrj cfg_changed = true;
183338fd1498Szrj }
183438fd1498Szrj }
183538fd1498Szrj
183638fd1498Szrj return cfg_changed ? TODO_cleanup_cfg : 0;
183738fd1498Szrj }
183838fd1498Szrj
namespace {

/* Metadata for the "veclower" pass: requires a CFG, provides
   PROP_gimple_lvec so the pass is not run twice, and updates SSA form
   afterwards.  */
const pass_data pass_data_lower_vector =
{
  GIMPLE_PASS, /* type */
  "veclower", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

/* First instance of the vector-lowering pass.  */
class pass_lower_vector : public gimple_opt_pass
{
public:
  pass_lower_vector (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector, ctxt)
  {}

  /* opt_pass methods: */
  /* Skip functions whose vector operations were already lowered.  */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_lvec);
    }

  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector

} // anon namespace
187538fd1498Szrj
187638fd1498Szrj gimple_opt_pass *
make_pass_lower_vector(gcc::context * ctxt)187738fd1498Szrj make_pass_lower_vector (gcc::context *ctxt)
187838fd1498Szrj {
187938fd1498Szrj return new pass_lower_vector (ctxt);
188038fd1498Szrj }
188138fd1498Szrj
namespace {

/* Metadata for the "veclower2" pass: like "veclower" but additionally
   requests a CFG cleanup when finished.  */
const pass_data pass_data_lower_vector_ssa =
{
  GIMPLE_PASS, /* type */
  "veclower2", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_update_ssa
    | TODO_cleanup_cfg ), /* todo_flags_finish */
};

/* Later instances of vector lowering; clone () allows the pass to be
   scheduled more than once in the pass pipeline.  */
class pass_lower_vector_ssa : public gimple_opt_pass
{
public:
  pass_lower_vector_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector_ssa

} // anon namespace
191538fd1498Szrj
191638fd1498Szrj gimple_opt_pass *
make_pass_lower_vector_ssa(gcc::context * ctxt)191738fd1498Szrj make_pass_lower_vector_ssa (gcc::context *ctxt)
191838fd1498Szrj {
191938fd1498Szrj return new pass_lower_vector_ssa (ctxt);
192038fd1498Szrj }
192138fd1498Szrj
192238fd1498Szrj #include "gt-tree-vect-generic.h"
1923