138fd1498Szrj /* Global, SSA-based optimizations using mathematical identities.
238fd1498Szrj Copyright (C) 2005-2018 Free Software Foundation, Inc.
338fd1498Szrj
438fd1498Szrj This file is part of GCC.
538fd1498Szrj
638fd1498Szrj GCC is free software; you can redistribute it and/or modify it
738fd1498Szrj under the terms of the GNU General Public License as published by the
838fd1498Szrj Free Software Foundation; either version 3, or (at your option) any
938fd1498Szrj later version.
1038fd1498Szrj
1138fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT
1238fd1498Szrj ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1338fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
1438fd1498Szrj for more details.
1538fd1498Szrj
1638fd1498Szrj You should have received a copy of the GNU General Public License
1738fd1498Szrj along with GCC; see the file COPYING3. If not see
1838fd1498Szrj <http://www.gnu.org/licenses/>. */
1938fd1498Szrj
2038fd1498Szrj /* Currently, the only mini-pass in this file tries to CSE reciprocal
2138fd1498Szrj operations. These are common in sequences such as this one:
2238fd1498Szrj
2338fd1498Szrj modulus = sqrt(x*x + y*y + z*z);
2438fd1498Szrj x = x / modulus;
2538fd1498Szrj y = y / modulus;
2638fd1498Szrj z = z / modulus;
2738fd1498Szrj
2838fd1498Szrj that can be optimized to
2938fd1498Szrj
3038fd1498Szrj modulus = sqrt(x*x + y*y + z*z);
3138fd1498Szrj rmodulus = 1.0 / modulus;
3238fd1498Szrj x = x * rmodulus;
3338fd1498Szrj y = y * rmodulus;
3438fd1498Szrj z = z * rmodulus;
3538fd1498Szrj
3638fd1498Szrj We do this for loop invariant divisors, and with this pass whenever
3738fd1498Szrj we notice that a division has the same divisor multiple times.
3838fd1498Szrj
3938fd1498Szrj Of course, like in PRE, we don't insert a division if a dominator
4038fd1498Szrj already has one. However, this cannot be done as an extension of
4138fd1498Szrj PRE for several reasons.
4238fd1498Szrj
4338fd1498Szrj First of all, with some experiments it was found out that the
4438fd1498Szrj transformation is not always useful if there are only two divisions
4538fd1498Szrj by the same divisor. This is probably because modern processors
4638fd1498Szrj can pipeline the divisions; on older, in-order processors it should
4738fd1498Szrj still be effective to optimize two divisions by the same number.
4838fd1498Szrj We make this a param, and it shall be called N in the remainder of
4938fd1498Szrj this comment.
5038fd1498Szrj
5138fd1498Szrj Second, if trapping math is active, we have less freedom on where
5238fd1498Szrj to insert divisions: we can only do so in basic blocks that already
5338fd1498Szrj contain one. (If divisions don't trap, instead, we can insert
5438fd1498Szrj divisions elsewhere, which will be in blocks that are common dominators
5538fd1498Szrj of those that have the division).
5638fd1498Szrj
5738fd1498Szrj We really don't want to compute the reciprocal unless a division will
5838fd1498Szrj be found. To do this, we won't insert the division in a basic block
5938fd1498Szrj that has less than N divisions *post-dominating* it.
6038fd1498Szrj
6138fd1498Szrj The algorithm constructs a subset of the dominator tree, holding the
6238fd1498Szrj blocks containing the divisions and the common dominators to them,
6338fd1498Szrj and walk it twice. The first walk is in post-order, and it annotates
6438fd1498Szrj each block with the number of divisions that post-dominate it: this
6538fd1498Szrj gives information on where divisions can be inserted profitably.
6638fd1498Szrj The second walk is in pre-order, and it inserts divisions as explained
6738fd1498Szrj above, and replaces divisions by multiplications.
6838fd1498Szrj
6938fd1498Szrj In the best case, the cost of the pass is O(n_statements). In the
7038fd1498Szrj worst-case, the cost is due to creating the dominator tree subset,
7138fd1498Szrj with a cost of O(n_basic_blocks ^ 2); however this can only happen
7238fd1498Szrj for n_statements / n_basic_blocks statements. So, the amortized cost
7338fd1498Szrj of creating the dominator tree subset is O(n_basic_blocks) and the
7438fd1498Szrj worst-case cost of the pass is O(n_statements * n_basic_blocks).
7538fd1498Szrj
7638fd1498Szrj More practically, the cost will be small because there are few
7738fd1498Szrj divisions, and they tend to be in the same basic block, so insert_bb
7838fd1498Szrj is called very few times.
7938fd1498Szrj
8038fd1498Szrj If we did this using domwalk.c, an efficient implementation would have
8138fd1498Szrj to work on all the variables in a single pass, because we could not
8238fd1498Szrj work on just a subset of the dominator tree, as we do now, and the
8338fd1498Szrj cost would also be something like O(n_statements * n_basic_blocks).
8438fd1498Szrj The data structures would be more complex in order to work on all the
8538fd1498Szrj variables in a single pass. */
8638fd1498Szrj
8738fd1498Szrj #include "config.h"
8838fd1498Szrj #include "system.h"
8938fd1498Szrj #include "coretypes.h"
9038fd1498Szrj #include "backend.h"
9138fd1498Szrj #include "target.h"
9238fd1498Szrj #include "rtl.h"
9338fd1498Szrj #include "tree.h"
9438fd1498Szrj #include "gimple.h"
9538fd1498Szrj #include "predict.h"
9638fd1498Szrj #include "alloc-pool.h"
9738fd1498Szrj #include "tree-pass.h"
9838fd1498Szrj #include "ssa.h"
9938fd1498Szrj #include "optabs-tree.h"
10038fd1498Szrj #include "gimple-pretty-print.h"
10138fd1498Szrj #include "alias.h"
10238fd1498Szrj #include "fold-const.h"
10338fd1498Szrj #include "gimple-fold.h"
10438fd1498Szrj #include "gimple-iterator.h"
10538fd1498Szrj #include "gimplify.h"
10638fd1498Szrj #include "gimplify-me.h"
10738fd1498Szrj #include "stor-layout.h"
10838fd1498Szrj #include "tree-cfg.h"
10938fd1498Szrj #include "tree-dfa.h"
11038fd1498Szrj #include "tree-ssa.h"
11138fd1498Szrj #include "builtins.h"
11238fd1498Szrj #include "params.h"
11338fd1498Szrj #include "internal-fn.h"
11438fd1498Szrj #include "case-cfn-macros.h"
11538fd1498Szrj #include "optabs-libfuncs.h"
11638fd1498Szrj #include "tree-eh.h"
11738fd1498Szrj #include "targhooks.h"
11838fd1498Szrj #include "domwalk.h"
11938fd1498Szrj
12038fd1498Szrj /* This structure represents one basic block that either computes a
12138fd1498Szrj division, or is a common dominator for basic block that compute a
12238fd1498Szrj division. */
/* This structure represents one basic block that either computes a
   division, or is a common dominator for basic blocks that compute a
   division.  One instance exists per interesting block, linked into a
   subset of the dominator tree via the CHILDREN/NEXT pointers and
   reachable from the block through BB->aux.  */
struct occurrence {
  /* The basic block represented by this structure.  */
  basic_block bb;

  /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
     inserted in BB.  */
  tree recip_def;

  /* If non-NULL, the SSA_NAME holding the definition for a squared
     reciprocal inserted in BB.  */
  tree square_recip_def;

  /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
     was inserted in BB.  Used to avoid rewriting the inserted statement
     itself when replacing divisions.  */
  gimple *recip_def_stmt;

  /* Pointer to a list of "struct occurrence"s for blocks dominated
     by BB.  */
  struct occurrence *children;

  /* Pointer to the next "struct occurrence"s in the list of blocks
     sharing a common dominator.  */
  struct occurrence *next;

  /* The number of divisions that are in BB before compute_merit.  The
     number of divisions that are in BB or post-dominate it after
     compute_merit.  */
  int num_divisions;

  /* True if the basic block has a division, false if it is a common
     dominator for basic blocks that do.  If it is false and trapping
     math is active, BB is not a candidate for inserting a reciprocal.  */
  bool bb_has_division;
};
15738fd1498Szrj
/* Counters reported at the end of the reciprocal-CSE pass.  */
static struct
{
  /* Number of 1.0/X ops inserted.  */
  int rdivs_inserted;

  /* Number of 1.0/FUNC ops inserted.  */
  int rfuncs_inserted;
} reciprocal_stats;
16638fd1498Szrj
/* Counters reported at the end of the sincos pass.  */
static struct
{
  /* Number of cexpi calls inserted.  */
  int inserted;
} sincos_stats;
17238fd1498Szrj
/* Counters reported at the end of the widening-multiply pass.  */
static struct
{
  /* Number of widening multiplication ops inserted.  */
  int widen_mults_inserted;

  /* Number of integer multiply-and-accumulate ops inserted.  */
  int maccs_inserted;

  /* Number of fp fused multiply-add ops inserted.  */
  int fmas_inserted;

  /* Number of divmod calls inserted.  */
  int divmod_calls_inserted;
} widen_mul_stats;
18738fd1498Szrj
/* The instance of "struct occurrence" representing the highest
   interesting block in the dominator tree.  Head of the sibling list of
   tree roots; reset to NULL after each execute_cse_reciprocals_1 run.  */
static struct occurrence *occ_head;

/* Allocation pool for getting instances of "struct occurrence".  */
static object_allocator<occurrence> *occ_pool;
19438fd1498Szrj
19538fd1498Szrj
19638fd1498Szrj
19738fd1498Szrj /* Allocate and return a new struct occurrence for basic block BB, and
19838fd1498Szrj whose children list is headed by CHILDREN. */
19938fd1498Szrj static struct occurrence *
occ_new(basic_block bb,struct occurrence * children)20038fd1498Szrj occ_new (basic_block bb, struct occurrence *children)
20138fd1498Szrj {
20238fd1498Szrj struct occurrence *occ;
20338fd1498Szrj
20438fd1498Szrj bb->aux = occ = occ_pool->allocate ();
20538fd1498Szrj memset (occ, 0, sizeof (struct occurrence));
20638fd1498Szrj
20738fd1498Szrj occ->bb = bb;
20838fd1498Szrj occ->children = children;
20938fd1498Szrj return occ;
21038fd1498Szrj }
21138fd1498Szrj
21238fd1498Szrj
/* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
   list of "struct occurrence"s, one per basic block, having IDOM as
   their common dominator.

   We try to insert NEW_OCC as deep as possible in the tree, and we also
   insert any other block that is a common dominator for BB and one
   block already in the tree.  */

static void
insert_bb (struct occurrence *new_occ, basic_block idom,
	   struct occurrence **p_head)
{
  struct occurrence *occ, **p_occ;

  /* Walk the sibling list at *P_HEAD.  P_OCC always points at the link
     holding OCC so entries can be unlinked in place.  */
  for (p_occ = p_head; (occ = *p_occ) != NULL; )
    {
      basic_block bb = new_occ->bb, occ_bb = occ->bb;
      basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
      if (dom == bb)
	{
	  /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
	     from its list.  */
	  *p_occ = occ->next;
	  occ->next = new_occ->children;
	  new_occ->children = occ;

	  /* Try the next block (it may as well be dominated by BB).  */
	}

      else if (dom == occ_bb)
	{
	  /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
	  insert_bb (new_occ, dom, &occ->children);
	  return;
	}

      else if (dom != idom)
	{
	  /* DOM is a new intermediate dominator; it cannot already be in
	     the tree, or the dom == occ_bb case would have recursed into
	     it on an earlier call.  */
	  gcc_assert (!dom->aux);

	  /* There is a dominator between IDOM and BB, add it and make
	     two children out of NEW_OCC and OCC.  First, remove OCC from
	     its list.  */
	  *p_occ = occ->next;
	  new_occ->next = occ;
	  occ->next = NULL;

	  /* None of the previous blocks has DOM as a dominator: if we tail
	     recursed, we would reexamine them uselessly. Just switch BB with
	     DOM, and go on looking for blocks dominated by DOM.  */
	  new_occ = occ_new (dom, new_occ);
	}

      else
	{
	  /* Nothing special, go on with the next element.  */
	  p_occ = &occ->next;
	}
    }

  /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
  new_occ->next = *p_head;
  *p_head = new_occ;
}
27738fd1498Szrj
27838fd1498Szrj /* Register that we found a division in BB.
27938fd1498Szrj IMPORTANCE is a measure of how much weighting to give
28038fd1498Szrj that division. Use IMPORTANCE = 2 to register a single
28138fd1498Szrj division. If the division is going to be found multiple
28238fd1498Szrj times use 1 (as it is with squares). */
28338fd1498Szrj
28438fd1498Szrj static inline void
register_division_in(basic_block bb,int importance)28538fd1498Szrj register_division_in (basic_block bb, int importance)
28638fd1498Szrj {
28738fd1498Szrj struct occurrence *occ;
28838fd1498Szrj
28938fd1498Szrj occ = (struct occurrence *) bb->aux;
29038fd1498Szrj if (!occ)
29138fd1498Szrj {
29238fd1498Szrj occ = occ_new (bb, NULL);
29338fd1498Szrj insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
29438fd1498Szrj }
29538fd1498Szrj
29638fd1498Szrj occ->bb_has_division = true;
29738fd1498Szrj occ->num_divisions += importance;
29838fd1498Szrj }
29938fd1498Szrj
30038fd1498Szrj
30138fd1498Szrj /* Compute the number of divisions that postdominate each block in OCC and
30238fd1498Szrj its children. */
30338fd1498Szrj
30438fd1498Szrj static void
compute_merit(struct occurrence * occ)30538fd1498Szrj compute_merit (struct occurrence *occ)
30638fd1498Szrj {
30738fd1498Szrj struct occurrence *occ_child;
30838fd1498Szrj basic_block dom = occ->bb;
30938fd1498Szrj
31038fd1498Szrj for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
31138fd1498Szrj {
31238fd1498Szrj basic_block bb;
31338fd1498Szrj if (occ_child->children)
31438fd1498Szrj compute_merit (occ_child);
31538fd1498Szrj
31638fd1498Szrj if (flag_exceptions)
31738fd1498Szrj bb = single_noncomplex_succ (dom);
31838fd1498Szrj else
31938fd1498Szrj bb = dom;
32038fd1498Szrj
32138fd1498Szrj if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
32238fd1498Szrj occ->num_divisions += occ_child->num_divisions;
32338fd1498Szrj }
32438fd1498Szrj }
32538fd1498Szrj
32638fd1498Szrj
32738fd1498Szrj /* Return whether USE_STMT is a floating-point division by DEF. */
32838fd1498Szrj static inline bool
is_division_by(gimple * use_stmt,tree def)32938fd1498Szrj is_division_by (gimple *use_stmt, tree def)
33038fd1498Szrj {
33138fd1498Szrj return is_gimple_assign (use_stmt)
33238fd1498Szrj && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
33338fd1498Szrj && gimple_assign_rhs2 (use_stmt) == def
33438fd1498Szrj /* Do not recognize x / x as valid division, as we are getting
33538fd1498Szrj confused later by replacing all immediate uses x in such
33638fd1498Szrj a stmt. */
33738fd1498Szrj && gimple_assign_rhs1 (use_stmt) != def;
33838fd1498Szrj }
33938fd1498Szrj
34038fd1498Szrj /* Return whether USE_STMT is DEF * DEF. */
34138fd1498Szrj static inline bool
is_square_of(gimple * use_stmt,tree def)34238fd1498Szrj is_square_of (gimple *use_stmt, tree def)
34338fd1498Szrj {
34438fd1498Szrj if (gimple_code (use_stmt) == GIMPLE_ASSIGN
34538fd1498Szrj && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
34638fd1498Szrj {
34738fd1498Szrj tree op0 = gimple_assign_rhs1 (use_stmt);
34838fd1498Szrj tree op1 = gimple_assign_rhs2 (use_stmt);
34938fd1498Szrj
35038fd1498Szrj return op0 == op1 && op0 == def;
35138fd1498Szrj }
35238fd1498Szrj return 0;
35338fd1498Szrj }
35438fd1498Szrj
35538fd1498Szrj /* Return whether USE_STMT is a floating-point division by
35638fd1498Szrj DEF * DEF. */
35738fd1498Szrj static inline bool
is_division_by_square(gimple * use_stmt,tree def)35838fd1498Szrj is_division_by_square (gimple *use_stmt, tree def)
35938fd1498Szrj {
36038fd1498Szrj if (gimple_code (use_stmt) == GIMPLE_ASSIGN
36138fd1498Szrj && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
36238fd1498Szrj && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt))
36338fd1498Szrj {
36438fd1498Szrj tree denominator = gimple_assign_rhs2 (use_stmt);
36538fd1498Szrj if (TREE_CODE (denominator) == SSA_NAME)
36638fd1498Szrj {
36738fd1498Szrj return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
36838fd1498Szrj }
36938fd1498Szrj }
37038fd1498Szrj return 0;
37138fd1498Szrj }
37238fd1498Szrj
/* Walk the subset of the dominator tree rooted at OCC, setting the
   RECIP_DEF field to a definition of 1.0 / DEF that can be used in
   the given basic block.  The field may be left NULL, of course,
   if it is not possible or profitable to do the optimization.

   DEF_GSI is an iterator pointing at the statement defining DEF.
   If RECIP_DEF is set, a dominator already has a computation that can
   be used, so no new statement is inserted here.

   If SHOULD_INSERT_SQUARE_RECIP is set, then this also inserts
   the square of the reciprocal immediately after the definition
   of the reciprocal, recorded in SQUARE_RECIP_DEF.  */

static void
insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
		    tree def, tree recip_def, tree square_recip_def,
		    int should_insert_square_recip, int threshold)
{
  tree type;
  gassign *new_stmt, *new_square_stmt;
  gimple_stmt_iterator gsi;
  struct occurrence *occ_child;

  if (!recip_def
      && (occ->bb_has_division || !flag_trapping_math)
      /* Divide by two as all divisions are counted twice in
	 the costing loop.  */
      && occ->num_divisions / 2 >= threshold)
    {
      /* Make a variable with the replacement and substitute it.  */
      type = TREE_TYPE (def);
      recip_def = create_tmp_reg (type, "reciptmp");
      new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
				      build_one_cst (type), def);

      if (should_insert_square_recip)
	{
	  /* Also build recip_def * recip_def; NEW_SQUARE_STMT is only
	     initialized (and later inserted) under this flag.  */
	  square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
	  new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
						 recip_def, recip_def);
	}

      if (occ->bb_has_division)
	{
	  /* Case 1: insert before an existing division.  Skip forward to
	     the first division by DEF (or by DEF squared) so the
	     reciprocal is placed no earlier than needed.  */
	  gsi = gsi_after_labels (occ->bb);
	  while (!gsi_end_p (gsi)
		 && (!is_division_by (gsi_stmt (gsi), def))
		 && (!is_division_by_square (gsi_stmt (gsi), def)))
	    gsi_next (&gsi);

	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	  if (should_insert_square_recip)
	    gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
	}
      else if (def_gsi && occ->bb == def_gsi->bb)
	{
	  /* Case 2: insert right after the definition.  Note that this will
	     never happen if the definition statement can throw, because in
	     that case the sole successor of the statement's basic block will
	     dominate all the uses as well.  */
	  gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
	  if (should_insert_square_recip)
	    gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
	}
      else
	{
	  /* Case 3: insert in a basic block not containing defs/uses.  */
	  gsi = gsi_after_labels (occ->bb);
	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	  if (should_insert_square_recip)
	    gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
	}

      reciprocal_stats.rdivs_inserted++;

      occ->recip_def_stmt = new_stmt;
    }

  /* Record (or propagate) the available definitions and recurse, so
     dominated blocks reuse the reciprocal computed here.  */
  occ->recip_def = recip_def;
  occ->square_recip_def = square_recip_def;
  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    insert_reciprocals (def_gsi, occ_child, def, recip_def,
			square_recip_def, should_insert_square_recip,
			threshold);
}
45938fd1498Szrj
46038fd1498Szrj /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
46138fd1498Szrj Take as argument the use for (x * x). */
46238fd1498Szrj static inline void
replace_reciprocal_squares(use_operand_p use_p)46338fd1498Szrj replace_reciprocal_squares (use_operand_p use_p)
46438fd1498Szrj {
46538fd1498Szrj gimple *use_stmt = USE_STMT (use_p);
46638fd1498Szrj basic_block bb = gimple_bb (use_stmt);
46738fd1498Szrj struct occurrence *occ = (struct occurrence *) bb->aux;
46838fd1498Szrj
46938fd1498Szrj if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
47038fd1498Szrj && occ->recip_def)
47138fd1498Szrj {
47238fd1498Szrj gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
47338fd1498Szrj gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
47438fd1498Szrj gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
47538fd1498Szrj SET_USE (use_p, occ->square_recip_def);
47638fd1498Szrj fold_stmt_inplace (&gsi);
47738fd1498Szrj update_stmt (use_stmt);
47838fd1498Szrj }
47938fd1498Szrj }
48038fd1498Szrj
48138fd1498Szrj
48238fd1498Szrj /* Replace the division at USE_P with a multiplication by the reciprocal, if
48338fd1498Szrj possible. */
48438fd1498Szrj
48538fd1498Szrj static inline void
replace_reciprocal(use_operand_p use_p)48638fd1498Szrj replace_reciprocal (use_operand_p use_p)
48738fd1498Szrj {
48838fd1498Szrj gimple *use_stmt = USE_STMT (use_p);
48938fd1498Szrj basic_block bb = gimple_bb (use_stmt);
49038fd1498Szrj struct occurrence *occ = (struct occurrence *) bb->aux;
49138fd1498Szrj
49238fd1498Szrj if (optimize_bb_for_speed_p (bb)
49338fd1498Szrj && occ->recip_def && use_stmt != occ->recip_def_stmt)
49438fd1498Szrj {
49538fd1498Szrj gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
49638fd1498Szrj gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
49738fd1498Szrj SET_USE (use_p, occ->recip_def);
49838fd1498Szrj fold_stmt_inplace (&gsi);
49938fd1498Szrj update_stmt (use_stmt);
50038fd1498Szrj }
50138fd1498Szrj }
50238fd1498Szrj
50338fd1498Szrj
50438fd1498Szrj /* Free OCC and return one more "struct occurrence" to be freed. */
50538fd1498Szrj
50638fd1498Szrj static struct occurrence *
free_bb(struct occurrence * occ)50738fd1498Szrj free_bb (struct occurrence *occ)
50838fd1498Szrj {
50938fd1498Szrj struct occurrence *child, *next;
51038fd1498Szrj
51138fd1498Szrj /* First get the two pointers hanging off OCC. */
51238fd1498Szrj next = occ->next;
51338fd1498Szrj child = occ->children;
51438fd1498Szrj occ->bb->aux = NULL;
51538fd1498Szrj occ_pool->remove (occ);
51638fd1498Szrj
51738fd1498Szrj /* Now ensure that we don't recurse unless it is necessary. */
51838fd1498Szrj if (!child)
51938fd1498Szrj return next;
52038fd1498Szrj else
52138fd1498Szrj {
52238fd1498Szrj while (next)
52338fd1498Szrj next = free_bb (next);
52438fd1498Szrj
52538fd1498Szrj return child;
52638fd1498Szrj }
52738fd1498Szrj }
52838fd1498Szrj
52938fd1498Szrj
/* Look for floating-point divisions among DEF's uses, and try to
   replace them by multiplications with the reciprocal.  Add
   as many statements computing the reciprocal as needed.

   DEF must be a GIMPLE register of a floating-point type.  */

static void
execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
{
  use_operand_p use_p, square_use_p;
  imm_use_iterator use_iter, square_use_iter;
  tree square_def;
  struct occurrence *occ;
  int count = 0;
  int threshold;
  int square_recip_count = 0;
  int sqrt_recip_count = 0;

  gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
  threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));

  /* If DEF is a square (x * x), count the number of divisions by x.
     If there are more divisions by x than by (DEF * DEF), prefer to optimize
     the reciprocal of x instead of DEF.  This improves cases like:
       def = x * x
       t0 = a / def
       t1 = b / def
       t2 = c / x
     Reciprocal optimization of x results in 1 division rather than 2 or 3.  */
  gimple *def_stmt = SSA_NAME_DEF_STMT (def);

  if (is_gimple_assign (def_stmt)
      && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
      && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
    {
      tree op0 = gimple_assign_rhs1 (def_stmt);

      /* Count divisions by x itself among x's immediate uses.  */
      FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (is_division_by (use_stmt, op0))
	    sqrt_recip_count++;
	}
    }

  /* Register every division by DEF (weight 2) and every division by
     DEF's square (weight 1, seen twice — once per operand use).  */
  FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_division_by (use_stmt, def))
	{
	  register_division_in (gimple_bb (use_stmt), 2);
	  count++;
	}

      if (is_square_of (use_stmt, def))
	{
	  square_def = gimple_assign_lhs (use_stmt);
	  FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
	    {
	      gimple *square_use_stmt = USE_STMT (square_use_p);
	      if (is_division_by (square_use_stmt, square_def))
		{
		  /* This is executed twice for each division by a square.  */
		  register_division_in (gimple_bb (square_use_stmt), 1);
		  square_recip_count++;
		}
	    }
	}
    }

  /* Square reciprocals were counted twice above.  */
  square_recip_count /= 2;

  /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x).
     Still fall through to free the occurrence tree built above.  */
  if (sqrt_recip_count > square_recip_count)
    goto out;

  /* Do the expensive part only if we can hope to optimize something.  */
  if (count + square_recip_count >= threshold && count >= 1)
    {
      gimple *use_stmt;
      /* Annotate the tree with merits and insert the reciprocal (and
	 optionally its square) at each profitable point.  */
      for (occ = occ_head; occ; occ = occ->next)
	{
	  compute_merit (occ);
	  insert_reciprocals (def_gsi, occ, def, NULL, NULL,
			      square_recip_count, threshold);
	}

      /* Rewrite divisions into multiplications by the inserted
	 reciprocals.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
	{
	  if (is_division_by (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		replace_reciprocal (use_p);
	    }
	  else if (square_recip_count > 0 && is_square_of (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		{
		  /* Find all uses of the square that are divisions and
		   * replace them by multiplications with the inverse.  */
		  imm_use_iterator square_iterator;
		  gimple *powmult_use_stmt = USE_STMT (use_p);
		  tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);

		  FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
					 square_iterator, powmult_def_name)
		    FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
		      {
			gimple *powmult_use_stmt = USE_STMT (square_use_p);
			if (is_division_by (powmult_use_stmt, powmult_def_name))
			  replace_reciprocal_squares (square_use_p);
		      }
		}
	    }
	}
    }

out:
  /* Tear down the occurrence tree and clear the BB->aux fields.  */
  for (occ = occ_head; occ; )
    occ = free_bb (occ);

  occ_head = NULL;
}
65538fd1498Szrj
65638fd1498Szrj /* Return an internal function that implements the reciprocal of CALL,
65738fd1498Szrj or IFN_LAST if there is no such function that the target supports. */
65838fd1498Szrj
65938fd1498Szrj internal_fn
internal_fn_reciprocal(gcall * call)66038fd1498Szrj internal_fn_reciprocal (gcall *call)
66138fd1498Szrj {
66238fd1498Szrj internal_fn ifn;
66338fd1498Szrj
66438fd1498Szrj switch (gimple_call_combined_fn (call))
66538fd1498Szrj {
66638fd1498Szrj CASE_CFN_SQRT:
66738fd1498Szrj CASE_CFN_SQRT_FN:
66838fd1498Szrj ifn = IFN_RSQRT;
66938fd1498Szrj break;
67038fd1498Szrj
67138fd1498Szrj default:
67238fd1498Szrj return IFN_LAST;
67338fd1498Szrj }
67438fd1498Szrj
67538fd1498Szrj tree_pair types = direct_internal_fn_types (ifn, call);
67638fd1498Szrj if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
67738fd1498Szrj return IFN_LAST;
67838fd1498Szrj
67938fd1498Szrj return ifn;
68038fd1498Szrj }
68138fd1498Szrj
68238fd1498Szrj /* Go through all the floating-point SSA_NAMEs, and call
68338fd1498Szrj execute_cse_reciprocals_1 on each of them. */
68438fd1498Szrj namespace {
68538fd1498Szrj
/* Pass descriptor for the reciprocal-CSE pass ("recip").  */
const pass_data pass_data_cse_reciprocals =
{
  GIMPLE_PASS, /* type */
  "recip", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_RECIP, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
69838fd1498Szrj
/* GIMPLE pass wrapper: converts repeated divisions by the same value
   into multiplications by a single inserted reciprocal.  */
class pass_cse_reciprocals : public gimple_opt_pass
{
public:
  pass_cse_reciprocals (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when optimizing and -freciprocal-math is in effect.  */
  virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
  virtual unsigned int execute (function *);

}; // class pass_cse_reciprocals
71138fd1498Szrj
unsigned int
pass_cse_reciprocals::execute (function *fun)
{
  basic_block bb;
  tree arg;

  /* Allocator for the 'occurrence' nodes used by
     execute_cse_reciprocals_1; freed at the end of the pass.  */
  occ_pool = new object_allocator<occurrence> ("dominators for recip");

  memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  /* bb->aux is used as scratch storage by the helpers; it must start
     out clear.  */
  if (flag_checking)
    FOR_EACH_BB_FN (bb, fun)
      gcc_assert (!bb->aux);

  /* Consider the default definitions of floating-point parameters.  */
  for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
    if (FLOAT_TYPE_P (TREE_TYPE (arg))
	&& is_gimple_reg (arg))
      {
	tree name = ssa_default_def (fun, arg);
	if (name)
	  execute_cse_reciprocals_1 (NULL, name);
      }

  FOR_EACH_BB_FN (bb, fun)
    {
      tree def;

      /* First all floating-point PHI results ...  */
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  def = PHI_RESULT (phi);
	  if (! virtual_operand_p (def)
	      && FLOAT_TYPE_P (TREE_TYPE (def)))
	    execute_cse_reciprocals_1 (NULL, def);
	}

      /* ... then every floating-point SSA definition in the block.  */
      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (gimple_has_lhs (stmt)
	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
	      && FLOAT_TYPE_P (TREE_TYPE (def))
	      && TREE_CODE (def) == SSA_NAME)
	    execute_cse_reciprocals_1 (&gsi, def);
	}

      /* The a/func(b) -> a*rfunc(b) transformation below may add
	 statements, so skip it when optimizing this block for size.  */
      if (optimize_bb_for_size_p (bb))
        continue;

      /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
        {
	  gimple *stmt = gsi_stmt (gsi);

	  if (is_gimple_assign (stmt)
	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
	    {
	      tree arg1 = gimple_assign_rhs2 (stmt);
	      gimple *stmt1;

	      if (TREE_CODE (arg1) != SSA_NAME)
		continue;

	      stmt1 = SSA_NAME_DEF_STMT (arg1);

	      if (is_gimple_call (stmt1)
		  && gimple_call_lhs (stmt1))
		{
		  bool fail;
		  imm_use_iterator ui;
		  use_operand_p use_p;
		  tree fndecl = NULL_TREE;

		  /* Prefer an internal reciprocal function; otherwise fall
		     back to a target-provided reciprocal builtin, if any.  */
		  gcall *call = as_a <gcall *> (stmt1);
		  internal_fn ifn = internal_fn_reciprocal (call);
		  if (ifn == IFN_LAST)
		    {
		      fndecl = gimple_call_fndecl (call);
		      if (!fndecl
			  || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_MD)
			continue;
		      fndecl = targetm.builtin_reciprocal (fndecl);
		      if (!fndecl)
			continue;
		    }

		  /* Check that all uses of the SSA name are divisions,
		     otherwise replacing the defining statement will do
		     the wrong thing.  */
		  fail = false;
		  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
		    {
		      gimple *stmt2 = USE_STMT (use_p);
		      if (is_gimple_debug (stmt2))
			continue;
		      /* Every non-debug use must be a division with ARG1
			 as the divisor (and not also the dividend).  */
		      if (!is_gimple_assign (stmt2)
			  || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
			  || gimple_assign_rhs1 (stmt2) == arg1
			  || gimple_assign_rhs2 (stmt2) != arg1)
			{
			  fail = true;
			  break;
			}
		    }
		  if (fail)
		    continue;

		  gimple_replace_ssa_lhs (call, arg1);
		  if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
		    {
		      /* Internal/external flavor changes: build a fresh call
			 of the right kind and swap it in, carrying over the
			 arguments and the virtual operands.  */
		      auto_vec<tree, 4> args;
		      for (unsigned int i = 0;
			   i < gimple_call_num_args (call); i++)
			args.safe_push (gimple_call_arg (call, i));
		      gcall *stmt2;
		      if (ifn == IFN_LAST)
			stmt2 = gimple_build_call_vec (fndecl, args);
		      else
			stmt2 = gimple_build_call_internal_vec (ifn, args);
		      gimple_call_set_lhs (stmt2, arg1);
		      if (gimple_vdef (call))
			{
			  gimple_set_vdef (stmt2, gimple_vdef (call));
			  SSA_NAME_DEF_STMT (gimple_vdef (stmt2)) = stmt2;
			}
		      gimple_call_set_nothrow (stmt2,
					       gimple_call_nothrow_p (call));
		      gimple_set_vuse (stmt2, gimple_vuse (call));
		      gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
		      gsi_replace (&gsi2, stmt2, true);
		    }
		  else
		    {
		      /* Same flavor: just retarget the existing call.  */
		      if (ifn == IFN_LAST)
			gimple_call_set_fndecl (call, fndecl);
		      else
			gimple_call_set_internal_fn (call, ifn);
		      update_stmt (call);
		    }
		  reciprocal_stats.rfuncs_inserted++;

		  /* Turn every use a / rfunc(b) into a * rfunc(b).  */
		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
		    {
		      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
		      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
		      fold_stmt_inplace (&gsi);
		      update_stmt (stmt);
		    }
		}
	    }
	}
    }

  statistics_counter_event (fun, "reciprocal divs inserted",
			    reciprocal_stats.rdivs_inserted);
  statistics_counter_event (fun, "reciprocal functions inserted",
			    reciprocal_stats.rfuncs_inserted);

  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);
  delete occ_pool;
  return 0;
}
88138fd1498Szrj
88238fd1498Szrj } // anon namespace
88338fd1498Szrj
88438fd1498Szrj gimple_opt_pass *
make_pass_cse_reciprocals(gcc::context * ctxt)88538fd1498Szrj make_pass_cse_reciprocals (gcc::context *ctxt)
88638fd1498Szrj {
88738fd1498Szrj return new pass_cse_reciprocals (ctxt);
88838fd1498Szrj }
88938fd1498Szrj
89038fd1498Szrj /* Records an occurrence at statement USE_STMT in the vector of trees
89138fd1498Szrj STMTS if it is dominated by *TOP_BB or dominates it or this basic block
89238fd1498Szrj is not yet initialized. Returns true if the occurrence was pushed on
89338fd1498Szrj the vector. Adjusts *TOP_BB to be the basic block dominating all
89438fd1498Szrj statements in the vector. */
89538fd1498Szrj
89638fd1498Szrj static bool
maybe_record_sincos(vec<gimple * > * stmts,basic_block * top_bb,gimple * use_stmt)89738fd1498Szrj maybe_record_sincos (vec<gimple *> *stmts,
89838fd1498Szrj basic_block *top_bb, gimple *use_stmt)
89938fd1498Szrj {
90038fd1498Szrj basic_block use_bb = gimple_bb (use_stmt);
90138fd1498Szrj if (*top_bb
90238fd1498Szrj && (*top_bb == use_bb
90338fd1498Szrj || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
90438fd1498Szrj stmts->safe_push (use_stmt);
90538fd1498Szrj else if (!*top_bb
90638fd1498Szrj || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
90738fd1498Szrj {
90838fd1498Szrj stmts->safe_push (use_stmt);
90938fd1498Szrj *top_bb = use_bb;
91038fd1498Szrj }
91138fd1498Szrj else
91238fd1498Szrj return false;
91338fd1498Szrj
91438fd1498Szrj return true;
91538fd1498Szrj }
91638fd1498Szrj
91738fd1498Szrj /* Look for sin, cos and cexpi calls with the same argument NAME and
91838fd1498Szrj create a single call to cexpi CSEing the result in this case.
91938fd1498Szrj We first walk over all immediate uses of the argument collecting
92038fd1498Szrj statements that we can CSE in a vector and in a second pass replace
92138fd1498Szrj the statement rhs with a REALPART or IMAGPART expression on the
92238fd1498Szrj result of the cexpi call we insert before the use statement that
92338fd1498Szrj dominates all other candidates. */
92438fd1498Szrj
static bool
execute_cse_sincos_1 (tree name)
{
  gimple_stmt_iterator gsi;
  imm_use_iterator use_iter;
  tree fndecl, res, type;
  gimple *def_stmt, *use_stmt, *stmt;
  int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
  auto_vec<gimple *> stmts;
  basic_block top_bb = NULL;
  int i;
  bool cfg_changed = false;

  type = TREE_TYPE (name);
  /* Collect all sin/cos/cexpi calls on NAME whose blocks form a chain in
     the dominator tree; TOP_BB tracks the block dominating them all.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
    {
      if (gimple_code (use_stmt) != GIMPLE_CALL
	  || !gimple_call_lhs (use_stmt))
	continue;

      switch (gimple_call_combined_fn (use_stmt))
	{
	CASE_CFN_COS:
	  seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_CFN_SIN:
	  seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_CFN_CEXPI:
	  seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	default:;
	}
    }

  /* With at most one kind of call recorded there is nothing to share.  */
  if (seen_cos + seen_sin + seen_cexpi <= 1)
    return false;

  /* Simply insert cexpi at the beginning of top_bb but not earlier than
     the name def statement.  */
  fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
  if (!fndecl)
    return false;
  stmt = gimple_build_call (fndecl, 1, name);
  res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
  gimple_call_set_lhs (stmt, res);

  def_stmt = SSA_NAME_DEF_STMT (name);
  if (!SSA_NAME_IS_DEFAULT_DEF (name)
      && gimple_code (def_stmt) != GIMPLE_PHI
      && gimple_bb (def_stmt) == top_bb)
    {
      /* NAME is defined by a real statement in TOP_BB itself: place the
	 cexpi call directly after that definition.  */
      gsi = gsi_for_stmt (def_stmt);
      gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
    }
  else
    {
      /* Otherwise NAME is available on entry to TOP_BB.  */
      gsi = gsi_after_labels (top_bb);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
    }
  sincos_stats.inserted++;

  /* And adjust the recorded old call sites.  */
  for (i = 0; stmts.iterate (i, &use_stmt); ++i)
    {
      tree rhs = NULL;

      switch (gimple_call_combined_fn (use_stmt))
	{
	CASE_CFN_COS:
	  /* cos (x) == REALPART_EXPR <cexpi (x)>.  */
	  rhs = fold_build1 (REALPART_EXPR, type, res);
	  break;

	CASE_CFN_SIN:
	  /* sin (x) == IMAGPART_EXPR <cexpi (x)>.  */
	  rhs = fold_build1 (IMAGPART_EXPR, type, res);
	  break;

	CASE_CFN_CEXPI:
	  rhs = res;
	  break;

	default:;
	  gcc_unreachable ();
	}

	/* Replace call with a copy. */
	stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);

	gsi = gsi_for_stmt (use_stmt);
	gsi_replace (&gsi, stmt, true);
	/* Removing a call that could throw may leave dead EH edges.  */
	if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
	  cfg_changed = true;
    }

  return cfg_changed;
}
102438fd1498Szrj
102538fd1498Szrj /* To evaluate powi(x,n), the floating point value x raised to the
102638fd1498Szrj constant integer exponent n, we use a hybrid algorithm that
102738fd1498Szrj combines the "window method" with look-up tables. For an
102838fd1498Szrj introduction to exponentiation algorithms and "addition chains",
102938fd1498Szrj see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
103038fd1498Szrj "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
103138fd1498Szrj 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
103238fd1498Szrj Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
103338fd1498Szrj
103438fd1498Szrj /* Provide a default value for POWI_MAX_MULTS, the maximum number of
103538fd1498Szrj multiplications to inline before calling the system library's pow
103638fd1498Szrj function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
103738fd1498Szrj so this default never requires calling pow, powf or powl. */
103838fd1498Szrj
103938fd1498Szrj #ifndef POWI_MAX_MULTS
104038fd1498Szrj #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
104138fd1498Szrj #endif
104238fd1498Szrj
104338fd1498Szrj /* The size of the "optimal power tree" lookup table. All
104438fd1498Szrj exponents less than this value are simply looked up in the
104538fd1498Szrj powi_table below. This threshold is also used to size the
104638fd1498Szrj cache of pseudo registers that hold intermediate results. */
104738fd1498Szrj #define POWI_TABLE_SIZE 256
104838fd1498Szrj
104938fd1498Szrj /* The size, in bits of the window, used in the "window method"
105038fd1498Szrj exponentiation algorithm. This is equivalent to a radix of
105138fd1498Szrj (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
105238fd1498Szrj #define POWI_WINDOW_SIZE 3
105338fd1498Szrj
105438fd1498Szrj /* The following table is an efficient representation of an
105538fd1498Szrj "optimal power tree". For each value, i, the corresponding
105638fd1498Szrj value, j, in the table states than an optimal evaluation
105738fd1498Szrj sequence for calculating pow(x,i) can be found by evaluating
105838fd1498Szrj pow(x,j)*pow(x,i-j). An optimal power tree for the first
105938fd1498Szrj 100 integers is given in Knuth's "Seminumerical algorithms". */
106038fd1498Szrj
/* powi_table[i] holds the j for which pow(x,i) = pow(x,j) * pow(x,i-j)
   is an optimal split; see the comment above.  */
static const unsigned char powi_table[POWI_TABLE_SIZE] =
  {
      0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
      4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
      8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
     12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
     16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
     20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
     24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
     28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
     32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
     36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
     40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
     44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
     48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
     52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
     56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
     60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
     64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
     68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
     72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
     76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
     80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
     84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
     88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
     92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
     96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
    100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
    104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
    108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
    112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
    116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
    120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
    124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
  };
109638fd1498Szrj
109738fd1498Szrj
109838fd1498Szrj /* Return the number of multiplications required to calculate
109938fd1498Szrj powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
110038fd1498Szrj subroutine of powi_cost. CACHE is an array indicating
110138fd1498Szrj which exponents have already been calculated. */
110238fd1498Szrj
110338fd1498Szrj static int
powi_lookup_cost(unsigned HOST_WIDE_INT n,bool * cache)110438fd1498Szrj powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
110538fd1498Szrj {
110638fd1498Szrj /* If we've already calculated this exponent, then this evaluation
110738fd1498Szrj doesn't require any additional multiplications. */
110838fd1498Szrj if (cache[n])
110938fd1498Szrj return 0;
111038fd1498Szrj
111138fd1498Szrj cache[n] = true;
111238fd1498Szrj return powi_lookup_cost (n - powi_table[n], cache)
111338fd1498Szrj + powi_lookup_cost (powi_table[n], cache) + 1;
111438fd1498Szrj }
111538fd1498Szrj
111638fd1498Szrj /* Return the number of multiplications required to calculate
111738fd1498Szrj powi(x,n) for an arbitrary x, given the exponent N. This
111838fd1498Szrj function needs to be kept in sync with powi_as_mults below. */
111938fd1498Szrj
112038fd1498Szrj static int
powi_cost(HOST_WIDE_INT n)112138fd1498Szrj powi_cost (HOST_WIDE_INT n)
112238fd1498Szrj {
112338fd1498Szrj bool cache[POWI_TABLE_SIZE];
112438fd1498Szrj unsigned HOST_WIDE_INT digit;
112538fd1498Szrj unsigned HOST_WIDE_INT val;
112638fd1498Szrj int result;
112738fd1498Szrj
112838fd1498Szrj if (n == 0)
112938fd1498Szrj return 0;
113038fd1498Szrj
113138fd1498Szrj /* Ignore the reciprocal when calculating the cost. */
113238fd1498Szrj val = (n < 0) ? -n : n;
113338fd1498Szrj
113438fd1498Szrj /* Initialize the exponent cache. */
113538fd1498Szrj memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
113638fd1498Szrj cache[1] = true;
113738fd1498Szrj
113838fd1498Szrj result = 0;
113938fd1498Szrj
114038fd1498Szrj while (val >= POWI_TABLE_SIZE)
114138fd1498Szrj {
114238fd1498Szrj if (val & 1)
114338fd1498Szrj {
114438fd1498Szrj digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
114538fd1498Szrj result += powi_lookup_cost (digit, cache)
114638fd1498Szrj + POWI_WINDOW_SIZE + 1;
114738fd1498Szrj val >>= POWI_WINDOW_SIZE;
114838fd1498Szrj }
114938fd1498Szrj else
115038fd1498Szrj {
115138fd1498Szrj val >>= 1;
115238fd1498Szrj result++;
115338fd1498Szrj }
115438fd1498Szrj }
115538fd1498Szrj
115638fd1498Szrj return result + powi_lookup_cost (val, cache);
115738fd1498Szrj }
115838fd1498Szrj
115938fd1498Szrj /* Recursive subroutine of powi_as_mults. This function takes the
116038fd1498Szrj array, CACHE, of already calculated exponents and an exponent N and
116138fd1498Szrj returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
116238fd1498Szrj
static tree
powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
		 HOST_WIDE_INT n, tree *cache)
{
  tree op0, op1, ssa_target;
  unsigned HOST_WIDE_INT digit;
  gassign *mult_stmt;

  /* Reuse an already-materialized power when we have one.  */
  if (n < POWI_TABLE_SIZE && cache[n])
    return cache[n];

  ssa_target = make_temp_ssa_name (type, NULL, "powmult");

  if (n < POWI_TABLE_SIZE)
    {
      /* Small exponent: split per the optimal power tree,
	 x**n = x**(n - powi_table[n]) * x**powi_table[n].
	 Cache the result before recursing so sub-problems can share it.  */
      cache[n] = ssa_target;
      op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
      op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
    }
  else if (n & 1)
    {
      /* Large odd exponent: peel off a POWI_WINDOW_SIZE-bit digit
	 (window method), x**n = x**(n - digit) * x**digit.  */
      digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
      op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
      op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
    }
  else
    {
      /* Large even exponent: square the half power,
	 x**n = x**(n/2) * x**(n/2).  */
      op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
      op1 = op0;
    }

  mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
  gimple_set_location (mult_stmt, loc);
  gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);

  return ssa_target;
}
120038fd1498Szrj
120138fd1498Szrj /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
120238fd1498Szrj This function needs to be kept in sync with powi_cost above. */
120338fd1498Szrj
static tree
powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
	       tree arg0, HOST_WIDE_INT n)
{
  tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
  gassign *div_stmt;
  tree target;

  /* x**0 is simply 1.0.  */
  if (n == 0)
    return build_real (type, dconst1);

  /* Seed the cache with x**1 = ARG0.  */
  memset (cache, 0, sizeof (cache));
  cache[1] = arg0;

  /* Build the multiplication chain for |n|.  */
  result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
  if (n >= 0)
    return result;

  /* If the original exponent was negative, reciprocate the result.  */
  target = make_temp_ssa_name (type, NULL, "powmult");
  div_stmt = gimple_build_assign (target, RDIV_EXPR,
				  build_real (type, dconst1), result);
  gimple_set_location (div_stmt, loc);
  gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);

  return target;
}
123138fd1498Szrj
123238fd1498Szrj /* ARG0 and N are the two arguments to a powi builtin in GSI with
123338fd1498Szrj location info LOC. If the arguments are appropriate, create an
123438fd1498Szrj equivalent sequence of statements prior to GSI using an optimal
123538fd1498Szrj number of multiplications, and return an expession holding the
123638fd1498Szrj result. */
123738fd1498Szrj
123838fd1498Szrj static tree
gimple_expand_builtin_powi(gimple_stmt_iterator * gsi,location_t loc,tree arg0,HOST_WIDE_INT n)123938fd1498Szrj gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
124038fd1498Szrj tree arg0, HOST_WIDE_INT n)
124138fd1498Szrj {
124238fd1498Szrj /* Avoid largest negative number. */
124338fd1498Szrj if (n != -n
124438fd1498Szrj && ((n >= -1 && n <= 2)
124538fd1498Szrj || (optimize_function_for_speed_p (cfun)
124638fd1498Szrj && powi_cost (n) <= POWI_MAX_MULTS)))
124738fd1498Szrj return powi_as_mults (gsi, loc, arg0, n);
124838fd1498Szrj
124938fd1498Szrj return NULL_TREE;
125038fd1498Szrj }
125138fd1498Szrj
125238fd1498Szrj /* Build a gimple call statement that calls FN with argument ARG.
125338fd1498Szrj Set the lhs of the call statement to a fresh SSA name. Insert the
125438fd1498Szrj statement prior to GSI's current position, and return the fresh
125538fd1498Szrj SSA name. */
125638fd1498Szrj
125738fd1498Szrj static tree
build_and_insert_call(gimple_stmt_iterator * gsi,location_t loc,tree fn,tree arg)125838fd1498Szrj build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
125938fd1498Szrj tree fn, tree arg)
126038fd1498Szrj {
126138fd1498Szrj gcall *call_stmt;
126238fd1498Szrj tree ssa_target;
126338fd1498Szrj
126438fd1498Szrj call_stmt = gimple_build_call (fn, 1, arg);
126538fd1498Szrj ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
126638fd1498Szrj gimple_set_lhs (call_stmt, ssa_target);
126738fd1498Szrj gimple_set_location (call_stmt, loc);
126838fd1498Szrj gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
126938fd1498Szrj
127038fd1498Szrj return ssa_target;
127138fd1498Szrj }
127238fd1498Szrj
127338fd1498Szrj /* Build a gimple binary operation with the given CODE and arguments
127438fd1498Szrj ARG0, ARG1, assigning the result to a new SSA name for variable
127538fd1498Szrj TARGET. Insert the statement prior to GSI's current position, and
127638fd1498Szrj return the fresh SSA name.*/
127738fd1498Szrj
127838fd1498Szrj static tree
build_and_insert_binop(gimple_stmt_iterator * gsi,location_t loc,const char * name,enum tree_code code,tree arg0,tree arg1)127938fd1498Szrj build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
128038fd1498Szrj const char *name, enum tree_code code,
128138fd1498Szrj tree arg0, tree arg1)
128238fd1498Szrj {
128338fd1498Szrj tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
128438fd1498Szrj gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
128538fd1498Szrj gimple_set_location (stmt, loc);
128638fd1498Szrj gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
128738fd1498Szrj return result;
128838fd1498Szrj }
128938fd1498Szrj
129038fd1498Szrj /* Build a gimple reference operation with the given CODE and argument
129138fd1498Szrj ARG, assigning the result to a new SSA name of TYPE with NAME.
129238fd1498Szrj Insert the statement prior to GSI's current position, and return
129338fd1498Szrj the fresh SSA name. */
129438fd1498Szrj
129538fd1498Szrj static inline tree
build_and_insert_ref(gimple_stmt_iterator * gsi,location_t loc,tree type,const char * name,enum tree_code code,tree arg0)129638fd1498Szrj build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
129738fd1498Szrj const char *name, enum tree_code code, tree arg0)
129838fd1498Szrj {
129938fd1498Szrj tree result = make_temp_ssa_name (type, NULL, name);
130038fd1498Szrj gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
130138fd1498Szrj gimple_set_location (stmt, loc);
130238fd1498Szrj gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
130338fd1498Szrj return result;
130438fd1498Szrj }
130538fd1498Szrj
130638fd1498Szrj /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
130738fd1498Szrj prior to GSI's current position, and return the fresh SSA name. */
130838fd1498Szrj
130938fd1498Szrj static tree
build_and_insert_cast(gimple_stmt_iterator * gsi,location_t loc,tree type,tree val)131038fd1498Szrj build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
131138fd1498Szrj tree type, tree val)
131238fd1498Szrj {
131338fd1498Szrj tree result = make_ssa_name (type);
131438fd1498Szrj gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
131538fd1498Szrj gimple_set_location (stmt, loc);
131638fd1498Szrj gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
131738fd1498Szrj return result;
131838fd1498Szrj }
131938fd1498Szrj
/* Parameters of a pow synthesis using chains of square roots, filled in
   by representable_as_half_series_p.  */
struct pow_synth_sqrt_info
{
  /* factors[i] is true iff 0.5**(i+1) participates in the sum.  */
  bool *factors;
  /* One past the index of the last set factor, i.e. the deepest power
     of 0.5 used.  */
  unsigned int deepest;
  /* Number of multiplications needed to combine the sqrt chains.  */
  unsigned int num_mults;
};
132638fd1498Szrj
132738fd1498Szrj /* Return true iff the real value C can be represented as a
132838fd1498Szrj sum of powers of 0.5 up to N. That is:
132938fd1498Szrj C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
133038fd1498Szrj Record in INFO the various parameters of the synthesis algorithm such
133138fd1498Szrj as the factors a[i], the maximum 0.5 power and the number of
133238fd1498Szrj multiplications that will be required. */
133338fd1498Szrj
bool
representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
				struct pow_synth_sqrt_info *info)
{
  REAL_VALUE_TYPE factor = dconsthalf;
  REAL_VALUE_TYPE remainder = c;

  info->deepest = 0;
  info->num_mults = 0;
  memset (info->factors, 0, n * sizeof (bool));

  /* Greedily subtract decreasing powers of 0.5 (0.5, 0.25, ...) from
     the remainder; a factor is used exactly when subtracting it does
     not make the remainder negative.  */
  for (unsigned i = 0; i < n; i++)
    {
      REAL_VALUE_TYPE res;

      /* If something inexact happened bail out now.  */
      if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
	return false;

      /* We have hit zero.  The number is representable as a sum
         of powers of 0.5.  */
      if (real_equal (&res, &dconst0))
	{
	  info->factors[i] = true;
	  info->deepest = i + 1;
	  return true;
	}
      else if (!REAL_VALUE_NEGATIVE (res))
	{
	  /* This power of 0.5 contributes: keep the reduced remainder
	     and count one more multiplication.  */
	  remainder = res;
	  info->factors[i] = true;
	  info->num_mults++;
	}
      else
	info->factors[i] = false;

      /* Move on to the next (halved) factor.  */
      real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
    }
  return false;
}
137438fd1498Szrj
137538fd1498Szrj /* Return the tree corresponding to FN being applied
137638fd1498Szrj to ARG N times at GSI and LOC.
137738fd1498Szrj Look up previous results from CACHE if need be.
137838fd1498Szrj cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
137938fd1498Szrj
138038fd1498Szrj static tree
get_fn_chain(tree arg,unsigned int n,gimple_stmt_iterator * gsi,tree fn,location_t loc,tree * cache)138138fd1498Szrj get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
138238fd1498Szrj tree fn, location_t loc, tree *cache)
138338fd1498Szrj {
138438fd1498Szrj tree res = cache[n];
138538fd1498Szrj if (!res)
138638fd1498Szrj {
138738fd1498Szrj tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
138838fd1498Szrj res = build_and_insert_call (gsi, loc, fn, prev);
138938fd1498Szrj cache[n] = res;
139038fd1498Szrj }
139138fd1498Szrj
139238fd1498Szrj return res;
139338fd1498Szrj }
139438fd1498Szrj
139538fd1498Szrj /* Print to STREAM the repeated application of function FNAME to ARG
139638fd1498Szrj N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
139738fd1498Szrj "foo (foo (x))". */
139838fd1498Szrj
139938fd1498Szrj static void
print_nested_fn(FILE * stream,const char * fname,const char * arg,unsigned int n)140038fd1498Szrj print_nested_fn (FILE* stream, const char *fname, const char* arg,
140138fd1498Szrj unsigned int n)
140238fd1498Szrj {
140338fd1498Szrj if (n == 0)
140438fd1498Szrj fprintf (stream, "%s", arg);
140538fd1498Szrj else
140638fd1498Szrj {
140738fd1498Szrj fprintf (stream, "%s (", fname);
140838fd1498Szrj print_nested_fn (stream, fname, arg, n - 1);
140938fd1498Szrj fprintf (stream, ")");
141038fd1498Szrj }
141138fd1498Szrj }
141238fd1498Szrj
141338fd1498Szrj /* Print to STREAM the fractional sequence of sqrt chains
141438fd1498Szrj applied to ARG, described by INFO. Used for the dump file. */
141538fd1498Szrj
141638fd1498Szrj static void
dump_fractional_sqrt_sequence(FILE * stream,const char * arg,struct pow_synth_sqrt_info * info)141738fd1498Szrj dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
141838fd1498Szrj struct pow_synth_sqrt_info *info)
141938fd1498Szrj {
142038fd1498Szrj for (unsigned int i = 0; i < info->deepest; i++)
142138fd1498Szrj {
142238fd1498Szrj bool is_set = info->factors[i];
142338fd1498Szrj if (is_set)
142438fd1498Szrj {
142538fd1498Szrj print_nested_fn (stream, "sqrt", arg, i + 1);
142638fd1498Szrj if (i != info->deepest - 1)
142738fd1498Szrj fprintf (stream, " * ");
142838fd1498Szrj }
142938fd1498Szrj }
143038fd1498Szrj }
143138fd1498Szrj
143238fd1498Szrj /* Print to STREAM a representation of raising ARG to an integer
143338fd1498Szrj power N. Used for the dump file. */
143438fd1498Szrj
143538fd1498Szrj static void
dump_integer_part(FILE * stream,const char * arg,HOST_WIDE_INT n)143638fd1498Szrj dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
143738fd1498Szrj {
143838fd1498Szrj if (n > 1)
143938fd1498Szrj fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
144038fd1498Szrj else if (n == 1)
144138fd1498Szrj fprintf (stream, "%s", arg);
144238fd1498Szrj }
144338fd1498Szrj
144438fd1498Szrj /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
144538fd1498Szrj square roots. Place at GSI and LOC. Limit the maximum depth
144638fd1498Szrj of the sqrt chains to MAX_DEPTH. Return the tree holding the
144738fd1498Szrj result of the expanded sequence or NULL_TREE if the expansion failed.
144838fd1498Szrj
144938fd1498Szrj This routine assumes that ARG1 is a real number with a fractional part
145038fd1498Szrj (the integer exponent case will have been handled earlier in
145138fd1498Szrj gimple_expand_builtin_pow).
145238fd1498Szrj
145338fd1498Szrj For ARG1 > 0.0:
145438fd1498Szrj * For ARG1 composed of a whole part WHOLE_PART and a fractional part
145538fd1498Szrj FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
145638fd1498Szrj FRAC_PART == ARG1 - WHOLE_PART:
145738fd1498Szrj Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
145838fd1498Szrj POW (ARG0, FRAC_PART) is expanded as a product of square root chains
145938fd1498Szrj if it can be expressed as such, that is if FRAC_PART satisfies:
146038fd1498Szrj FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
146138fd1498Szrj where integer a[i] is either 0 or 1.
146238fd1498Szrj
146338fd1498Szrj Example:
146438fd1498Szrj POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
146538fd1498Szrj --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
146638fd1498Szrj
146738fd1498Szrj For ARG1 < 0.0 there are two approaches:
146838fd1498Szrj * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
146938fd1498Szrj is calculated as above.
147038fd1498Szrj
147138fd1498Szrj Example:
147238fd1498Szrj POW (x, -5.625) == 1.0 / POW (x, 5.625)
147338fd1498Szrj --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
147438fd1498Szrj
147538fd1498Szrj * (B) : WHOLE_PART := - ceil (abs (ARG1))
147638fd1498Szrj FRAC_PART := ARG1 - WHOLE_PART
147738fd1498Szrj and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
147838fd1498Szrj Example:
147938fd1498Szrj POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
148038fd1498Szrj --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
148138fd1498Szrj
148238fd1498Szrj For ARG1 < 0.0 we choose between (A) and (B) depending on
148338fd1498Szrj how many multiplications we'd have to do.
148438fd1498Szrj So, for the example in (B): POW (x, -5.875), if we were to
148538fd1498Szrj follow algorithm (A) we would produce:
148638fd1498Szrj 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
148738fd1498Szrj which contains more multiplications than approach (B).
148838fd1498Szrj
148938fd1498Szrj Hopefully, this approach will eliminate potentially expensive POW library
149038fd1498Szrj calls when unsafe floating point math is enabled and allow the compiler to
149138fd1498Szrj further optimise the multiplies, square roots and divides produced by this
149238fd1498Szrj function. */
149338fd1498Szrj
149438fd1498Szrj static tree
expand_pow_as_sqrts(gimple_stmt_iterator * gsi,location_t loc,tree arg0,tree arg1,HOST_WIDE_INT max_depth)149538fd1498Szrj expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
149638fd1498Szrj tree arg0, tree arg1, HOST_WIDE_INT max_depth)
149738fd1498Szrj {
149838fd1498Szrj tree type = TREE_TYPE (arg0);
149938fd1498Szrj machine_mode mode = TYPE_MODE (type);
150038fd1498Szrj tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
150138fd1498Szrj bool one_over = true;
150238fd1498Szrj
150338fd1498Szrj if (!sqrtfn)
150438fd1498Szrj return NULL_TREE;
150538fd1498Szrj
150638fd1498Szrj if (TREE_CODE (arg1) != REAL_CST)
150738fd1498Szrj return NULL_TREE;
150838fd1498Szrj
150938fd1498Szrj REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
151038fd1498Szrj
151138fd1498Szrj gcc_assert (max_depth > 0);
151238fd1498Szrj tree *cache = XALLOCAVEC (tree, max_depth + 1);
151338fd1498Szrj
151438fd1498Szrj struct pow_synth_sqrt_info synth_info;
151538fd1498Szrj synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
151638fd1498Szrj synth_info.deepest = 0;
151738fd1498Szrj synth_info.num_mults = 0;
151838fd1498Szrj
151938fd1498Szrj bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
152038fd1498Szrj REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
152138fd1498Szrj
152238fd1498Szrj /* The whole and fractional parts of exp. */
152338fd1498Szrj REAL_VALUE_TYPE whole_part;
152438fd1498Szrj REAL_VALUE_TYPE frac_part;
152538fd1498Szrj
152638fd1498Szrj real_floor (&whole_part, mode, &exp);
152738fd1498Szrj real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
152838fd1498Szrj
152938fd1498Szrj
153038fd1498Szrj REAL_VALUE_TYPE ceil_whole = dconst0;
153138fd1498Szrj REAL_VALUE_TYPE ceil_fract = dconst0;
153238fd1498Szrj
153338fd1498Szrj if (neg_exp)
153438fd1498Szrj {
153538fd1498Szrj real_ceil (&ceil_whole, mode, &exp);
153638fd1498Szrj real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
153738fd1498Szrj }
153838fd1498Szrj
153938fd1498Szrj if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
154038fd1498Szrj return NULL_TREE;
154138fd1498Szrj
154238fd1498Szrj /* Check whether it's more profitable to not use 1.0 / ... */
154338fd1498Szrj if (neg_exp)
154438fd1498Szrj {
154538fd1498Szrj struct pow_synth_sqrt_info alt_synth_info;
154638fd1498Szrj alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
154738fd1498Szrj alt_synth_info.deepest = 0;
154838fd1498Szrj alt_synth_info.num_mults = 0;
154938fd1498Szrj
155038fd1498Szrj if (representable_as_half_series_p (ceil_fract, max_depth,
155138fd1498Szrj &alt_synth_info)
155238fd1498Szrj && alt_synth_info.deepest <= synth_info.deepest
155338fd1498Szrj && alt_synth_info.num_mults < synth_info.num_mults)
155438fd1498Szrj {
155538fd1498Szrj whole_part = ceil_whole;
155638fd1498Szrj frac_part = ceil_fract;
155738fd1498Szrj synth_info.deepest = alt_synth_info.deepest;
155838fd1498Szrj synth_info.num_mults = alt_synth_info.num_mults;
155938fd1498Szrj memcpy (synth_info.factors, alt_synth_info.factors,
156038fd1498Szrj (max_depth + 1) * sizeof (bool));
156138fd1498Szrj one_over = false;
156238fd1498Szrj }
156338fd1498Szrj }
156438fd1498Szrj
156538fd1498Szrj HOST_WIDE_INT n = real_to_integer (&whole_part);
156638fd1498Szrj REAL_VALUE_TYPE cint;
156738fd1498Szrj real_from_integer (&cint, VOIDmode, n, SIGNED);
156838fd1498Szrj
156938fd1498Szrj if (!real_identical (&whole_part, &cint))
157038fd1498Szrj return NULL_TREE;
157138fd1498Szrj
157238fd1498Szrj if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
157338fd1498Szrj return NULL_TREE;
157438fd1498Szrj
157538fd1498Szrj memset (cache, 0, (max_depth + 1) * sizeof (tree));
157638fd1498Szrj
157738fd1498Szrj tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
157838fd1498Szrj
157938fd1498Szrj /* Calculate the integer part of the exponent. */
158038fd1498Szrj if (n > 1)
158138fd1498Szrj {
158238fd1498Szrj integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
158338fd1498Szrj if (!integer_res)
158438fd1498Szrj return NULL_TREE;
158538fd1498Szrj }
158638fd1498Szrj
158738fd1498Szrj if (dump_file)
158838fd1498Szrj {
158938fd1498Szrj char string[64];
159038fd1498Szrj
159138fd1498Szrj real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
159238fd1498Szrj fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
159338fd1498Szrj
159438fd1498Szrj if (neg_exp)
159538fd1498Szrj {
159638fd1498Szrj if (one_over)
159738fd1498Szrj {
159838fd1498Szrj fprintf (dump_file, "1.0 / (");
159938fd1498Szrj dump_integer_part (dump_file, "x", n);
160038fd1498Szrj if (n > 0)
160138fd1498Szrj fprintf (dump_file, " * ");
160238fd1498Szrj dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
160338fd1498Szrj fprintf (dump_file, ")");
160438fd1498Szrj }
160538fd1498Szrj else
160638fd1498Szrj {
160738fd1498Szrj dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
160838fd1498Szrj fprintf (dump_file, " / (");
160938fd1498Szrj dump_integer_part (dump_file, "x", n);
161038fd1498Szrj fprintf (dump_file, ")");
161138fd1498Szrj }
161238fd1498Szrj }
161338fd1498Szrj else
161438fd1498Szrj {
161538fd1498Szrj dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
161638fd1498Szrj if (n > 0)
161738fd1498Szrj fprintf (dump_file, " * ");
161838fd1498Szrj dump_integer_part (dump_file, "x", n);
161938fd1498Szrj }
162038fd1498Szrj
162138fd1498Szrj fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
162238fd1498Szrj }
162338fd1498Szrj
162438fd1498Szrj
162538fd1498Szrj tree fract_res = NULL_TREE;
162638fd1498Szrj cache[0] = arg0;
162738fd1498Szrj
162838fd1498Szrj /* Calculate the fractional part of the exponent. */
162938fd1498Szrj for (unsigned i = 0; i < synth_info.deepest; i++)
163038fd1498Szrj {
163138fd1498Szrj if (synth_info.factors[i])
163238fd1498Szrj {
163338fd1498Szrj tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
163438fd1498Szrj
163538fd1498Szrj if (!fract_res)
163638fd1498Szrj fract_res = sqrt_chain;
163738fd1498Szrj
163838fd1498Szrj else
163938fd1498Szrj fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
164038fd1498Szrj fract_res, sqrt_chain);
164138fd1498Szrj }
164238fd1498Szrj }
164338fd1498Szrj
164438fd1498Szrj tree res = NULL_TREE;
164538fd1498Szrj
164638fd1498Szrj if (neg_exp)
164738fd1498Szrj {
164838fd1498Szrj if (one_over)
164938fd1498Szrj {
165038fd1498Szrj if (n > 0)
165138fd1498Szrj res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
165238fd1498Szrj fract_res, integer_res);
165338fd1498Szrj else
165438fd1498Szrj res = fract_res;
165538fd1498Szrj
165638fd1498Szrj res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
165738fd1498Szrj build_real (type, dconst1), res);
165838fd1498Szrj }
165938fd1498Szrj else
166038fd1498Szrj {
166138fd1498Szrj res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
166238fd1498Szrj fract_res, integer_res);
166338fd1498Szrj }
166438fd1498Szrj }
166538fd1498Szrj else
166638fd1498Szrj res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
166738fd1498Szrj fract_res, integer_res);
166838fd1498Szrj return res;
166938fd1498Szrj }
167038fd1498Szrj
167138fd1498Szrj /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
167238fd1498Szrj with location info LOC. If possible, create an equivalent and
167338fd1498Szrj less expensive sequence of statements prior to GSI, and return an
167438fd1498Szrj    expression holding the result.  */
167538fd1498Szrj
static tree
gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
			   tree arg0, tree arg1)
{
  REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
  REAL_VALUE_TYPE c2, dconst3;
  HOST_WIDE_INT n;
  tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
  machine_mode mode;
  bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
  bool hw_sqrt_exists, c_is_int, c2_is_int;

  /* Build the constant 0.25 by halving 1.0 twice (decrementing the
     binary exponent by 2).  */
  dconst1_4 = dconst1;
  SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);

  /* If the exponent isn't a constant, there's nothing of interest
     to be done.  */
  if (TREE_CODE (arg1) != REAL_CST)
    return NULL_TREE;

  /* Don't perform the operation if flag_signaling_nans is on
     and the operand is a signaling NaN.  */
  if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
      && ((TREE_CODE (arg0) == REAL_CST
	   && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
	  || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
    return NULL_TREE;

  /* If the exponent is equivalent to an integer, expand to an optimal
     multiplication sequence when profitable.  */
  c = TREE_REAL_CST (arg1);
  n = real_to_integer (&c);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  c_is_int = real_identical (&c, &cint);

  /* Exponents in [-1, 2] are always cheap to expand; larger ones only
     under unsafe math, when optimizing for speed and within the powi
     multiplication budget.  */
  if (c_is_int
      && ((n >= -1 && n <= 2)
	  || (flag_unsafe_math_optimizations
	      && speed_p
	      && powi_cost (n) <= POWI_MAX_MULTS)))
    return gimple_expand_builtin_powi (gsi, loc, arg0, n);

  /* Attempt various optimizations using sqrt and cbrt.  */
  type = TREE_TYPE (arg0);
  mode = TYPE_MODE (type);
  sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);

  /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
     unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
     sqrt(-0) = -0.  */
  if (sqrtfn
      && real_equal (&c, &dconsthalf)
      && !HONOR_SIGNED_ZEROS (mode))
    return build_and_insert_call (gsi, loc, sqrtfn, arg0);

  hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;

  /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
     optimizations since 1./3. is not exactly representable.  If x
     is negative and finite, the correct value of pow(x,1./3.) is
     a NaN with the "invalid" exception raised, because the value
     of 1./3. actually has an even denominator.  The correct value
     of cbrt(x) is a negative real value.  */
  cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
  dconst1_3 = real_value_truncate (mode, dconst_third ());

  if (flag_unsafe_math_optimizations
      && cbrtfn
      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
      && real_equal (&c, &dconst1_3))
    return build_and_insert_call (gsi, loc, cbrtfn, arg0);

  /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
     if we don't have a hardware sqrt insn.  */
  /* 1./6. is 1./3. halved, i.e. with its binary exponent decremented.  */
  dconst1_6 = dconst1_3;
  SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);

  if (flag_unsafe_math_optimizations
      && sqrtfn
      && cbrtfn
      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
      && speed_p
      && hw_sqrt_exists
      && real_equal (&c, &dconst1_6))
    {
      /* sqrt(x)  */
      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);

      /* cbrt(sqrt(x))  */
      return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
    }


  /* Attempt to expand the POW as a product of square root chains.
     Expand the 0.25 case even when optimising for size.  */
  if (flag_unsafe_math_optimizations
      && sqrtfn
      && hw_sqrt_exists
      && (speed_p || real_equal (&c, &dconst1_4))
      && !HONOR_SIGNED_ZEROS (mode))
    {
      /* At -Os only allow the two-sqrt expansion needed for x**0.25.  */
      unsigned int max_depth = speed_p
				? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH)
				: 2;

      tree expand_with_sqrts
	= expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);

      if (expand_with_sqrts)
	return expand_with_sqrts;
    }

  /* C2_IS_INT is true iff 2c is an integer, i.e. c is a multiple
     of 0.5; such exponents were handled by the sqrt expansions above,
     so they are excluded from the cbrt-based expansion below.  */
  real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
  n = real_to_integer (&c2);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  c2_is_int = real_identical (&c2, &cint);

  /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into

     powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
     1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.

     Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
     different from pow(x, 1./3.) due to rounding and behavior with
     negative x, we need to constrain this transformation to unsafe
     math and positive x or finite math.  */
  real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
  real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
  real_round (&c2, mode, &c2);
  n = real_to_integer (&c2);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  /* Recompute n/3 in MODE and compare with c below to verify that 3c
     really is the integer n (the multiplication by 3 may have
     rounded).  */
  real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
  real_convert (&c2, mode, &c2);

  if (flag_unsafe_math_optimizations
      && cbrtfn
      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
      && real_identical (&c2, &c)
      && !c2_is_int
      && optimize_function_for_speed_p (cfun)
      && powi_cost (n / 3) <= POWI_MAX_MULTS)
    {
      tree powi_x_ndiv3 = NULL_TREE;

      /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
	 possible or profitable, give up.  Skip the degenerate case when
	 abs(n) < 3, where the result is always 1.  */
      if (absu_hwi (n) >= 3)
	{
	  powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
						     abs_hwi (n / 3));
	  if (!powi_x_ndiv3)
	    return NULL_TREE;
	}

      /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
	 as that creates an unnecessary variable.  Instead, just produce
	 either cbrt(x) or cbrt(x) * cbrt(x).  */
      cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);

      if (absu_hwi (n) % 3 == 1)
	powi_cbrt_x = cbrt_x;
      else
	powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					      cbrt_x, cbrt_x);

      /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
      if (absu_hwi (n) < 3)
	result = powi_cbrt_x;
      else
	result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					 powi_x_ndiv3, powi_cbrt_x);

      /* If n is negative, reciprocate the result.  */
      if (n < 0)
	result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
					 build_real (type, dconst1), result);

      return result;
    }

  /* No optimizations succeeded.  */
  return NULL_TREE;
}
186038fd1498Szrj
186138fd1498Szrj /* ARG is the argument to a cabs builtin call in GSI with location info
186238fd1498Szrj LOC. Create a sequence of statements prior to GSI that calculates
186338fd1498Szrj sqrt(R*R + I*I), where R and I are the real and imaginary components
186438fd1498Szrj of ARG, respectively. Return an expression holding the result. */
186538fd1498Szrj
186638fd1498Szrj static tree
gimple_expand_builtin_cabs(gimple_stmt_iterator * gsi,location_t loc,tree arg)186738fd1498Szrj gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
186838fd1498Szrj {
186938fd1498Szrj tree real_part, imag_part, addend1, addend2, sum, result;
187038fd1498Szrj tree type = TREE_TYPE (TREE_TYPE (arg));
187138fd1498Szrj tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
187238fd1498Szrj machine_mode mode = TYPE_MODE (type);
187338fd1498Szrj
187438fd1498Szrj if (!flag_unsafe_math_optimizations
187538fd1498Szrj || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
187638fd1498Szrj || !sqrtfn
187738fd1498Szrj || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
187838fd1498Szrj return NULL_TREE;
187938fd1498Szrj
188038fd1498Szrj real_part = build_and_insert_ref (gsi, loc, type, "cabs",
188138fd1498Szrj REALPART_EXPR, arg);
188238fd1498Szrj addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
188338fd1498Szrj real_part, real_part);
188438fd1498Szrj imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
188538fd1498Szrj IMAGPART_EXPR, arg);
188638fd1498Szrj addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
188738fd1498Szrj imag_part, imag_part);
188838fd1498Szrj sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
188938fd1498Szrj result = build_and_insert_call (gsi, loc, sqrtfn, sum);
189038fd1498Szrj
189138fd1498Szrj return result;
189238fd1498Szrj }
189338fd1498Szrj
189438fd1498Szrj /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
189538fd1498Szrj on the SSA_NAME argument of each of them. Also expand powi(x,n) into
189638fd1498Szrj an optimal number of multiplies, when n is a constant. */
189738fd1498Szrj
189838fd1498Szrj namespace {
189938fd1498Szrj
/* Static descriptor for the sincos pass; consumed by the
   pass_cse_sincos constructor below.  */
const pass_data pass_data_cse_sincos =
{
  GIMPLE_PASS, /* type */
  "sincos", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_SINCOS, /* tv_id */
  PROP_ssa, /* properties_required */
  PROP_gimple_opt_math, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
191238fd1498Szrj
/* Pass object for the sincos pass; the real work is done in the
   execute method defined below.  */
class pass_cse_sincos : public gimple_opt_pass
{
public:
  pass_cse_sincos (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_sincos, ctxt)
  {}

  /* opt_pass methods: */
  /* Run whenever optimizing at all (any -O level above 0).  */
  virtual bool gate (function *)
    {
      /* We no longer require either sincos or cexp, since powi expansion
	 piggybacks on this pass.  */
      return optimize;
    }

  virtual unsigned int execute (function *);

}; // class pass_cse_sincos
193138fd1498Szrj
/* Main driver: walk every statement of FUN looking for calls to
   sin/cos/cexpi (CSEd via execute_cse_sincos_1), pow, powi and cabs
   (expanded inline into cheaper sequences when profitable).  Returns
   TODO_cleanup_cfg if any expansion changed the CFG.  */

unsigned int
pass_cse_sincos::execute (function *fun)
{
  basic_block bb;
  bool cfg_changed = false;

  /* execute_cse_sincos_1 inserts dominating cexpi calls, so it needs
     dominator information.  */
  calculate_dominance_info (CDI_DOMINATORS);
  memset (&sincos_stats, 0, sizeof (sincos_stats));

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      bool cleanup_eh = false;

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  /* Only the last stmt in a bb could throw, no need to call
	     gimple_purge_dead_eh_edges if we change something in the middle
	     of a basic block.  */
	  cleanup_eh = false;

	  /* Only calls whose result is used are interesting; every
	     transformation below replaces the call's LHS.  */
	  if (is_gimple_call (stmt)
	      && gimple_call_lhs (stmt))
	    {
	      tree arg, arg0, arg1, result;
	      HOST_WIDE_INT n;
	      location_t loc;

	      switch (gimple_call_combined_fn (stmt))
		{
		CASE_CFN_COS:
		CASE_CFN_SIN:
		CASE_CFN_CEXPI:
		  /* Make sure we have either sincos or cexp.  */
		  if (!targetm.libc_has_function (function_c99_math_complex)
		      && !targetm.libc_has_function (function_sincos))
		    break;

		  arg = gimple_call_arg (stmt, 0);
		  if (TREE_CODE (arg) == SSA_NAME)
		    cfg_changed |= execute_cse_sincos_1 (arg);
		  break;

		CASE_CFN_POW:
		  arg0 = gimple_call_arg (stmt, 0);
		  arg1 = gimple_call_arg (stmt, 1);

		  loc = gimple_location (stmt);
		  result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);

		  if (result)
		    {
		      /* The expansion succeeded: replace the pow call
			 with a plain assignment of RESULT to its LHS,
			 dropping the call's virtual definition.  */
		      tree lhs = gimple_get_lhs (stmt);
		      gassign *new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		CASE_CFN_POWI:
		  arg0 = gimple_call_arg (stmt, 0);
		  arg1 = gimple_call_arg (stmt, 1);
		  loc = gimple_location (stmt);

		  if (real_minus_onep (arg0))
		    {
		      /* powi (-1, n) is 1 for even n and -1 for odd n;
			 select between the two based on bit 0 of the
			 exponent.  */
		      tree t0, t1, cond, one, minus_one;
		      /* Shadows the outer STMT only within this scope.  */
		      gassign *stmt;

		      t0 = TREE_TYPE (arg0);
		      t1 = TREE_TYPE (arg1);
		      one = build_real (t0, dconst1);
		      minus_one = build_real (t0, dconstm1);

		      cond = make_temp_ssa_name (t1, NULL, "powi_cond");
		      stmt = gimple_build_assign (cond, BIT_AND_EXPR,
						  arg1, build_int_cst (t1, 1));
		      gimple_set_location (stmt, loc);
		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

		      result = make_temp_ssa_name (t0, NULL, "powi");
		      stmt = gimple_build_assign (result, COND_EXPR, cond,
						  minus_one, one);
		      gimple_set_location (stmt, loc);
		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
		    }
		  else
		    {
		      /* Constant exponents that fit a signed HWI are
			 expanded into an optimal multiply sequence.  */
		      if (!tree_fits_shwi_p (arg1))
			break;

		      n = tree_to_shwi (arg1);
		      result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
		    }

		  if (result)
		    {
		      /* Replace the powi call with an assignment of
			 RESULT, as for pow above.  */
		      tree lhs = gimple_get_lhs (stmt);
		      gassign *new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		CASE_CFN_CABS:
		  arg0 = gimple_call_arg (stmt, 0);
		  loc = gimple_location (stmt);
		  result = gimple_expand_builtin_cabs (&gsi, loc, arg0);

		  if (result)
		    {
		      /* Replace the cabs call with an assignment of
			 RESULT, as for pow above.  */
		      tree lhs = gimple_get_lhs (stmt);
		      gassign *new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		default:;
		}
	    }
	}
      /* CLEANUP_EH is only still set if the last statement of BB was
	 replaced, which may have removed an EH side effect.  */
      if (cleanup_eh)
	cfg_changed |= gimple_purge_dead_eh_edges (bb);
    }

  statistics_counter_event (fun, "sincos statements inserted",
			    sincos_stats.inserted);

  return cfg_changed ? TODO_cleanup_cfg : 0;
}
207738fd1498Szrj
207838fd1498Szrj } // anon namespace
207938fd1498Szrj
/* Factory function used by the pass manager to instantiate the
   sincos pass.  */
gimple_opt_pass *
make_pass_cse_sincos (gcc::context *ctxt)
{
  return new pass_cse_sincos (ctxt);
}
208538fd1498Szrj
208638fd1498Szrj /* Return true if stmt is a type conversion operation that can be stripped
208738fd1498Szrj when used in a widening multiply operation. */
208838fd1498Szrj static bool
widening_mult_conversion_strippable_p(tree result_type,gimple * stmt)208938fd1498Szrj widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
209038fd1498Szrj {
209138fd1498Szrj enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
209238fd1498Szrj
209338fd1498Szrj if (TREE_CODE (result_type) == INTEGER_TYPE)
209438fd1498Szrj {
209538fd1498Szrj tree op_type;
209638fd1498Szrj tree inner_op_type;
209738fd1498Szrj
209838fd1498Szrj if (!CONVERT_EXPR_CODE_P (rhs_code))
209938fd1498Szrj return false;
210038fd1498Szrj
210138fd1498Szrj op_type = TREE_TYPE (gimple_assign_lhs (stmt));
210238fd1498Szrj
210338fd1498Szrj /* If the type of OP has the same precision as the result, then
210438fd1498Szrj we can strip this conversion. The multiply operation will be
210538fd1498Szrj selected to create the correct extension as a by-product. */
210638fd1498Szrj if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
210738fd1498Szrj return true;
210838fd1498Szrj
210938fd1498Szrj /* We can also strip a conversion if it preserves the signed-ness of
211038fd1498Szrj the operation and doesn't narrow the range. */
211138fd1498Szrj inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
211238fd1498Szrj
211338fd1498Szrj /* If the inner-most type is unsigned, then we can strip any
211438fd1498Szrj intermediate widening operation. If it's signed, then the
211538fd1498Szrj intermediate widening operation must also be signed. */
211638fd1498Szrj if ((TYPE_UNSIGNED (inner_op_type)
211738fd1498Szrj || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
211838fd1498Szrj && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
211938fd1498Szrj return true;
212038fd1498Szrj
212138fd1498Szrj return false;
212238fd1498Szrj }
212338fd1498Szrj
212438fd1498Szrj return rhs_code == FIXED_CONVERT_EXPR;
212538fd1498Szrj }
212638fd1498Szrj
212738fd1498Szrj /* Return true if RHS is a suitable operand for a widening multiplication,
212838fd1498Szrj assuming a target type of TYPE.
212938fd1498Szrj There are two cases:
213038fd1498Szrj
213138fd1498Szrj - RHS makes some value at least twice as wide. Store that value
213238fd1498Szrj in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
213338fd1498Szrj
213438fd1498Szrj - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
213538fd1498Szrj but leave *TYPE_OUT untouched. */
213638fd1498Szrj
213738fd1498Szrj static bool
is_widening_mult_rhs_p(tree type,tree rhs,tree * type_out,tree * new_rhs_out)213838fd1498Szrj is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
213938fd1498Szrj tree *new_rhs_out)
214038fd1498Szrj {
214138fd1498Szrj gimple *stmt;
214238fd1498Szrj tree type1, rhs1;
214338fd1498Szrj
214438fd1498Szrj if (TREE_CODE (rhs) == SSA_NAME)
214538fd1498Szrj {
214638fd1498Szrj stmt = SSA_NAME_DEF_STMT (rhs);
214738fd1498Szrj if (is_gimple_assign (stmt))
214838fd1498Szrj {
214938fd1498Szrj if (! widening_mult_conversion_strippable_p (type, stmt))
215038fd1498Szrj rhs1 = rhs;
215138fd1498Szrj else
215238fd1498Szrj {
215338fd1498Szrj rhs1 = gimple_assign_rhs1 (stmt);
215438fd1498Szrj
215538fd1498Szrj if (TREE_CODE (rhs1) == INTEGER_CST)
215638fd1498Szrj {
215738fd1498Szrj *new_rhs_out = rhs1;
215838fd1498Szrj *type_out = NULL;
215938fd1498Szrj return true;
216038fd1498Szrj }
216138fd1498Szrj }
216238fd1498Szrj }
216338fd1498Szrj else
216438fd1498Szrj rhs1 = rhs;
216538fd1498Szrj
216638fd1498Szrj type1 = TREE_TYPE (rhs1);
216738fd1498Szrj
216838fd1498Szrj if (TREE_CODE (type1) != TREE_CODE (type)
216938fd1498Szrj || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
217038fd1498Szrj return false;
217138fd1498Szrj
217238fd1498Szrj *new_rhs_out = rhs1;
217338fd1498Szrj *type_out = type1;
217438fd1498Szrj return true;
217538fd1498Szrj }
217638fd1498Szrj
217738fd1498Szrj if (TREE_CODE (rhs) == INTEGER_CST)
217838fd1498Szrj {
217938fd1498Szrj *new_rhs_out = rhs;
218038fd1498Szrj *type_out = NULL;
218138fd1498Szrj return true;
218238fd1498Szrj }
218338fd1498Szrj
218438fd1498Szrj return false;
218538fd1498Szrj }
218638fd1498Szrj
218738fd1498Szrj /* Return true if STMT performs a widening multiplication, assuming the
218838fd1498Szrj output type is TYPE. If so, store the unwidened types of the operands
218938fd1498Szrj in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
219038fd1498Szrj *RHS2_OUT such that converting those operands to types *TYPE1_OUT
219138fd1498Szrj and *TYPE2_OUT would give the operands of the multiplication. */
219238fd1498Szrj
219338fd1498Szrj static bool
is_widening_mult_p(gimple * stmt,tree * type1_out,tree * rhs1_out,tree * type2_out,tree * rhs2_out)219438fd1498Szrj is_widening_mult_p (gimple *stmt,
219538fd1498Szrj tree *type1_out, tree *rhs1_out,
219638fd1498Szrj tree *type2_out, tree *rhs2_out)
219738fd1498Szrj {
219838fd1498Szrj tree type = TREE_TYPE (gimple_assign_lhs (stmt));
219938fd1498Szrj
220038fd1498Szrj if (TREE_CODE (type) == INTEGER_TYPE)
220138fd1498Szrj {
220238fd1498Szrj if (TYPE_OVERFLOW_TRAPS (type))
220338fd1498Szrj return false;
220438fd1498Szrj }
220538fd1498Szrj else if (TREE_CODE (type) != FIXED_POINT_TYPE)
220638fd1498Szrj return false;
220738fd1498Szrj
220838fd1498Szrj if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
220938fd1498Szrj rhs1_out))
221038fd1498Szrj return false;
221138fd1498Szrj
221238fd1498Szrj if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
221338fd1498Szrj rhs2_out))
221438fd1498Szrj return false;
221538fd1498Szrj
221638fd1498Szrj if (*type1_out == NULL)
221738fd1498Szrj {
221838fd1498Szrj if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
221938fd1498Szrj return false;
222038fd1498Szrj *type1_out = *type2_out;
222138fd1498Szrj }
222238fd1498Szrj
222338fd1498Szrj if (*type2_out == NULL)
222438fd1498Szrj {
222538fd1498Szrj if (!int_fits_type_p (*rhs2_out, *type1_out))
222638fd1498Szrj return false;
222738fd1498Szrj *type2_out = *type1_out;
222838fd1498Szrj }
222938fd1498Szrj
223038fd1498Szrj /* Ensure that the larger of the two operands comes first. */
223138fd1498Szrj if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
223238fd1498Szrj {
223338fd1498Szrj std::swap (*type1_out, *type2_out);
223438fd1498Szrj std::swap (*rhs1_out, *rhs2_out);
223538fd1498Szrj }
223638fd1498Szrj
223738fd1498Szrj return true;
223838fd1498Szrj }
223938fd1498Szrj
224038fd1498Szrj /* Check to see if the CALL statement is an invocation of copysign
224138fd1498Szrj with 1. being the first argument. */
224238fd1498Szrj static bool
is_copysign_call_with_1(gimple * call)224338fd1498Szrj is_copysign_call_with_1 (gimple *call)
224438fd1498Szrj {
224538fd1498Szrj gcall *c = dyn_cast <gcall *> (call);
224638fd1498Szrj if (! c)
224738fd1498Szrj return false;
224838fd1498Szrj
224938fd1498Szrj enum combined_fn code = gimple_call_combined_fn (c);
225038fd1498Szrj
225138fd1498Szrj if (code == CFN_LAST)
225238fd1498Szrj return false;
225338fd1498Szrj
225438fd1498Szrj if (builtin_fn_p (code))
225538fd1498Szrj {
225638fd1498Szrj switch (as_builtin_fn (code))
225738fd1498Szrj {
225838fd1498Szrj CASE_FLT_FN (BUILT_IN_COPYSIGN):
225938fd1498Szrj CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
226038fd1498Szrj return real_onep (gimple_call_arg (c, 0));
226138fd1498Szrj default:
226238fd1498Szrj return false;
226338fd1498Szrj }
226438fd1498Szrj }
226538fd1498Szrj
226638fd1498Szrj if (internal_fn_p (code))
226738fd1498Szrj {
226838fd1498Szrj switch (as_internal_fn (code))
226938fd1498Szrj {
227038fd1498Szrj case IFN_COPYSIGN:
227138fd1498Szrj return real_onep (gimple_call_arg (c, 0));
227238fd1498Szrj default:
227338fd1498Szrj return false;
227438fd1498Szrj }
227538fd1498Szrj }
227638fd1498Szrj
227738fd1498Szrj return false;
227838fd1498Szrj }
227938fd1498Szrj
/* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
   This only happens when the xorsign optab is defined, if the
   pattern is not a xorsign pattern or if expansion fails FALSE is
   returned, otherwise TRUE is returned.  */
static bool
convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
{
  tree treeop0, treeop1, lhs, type;
  location_t loc = gimple_location (stmt);
  lhs = gimple_assign_lhs (stmt);
  treeop0 = gimple_assign_rhs1 (stmt);
  treeop1 = gimple_assign_rhs2 (stmt);
  type = TREE_TYPE (lhs);
  machine_mode mode = TYPE_MODE (type);

  /* NOTE(review): presumably xorsign flips the sign bit without ever
     reading the value, so a signaling NaN would not be signalled —
     hence the punt when SNaNs must be honored; confirm against the
     xorsign optab documentation.  */
  if (HONOR_SNANS (type))
    return false;

  if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
    {
      /* Look for the single-use copysign call in either operand of the
	 multiplication; it must be single-use so that absorbing it into
	 the xorsign does not change other users.  */
      gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
      if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
	{
	  call0 = SSA_NAME_DEF_STMT (treeop1);
	  if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
	    return false;

	  /* The copysign was the second operand; after this assignment
	     TREEOP1 is the plain multiplicand (x) in either case.  */
	  treeop1 = treeop0;
	}
      if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
	return false;

      /* Replace the copysign result with its second argument (y), the
	 sign source.  */
      gcall *c = as_a<gcall*> (call0);
      treeop0 = gimple_call_arg (c, 1);

      /* Emit lhs = XORSIGN (x, y) in place of the multiplication.  */
      gcall *call_stmt
	= gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
      gimple_set_lhs (call_stmt, lhs);
      gimple_set_location (call_stmt, loc);
      gsi_replace (gsi, call_stmt, true);
      return true;
    }

  return false;
}
232538fd1498Szrj
/* Process a single gimple statement STMT, which has a MULT_EXPR as
   its rhs, and try to convert it into a WIDEN_MULT_EXPR.  The return
   value is true iff we converted the statement.  */

static bool
convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
{
  tree lhs, rhs1, rhs2, type, type1, type2;
  enum insn_code handler;
  scalar_int_mode to_mode, from_mode, actual_mode;
  optab op;
  int actual_precision;
  location_t loc = gimple_location (stmt);
  bool from_unsigned1, from_unsigned2;

  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);
  if (TREE_CODE (type) != INTEGER_TYPE)
    return false;

  if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
    return false;

  /* If the operand mode equals the result mode, no actual widening
     takes place and there is nothing to gain.  */
  to_mode = SCALAR_INT_TYPE_MODE (type);
  from_mode = SCALAR_INT_TYPE_MODE (type1);
  if (to_mode == from_mode)
    return false;

  from_unsigned1 = TYPE_UNSIGNED (type1);
  from_unsigned2 = TYPE_UNSIGNED (type2);

  /* Pick the widening-multiply optab matching the signedness of the
     operands: unsigned*unsigned, signed*signed, or mixed.  */
  if (from_unsigned1 && from_unsigned2)
    op = umul_widen_optab;
  else if (!from_unsigned1 && !from_unsigned2)
    op = smul_widen_optab;
  else
    op = usmul_widen_optab;

  handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
						  &actual_mode);

  if (handler == CODE_FOR_nothing)
    {
      if (op != smul_widen_optab)
	{
	  /* We can use a signed multiply with unsigned types as long as
	     there is a wider mode to use, or it is the smaller of the two
	     types that is unsigned.  Note that type1 >= type2, always.  */
	  if ((TYPE_UNSIGNED (type1)
	       && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
	      || (TYPE_UNSIGNED (type2)
		  && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
	    {
	      if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
		  || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
		return false;
	    }

	  /* Retry with the signed widening multiply; the operands will
	     be treated as signed from here on.  */
	  op = smul_widen_optab;
	  handler = find_widening_optab_handler_and_mode (op, to_mode,
							  from_mode,
							  &actual_mode);

	  if (handler == CODE_FOR_nothing)
	    return false;

	  from_unsigned1 = from_unsigned2 = false;
	}
      else
	return false;
    }

  /* Ensure that the inputs to the handler are in the correct precision
     for the opcode.  This will be the full mode size.  */
  actual_precision = GET_MODE_PRECISION (actual_mode);
  if (2 * actual_precision > TYPE_PRECISION (type))
    return false;
  if (actual_precision != TYPE_PRECISION (type1)
      || from_unsigned1 != TYPE_UNSIGNED (type1))
    rhs1 = build_and_insert_cast (gsi, loc,
				  build_nonstandard_integer_type
				  (actual_precision, from_unsigned1), rhs1);
  if (actual_precision != TYPE_PRECISION (type2)
      || from_unsigned2 != TYPE_UNSIGNED (type2))
    rhs2 = build_and_insert_cast (gsi, loc,
				  build_nonstandard_integer_type
				  (actual_precision, from_unsigned2), rhs2);

  /* Handle constants.  */
  if (TREE_CODE (rhs1) == INTEGER_CST)
    rhs1 = fold_convert (type1, rhs1);
  if (TREE_CODE (rhs2) == INTEGER_CST)
    rhs2 = fold_convert (type2, rhs2);

  /* Rewrite STMT in place into the widening multiply.  */
  gimple_assign_set_rhs1 (stmt, rhs1);
  gimple_assign_set_rhs2 (stmt, rhs2);
  gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
  update_stmt (stmt);
  widen_mul_stats.widen_mults_inserted++;
  return true;
}
242738fd1498Szrj
/* Process a single gimple statement STMT, which is found at the
   iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
   rhs (given by CODE), and try to convert it into a
   WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
   is true iff we converted the statement.  */

static bool
convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
			    enum tree_code code)
{
  gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
  gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
  tree type, type1, type2, optype;
  tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
  enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
  optab this_optab;
  enum tree_code wmult_code;
  enum insn_code handler;
  scalar_mode to_mode, from_mode, actual_mode;
  location_t loc = gimple_location (stmt);
  int actual_precision;
  bool from_unsigned1, from_unsigned2;

  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);
  if (TREE_CODE (type) != INTEGER_TYPE
      && TREE_CODE (type) != FIXED_POINT_TYPE)
    return false;

  if (code == MINUS_EXPR)
    wmult_code = WIDEN_MULT_MINUS_EXPR;
  else
    wmult_code = WIDEN_MULT_PLUS_EXPR;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);

  /* Record the defining statement and rhs code of each addend, so we
     can look for a multiplication feeding this plus/minus.  */
  if (TREE_CODE (rhs1) == SSA_NAME)
    {
      rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
      if (is_gimple_assign (rhs1_stmt))
	rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
    }

  if (TREE_CODE (rhs2) == SSA_NAME)
    {
      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
      if (is_gimple_assign (rhs2_stmt))
	rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
    }

  /* Allow for one conversion statement between the multiply
     and addition/subtraction statement.  If there are more than
     one conversions then we assume they would invalidate this
     transformation.  If that's not the case then they should have
     been folded before now.  */
  if (CONVERT_EXPR_CODE_P (rhs1_code))
    {
      conv1_stmt = rhs1_stmt;
      rhs1 = gimple_assign_rhs1 (rhs1_stmt);
      if (TREE_CODE (rhs1) == SSA_NAME)
	{
	  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
	  if (is_gimple_assign (rhs1_stmt))
	    rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
	}
      else
	return false;
    }
  if (CONVERT_EXPR_CODE_P (rhs2_code))
    {
      conv2_stmt = rhs2_stmt;
      rhs2 = gimple_assign_rhs1 (rhs2_stmt);
      if (TREE_CODE (rhs2) == SSA_NAME)
	{
	  rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
	  if (is_gimple_assign (rhs2_stmt))
	    rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
	}
      else
	return false;
    }

  /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
     is_widening_mult_p, but we still need the rhs returns.

     It might also appear that it would be sufficient to use the existing
     operands of the widening multiply, but that would limit the choice of
     multiply-and-accumulate instructions.

     If the widened-multiplication result has more than one uses, it is
     probably wiser not to do the conversion.  */
  if (code == PLUS_EXPR
      && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
    {
      if (!has_single_use (rhs1)
	  || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
				  &type2, &mult_rhs2))
	return false;
      add_rhs = rhs2;
      conv_stmt = conv1_stmt;
    }
  else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
    {
      /* Note: for MINUS_EXPR only the second addend may be the multiply
	 (a - b*c); b*c - a would need a different operation.  */
      if (!has_single_use (rhs2)
	  || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
				  &type2, &mult_rhs2))
	return false;
      add_rhs = rhs1;
      conv_stmt = conv2_stmt;
    }
  else
    return false;

  /* No actual widening means nothing to gain.  */
  to_mode = SCALAR_TYPE_MODE (type);
  from_mode = SCALAR_TYPE_MODE (type1);
  if (to_mode == from_mode)
    return false;

  from_unsigned1 = TYPE_UNSIGNED (type1);
  from_unsigned2 = TYPE_UNSIGNED (type2);
  optype = type1;

  /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
  if (from_unsigned1 != from_unsigned2)
    {
      if (!INTEGRAL_TYPE_P (type))
	return false;
      /* We can use a signed multiply with unsigned types as long as
	 there is a wider mode to use, or it is the smaller of the two
	 types that is unsigned.  Note that type1 >= type2, always.  */
      if ((from_unsigned1
	   && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
	  || (from_unsigned2
	      && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
	{
	  if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
	      || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
	    return false;
	}

      /* From here on both operands are treated as signed, in a type
	 wide enough to hold either.  */
      from_unsigned1 = from_unsigned2 = false;
      optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
					       false);
    }

  /* If there was a conversion between the multiply and addition
     then we need to make sure it fits a multiply-and-accumulate.
     There should be a single mode change which does not change the
     value.  */
  if (conv_stmt)
    {
      /* We use the original, unmodified data types for this.  */
      tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
      tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
      int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
      bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);

      if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
	{
	  /* Conversion is a truncate.  */
	  if (TYPE_PRECISION (to_type) < data_size)
	    return false;
	}
      else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
	{
	  /* Conversion is an extend.  Check it's the right sort.  */
	  if (TYPE_UNSIGNED (from_type) != is_unsigned
	      && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
	    return false;
	}
      /* else convert is a no-op for our purposes.  */
    }

  /* Verify that the machine can perform a widening multiply
     accumulate in this mode/signedness combination, otherwise
     this transformation is likely to pessimize code.  */
  this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
  handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
						  from_mode, &actual_mode);

  if (handler == CODE_FOR_nothing)
    return false;

  /* Ensure that the inputs to the handler are in the correct precision
     for the opcode.  This will be the full mode size.  */
  actual_precision = GET_MODE_PRECISION (actual_mode);
  if (actual_precision != TYPE_PRECISION (type1)
      || from_unsigned1 != TYPE_UNSIGNED (type1))
    mult_rhs1 = build_and_insert_cast (gsi, loc,
				       build_nonstandard_integer_type
				       (actual_precision, from_unsigned1),
				       mult_rhs1);
  if (actual_precision != TYPE_PRECISION (type2)
      || from_unsigned2 != TYPE_UNSIGNED (type2))
    mult_rhs2 = build_and_insert_cast (gsi, loc,
				       build_nonstandard_integer_type
				       (actual_precision, from_unsigned2),
				       mult_rhs2);

  /* The accumulator input must already be in the result type.  */
  if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
    add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);

  /* Handle constants.  */
  if (TREE_CODE (mult_rhs1) == INTEGER_CST)
    mult_rhs1 = fold_convert (type1, mult_rhs1);
  if (TREE_CODE (mult_rhs2) == INTEGER_CST)
    mult_rhs2 = fold_convert (type2, mult_rhs2);

  /* Rewrite STMT in place into the widening multiply-accumulate.  */
  gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
				  add_rhs);
  update_stmt (gsi_stmt (*gsi));
  widen_mul_stats.maccs_inserted++;
  return true;
}
264338fd1498Szrj
/* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
   OP2 and which we know is used in statements that can be, together with the
   multiplication, converted to FMAs, perform the transformation.  */

static void
convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
{
  tree type = TREE_TYPE (mul_result);
  gimple *use_stmt;
  imm_use_iterator imm_iter;
  gassign *fma_stmt;

  /* Rewrite every (non-debug) use of the multiplication result into an
     FMA_EXPR that folds the multiply into the add/subtract.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      enum tree_code use_code;
      tree addop, mulop1 = op1, result = mul_result;
      bool negate_p = false;

      if (is_gimple_debug (use_stmt))
	continue;

      use_code = gimple_assign_rhs_code (use_stmt);
      if (use_code == NEGATE_EXPR)
	{
	  /* A negation between the multiply and the add is absorbed by
	     negating the multiplier instead.  NOTE(review): the return
	     value of single_imm_use is not checked, so this relies on the
	     caller's analysis having guaranteed exactly one use of the
	     negation — confirm against convert_mult_to_fma.  */
	  result = gimple_assign_lhs (use_stmt);
	  use_operand_p use_p;
	  gimple *neguse_stmt;
	  single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
	  gsi_remove (&gsi, true);
	  release_defs (use_stmt);

	  /* Continue processing with the statement that consumed the
	     negation.  */
	  use_stmt = neguse_stmt;
	  gsi = gsi_for_stmt (use_stmt);
	  use_code = gimple_assign_rhs_code (use_stmt);
	  negate_p = true;
	}

      if (gimple_assign_rhs1 (use_stmt) == result)
	{
	  addop = gimple_assign_rhs2 (use_stmt);
	  /* a * b - c -> a * b + (-c)  */
	  if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
	    addop = force_gimple_operand_gsi (&gsi,
					      build1 (NEGATE_EXPR,
						      type, addop),
					      true, NULL_TREE, true,
					      GSI_SAME_STMT);
	}
      else
	{
	  addop = gimple_assign_rhs1 (use_stmt);
	  /* a - b * c -> (-b) * c + a */
	  if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
	    negate_p = !negate_p;
	}

      /* Materialize the (possibly doubly-cancelled) negation on the
	 first multiplier operand.  */
      if (negate_p)
	mulop1 = force_gimple_operand_gsi (&gsi,
					   build1 (NEGATE_EXPR,
						   type, mulop1),
					   true, NULL_TREE, true,
					   GSI_SAME_STMT);

      fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt),
				     FMA_EXPR, mulop1, op2, addop);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Generated FMA ");
	  print_gimple_stmt (dump_file, fma_stmt, 0, 0);
	  fprintf (dump_file, "\n");
	}

      gsi_replace (&gsi, fma_stmt, true);
      widen_mul_stats.fmas_inserted++;
    }
}
272238fd1498Szrj
/* Data necessary to perform the actual transformation from a multiplication
   and an addition to an FMA after decision is taken it should be done and to
   then delete the multiplication statement from the function IL.  */

struct fma_transformation_info
{
  /* The multiplication statement to be removed after its uses have been
     turned into FMAs.  */
  gimple *mul_stmt;
  /* SSA name holding the result of MUL_STMT.  */
  tree mul_result;
  /* First operand of the multiplication.  */
  tree op1;
  /* Second operand of the multiplication.  */
  tree op2;
};
273438fd1498Szrj
/* Structure containing the current state of FMA deferring, i.e. whether we are
   deferring, whether to continue deferring, and all data necessary to come
   back and perform all deferred transformations.  */

class fma_deferring_state
{
public:
  /* Class constructor.  Pass true as PERFORM_DEFERRING in order to actually
     do any deferring.  */

  fma_deferring_state (bool perform_deferring)
    : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
      m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}

  /* List of FMA candidates for which the transformation has been determined
     to be possible but which, at this point in the BB analysis, we do not
     yet consider beneficial.  */
  auto_vec<fma_transformation_info, 8> m_candidates;

  /* Set of results of multiplications that are part of an already deferred
     FMA candidate.  */
  hash_set<tree> m_mul_result_set;

  /* The PHI that supposedly feeds the result of one FMA back to another
     across a loop boundary.  */
  gphi *m_initial_phi;

  /* Result of the last produced FMA candidate or NULL if there has not been
     one.  */
  tree m_last_result;

  /* If true, deferring might still be profitable.  If false, transform all
     candidates and no longer defer.  */
  bool m_deferring_p;
};
277038fd1498Szrj
277138fd1498Szrj /* Transform all deferred FMA candidates and mark STATE as no longer
277238fd1498Szrj deferring. */
277338fd1498Szrj
277438fd1498Szrj static void
cancel_fma_deferring(fma_deferring_state * state)277538fd1498Szrj cancel_fma_deferring (fma_deferring_state *state)
277638fd1498Szrj {
277738fd1498Szrj if (!state->m_deferring_p)
277838fd1498Szrj return;
277938fd1498Szrj
278038fd1498Szrj for (unsigned i = 0; i < state->m_candidates.length (); i++)
278138fd1498Szrj {
278238fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
278338fd1498Szrj fprintf (dump_file, "Generating deferred FMA\n");
278438fd1498Szrj
278538fd1498Szrj const fma_transformation_info &fti = state->m_candidates[i];
278638fd1498Szrj convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
278738fd1498Szrj
278838fd1498Szrj gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
278938fd1498Szrj gsi_remove (&gsi, true);
279038fd1498Szrj release_defs (fti.mul_stmt);
279138fd1498Szrj }
279238fd1498Szrj state->m_deferring_p = false;
279338fd1498Szrj }
279438fd1498Szrj
279538fd1498Szrj /* If OP is an SSA name defined by a PHI node, return the PHI statement.
279638fd1498Szrj Otherwise return NULL. */
279738fd1498Szrj
279838fd1498Szrj static gphi *
result_of_phi(tree op)279938fd1498Szrj result_of_phi (tree op)
280038fd1498Szrj {
280138fd1498Szrj if (TREE_CODE (op) != SSA_NAME)
280238fd1498Szrj return NULL;
280338fd1498Szrj
280438fd1498Szrj return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
280538fd1498Szrj }
280638fd1498Szrj
280738fd1498Szrj /* After processing statements of a BB and recording STATE, return true if the
280838fd1498Szrj initial phi is fed by the last FMA candidate result ore one such result from
280938fd1498Szrj previously processed BBs marked in LAST_RESULT_SET. */
281038fd1498Szrj
281138fd1498Szrj static bool
last_fma_candidate_feeds_initial_phi(fma_deferring_state * state,hash_set<tree> * last_result_set)281238fd1498Szrj last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
281338fd1498Szrj hash_set<tree> *last_result_set)
281438fd1498Szrj {
281538fd1498Szrj ssa_op_iter iter;
281638fd1498Szrj use_operand_p use;
281738fd1498Szrj FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
281838fd1498Szrj {
281938fd1498Szrj tree t = USE_FROM_PTR (use);
282038fd1498Szrj if (t == state->m_last_result
282138fd1498Szrj || last_result_set->contains (t))
282238fd1498Szrj return true;
282338fd1498Szrj }
282438fd1498Szrj
282538fd1498Szrj return false;
282638fd1498Szrj }
282738fd1498Szrj
/* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
   with uses in additions and subtractions to form fused multiply-add
   operations.  Returns true if successful and MUL_STMT should be removed.

   If STATE indicates that we are deferring FMA transformation, that means
   that we do not produce FMAs for basic blocks which look like:

    <bb 6>
    # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
    _65 = _14 * _16;
    accumulator_66 = _65 + accumulator_111;

   or its unrolled version, i.e. with several FMA candidates that feed result
   of one into the addend of another.  Instead, we add them to a list in STATE
   and if we later discover an FMA candidate that is not part of such a chain,
   we go back and perform all deferred past candidates.  */

static bool
convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
		     fma_deferring_state *state)
{
  tree mul_result = gimple_get_lhs (mul_stmt);
  tree type = TREE_TYPE (mul_result);
  gimple *use_stmt, *neguse_stmt;
  use_operand_p use_p;
  imm_use_iterator imm_iter;

  /* Honor -ffp-contract=off for floating-point types; integer FMA (if the
     target has one) is not affected by that flag.  */
  if (FLOAT_TYPE_P (type)
      && flag_fp_contract_mode == FP_CONTRACT_OFF)
    return false;

  /* We don't want to do bitfield reduction ops.  */
  if (INTEGRAL_TYPE_P (type)
      && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
    return false;

  /* If the target doesn't support it, don't generate it.  We assume that
     if fma isn't available then fms, fnma or fnms are not either.  */
  if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
    return false;

  /* If the multiplication has zero uses, it is kept around probably because
     of -fnon-call-exceptions.  Don't optimize it away in that case,
     it is DCE job.  */
  if (has_zero_uses (mul_result))
    return false;

  /* Only consider deferring for types no wider than the limit set by
     PARAM_AVOID_FMA_MAX_BITS; CHECK_DEFER is cleared after the first
     relevant use has been examined.  */
  bool check_defer
    = (state->m_deferring_p
       && (tree_to_shwi (TYPE_SIZE (type))
	   <= PARAM_VALUE (PARAM_AVOID_FMA_MAX_BITS)));
  bool defer = check_defer;
  /* Make sure that the multiplication statement becomes dead after
     the transformation, thus that all uses are transformed to FMAs.
     This means we assume that an FMA operation has the same cost
     as an addition.  */
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
    {
      enum tree_code use_code;
      tree result = mul_result;
      bool negate_p = false;

      use_stmt = USE_STMT (use_p);

      if (is_gimple_debug (use_stmt))
	continue;

      /* For now restrict this operations to single basic blocks.  In theory
	 we would want to support sinking the multiplication in
	 m = a*b;
	 if ()
	   ma = m + c;
	 else
	   d  = m;
	 to form a fma in the then block and sink the multiplication to the
	 else block.  */
      if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
	return false;

      if (!is_gimple_assign (use_stmt))
	return false;

      use_code = gimple_assign_rhs_code (use_stmt);

      /* A negate on the multiplication leads to FNMA.  */
      if (use_code == NEGATE_EXPR)
	{
	  ssa_op_iter iter;
	  use_operand_p usep;

	  result = gimple_assign_lhs (use_stmt);

	  /* Make sure the negate statement becomes dead with this
	     single transformation.  */
	  if (!single_imm_use (gimple_assign_lhs (use_stmt),
			       &use_p, &neguse_stmt))
	    return false;

	  /* Make sure the multiplication isn't also used on that stmt.  */
	  FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
	    if (USE_FROM_PTR (usep) == mul_result)
	      return false;

	  /* Re-validate.  Look through the negate to the add/subtract that
	     consumes it, and re-apply the same basic-block and assign
	     checks to that statement.  */
	  use_stmt = neguse_stmt;
	  if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
	    return false;
	  if (!is_gimple_assign (use_stmt))
	    return false;

	  use_code = gimple_assign_rhs_code (use_stmt);
	  negate_p = true;
	}

      switch (use_code)
	{
	case MINUS_EXPR:
	  /* a - (b*c) is equivalent to a + (-(b*c)), so flip NEGATE_P when
	     the multiplication result is the subtrahend.  */
	  if (gimple_assign_rhs2 (use_stmt) == result)
	    negate_p = !negate_p;
	  break;
	case PLUS_EXPR:
	  break;
	default:
	  /* FMA can only be formed from PLUS and MINUS.  */
	  return false;
	}

      /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
	 by a MULT_EXPR that we'll visit later, we might be able to
	 get a more profitable match with fnma.
	 OTOH, if we don't, a negate / fma pair has likely lower latency
	 that a mult / subtract pair.  */
      if (use_code == MINUS_EXPR && !negate_p
	  && gimple_assign_rhs1 (use_stmt) == result
	  && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
	  && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
	{
	  tree rhs2 = gimple_assign_rhs2 (use_stmt);

	  if (TREE_CODE (rhs2) == SSA_NAME)
	    {
	      gimple *stmt2 = SSA_NAME_DEF_STMT (rhs2);
	      if (has_single_use (rhs2)
		  && is_gimple_assign (stmt2)
		  && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
		return false;
	    }
	}

      tree use_rhs1 = gimple_assign_rhs1 (use_stmt);
      tree use_rhs2 = gimple_assign_rhs2 (use_stmt);
      /* We can't handle a * b + a * b.  */
      if (use_rhs1 == use_rhs2)
	return false;
      /* If deferring, make sure we are not looking at an instruction that
	 wouldn't have existed if we were not.  */
      if (state->m_deferring_p
	  && (state->m_mul_result_set.contains (use_rhs1)
	      || state->m_mul_result_set.contains (use_rhs2)))
	return false;

      if (check_defer)
	{
	  tree use_lhs = gimple_assign_lhs (use_stmt);
	  if (state->m_last_result)
	    {
	      /* Continue deferring only if this candidate's addend is the
		 result of the previously deferred one, i.e. we are still in
		 a chain.  */
	      if (use_rhs2 == state->m_last_result
		  || use_rhs1 == state->m_last_result)
		defer = true;
	      else
		defer = false;
	    }
	  else
	    {
	      /* First candidate in a potential chain: it starts a chain only
		 if the non-multiplication addend comes from a PHI, which is
		 then remembered as the chain's initial PHI.  */
	      gcc_checking_assert (!state->m_initial_phi);
	      gphi *phi;
	      if (use_rhs1 == result)
		phi = result_of_phi (use_rhs2);
	      else
		{
		  gcc_assert (use_rhs2 == result);
		  phi = result_of_phi (use_rhs1);
		}

	      if (phi)
		{
		  state->m_initial_phi = phi;
		  defer = true;
		}
	      else
		defer = false;
	    }

	  state->m_last_result = use_lhs;
	  check_defer = false;
	}
      else
	defer = false;

      /* While it is possible to validate whether or not the exact form that
	 we've recognized is available in the backend, the assumption is that
	 if the deferring logic above did not trigger, the transformation is
	 never a loss.  For instance, suppose the target only has the plain FMA
	 pattern available.  Consider a*b-c -> fma(a,b,-c): we've exchanged
	 MUL+SUB for FMA+NEG, which is still two operations.  Consider
	 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
	 form the two NEGs are independent and could be run in parallel.  */
    }

  if (defer)
    {
      /* Record the candidate instead of transforming it; it will either be
	 transformed later by execute_fma_deferring or dropped by
	 cancel_fma_deferring.  */
      fma_transformation_info fti;
      fti.mul_stmt = mul_stmt;
      fti.mul_result = mul_result;
      fti.op1 = op1;
      fti.op2 = op2;
      state->m_candidates.safe_push (fti);
      state->m_mul_result_set.add (mul_result);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Deferred generating FMA for multiplication ");
	  print_gimple_stmt (dump_file, mul_stmt, 0, 0);
	  fprintf (dump_file, "\n");
	}

      return false;
    }
  else
    {
      /* A candidate outside any chain: perform the transformation now and
	 stop deferring altogether in this BB.  */
      if (state->m_deferring_p)
	cancel_fma_deferring (state);
      convert_mult_to_fma_1 (mul_result, op1, op2);
      return true;
    }
}
306438fd1498Szrj

/* Helper function of match_uaddsub_overflow.  Return 1
   if USE_STMT is unsigned overflow check ovf != 0 for
   STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
   and 0 otherwise.  */

static int
uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt)
{
  enum tree_code ccode = ERROR_MARK;
  tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
  /* Extract the comparison code and operands from the three shapes a
     comparison of STMT's result can take: a GIMPLE_COND, a binary
     comparison assignment, or the condition of a COND_EXPR rhs.  */
  if (gimple_code (use_stmt) == GIMPLE_COND)
    {
      ccode = gimple_cond_code (use_stmt);
      crhs1 = gimple_cond_lhs (use_stmt);
      crhs2 = gimple_cond_rhs (use_stmt);
    }
  else if (is_gimple_assign (use_stmt))
    {
      if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
	{
	  ccode = gimple_assign_rhs_code (use_stmt);
	  crhs1 = gimple_assign_rhs1 (use_stmt);
	  crhs2 = gimple_assign_rhs2 (use_stmt);
	}
      else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR)
	{
	  tree cond = gimple_assign_rhs1 (use_stmt);
	  if (COMPARISON_CLASS_P (cond))
	    {
	      ccode = TREE_CODE (cond);
	      crhs1 = TREE_OPERAND (cond, 0);
	      crhs2 = TREE_OPERAND (cond, 1);
	    }
	  else
	    return 0;
	}
      else
	return 0;
    }
  else
    return 0;

  /* The binary-rhs case above may have picked up a non-comparison code.  */
  if (TREE_CODE_CLASS (ccode) != tcc_comparison)
    return 0;

  enum tree_code code = gimple_assign_rhs_code (stmt);
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);

  /* Match the comparison against the known unsigned overflow idioms for
     r = a + b and r = a - b.  GT/LT forms mean "overflowed" (return 1),
     LE/GE forms mean "did not overflow" (return -1).  */
  switch (ccode)
    {
    case GT_EXPR:
    case LE_EXPR:
      /* r = a - b; r > a or r <= a
	 r = a + b; a > r or a <= r or b > r or b <= r.  */
      if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
	  || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
	      && crhs2 == lhs))
	return ccode == GT_EXPR ? 1 : -1;
      break;
    case LT_EXPR:
    case GE_EXPR:
      /* r = a - b; a < r or a >= r
	 r = a + b; r < a or r >= a or r < b or r >= b.  */
      if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
	  || (code == PLUS_EXPR && crhs1 == lhs
	      && (crhs2 == rhs1 || crhs2 == rhs2)))
	return ccode == LT_EXPR ? 1 : -1;
      break;
    default:
      break;
    }
  return 0;
}
314138fd1498Szrj
/* Recognize for unsigned x
   x = y - z;
   if (x > y)
   where there are other uses of x and replace it with
   _7 = SUB_OVERFLOW (y, z);
   x = REALPART_EXPR <_7>;
   _8 = IMAGPART_EXPR <_7>;
   if (_8)
   and similarly for addition.

   GSI points at STMT, the PLUS_EXPR/MINUS_EXPR assignment (CODE tells
   which).  Returns true if the replacement was made.  */

static bool
match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
			enum tree_code code)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree type = TREE_TYPE (lhs);
  use_operand_p use_p;
  imm_use_iterator iter;
  bool use_seen = false;
  bool ovf_use_seen = false;
  gimple *use_stmt;

  /* Only unsigned integers, only when the result has both an overflow-check
     use and some other use, and only when the target has an add/sub with
     overflow pattern for this mode.  */
  gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
  if (!INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type)
      || has_zero_uses (lhs)
      || has_single_use (lhs)
      || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab,
			TYPE_MODE (type)) == CODE_FOR_nothing)
    return false;

  /* First pass: classify the uses; we need at least one overflow check and
     at least one ordinary use before the transform pays off.  */
  FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
    {
      use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
	continue;

      if (uaddsub_overflow_check_p (stmt, use_stmt))
	ovf_use_seen = true;
      else
	use_seen = true;
      if (ovf_use_seen && use_seen)
	break;
    }

  if (!ovf_use_seen || !use_seen)
    return false;

  /* Build ctmp = .ADD_OVERFLOW/.SUB_OVERFLOW (rhs1, rhs2), replace STMT with
     lhs = REALPART_EXPR <ctmp> and emit ovf = IMAGPART_EXPR <ctmp> after
     it.  */
  tree ctype = build_complex_type (type);
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  gcall *g = gimple_build_call_internal (code == PLUS_EXPR
					 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
					 2, rhs1, rhs2);
  tree ctmp = make_ssa_name (ctype);
  gimple_call_set_lhs (g, ctmp);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  gassign *g2 = gimple_build_assign (lhs, REALPART_EXPR,
				     build1 (REALPART_EXPR, type, ctmp));
  gsi_replace (gsi, g2, true);
  tree ovf = make_ssa_name (type);
  g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
			    build1 (IMAGPART_EXPR, type, ctmp));
  gsi_insert_after (gsi, g2, GSI_NEW_STMT);

  /* Second pass: rewrite every overflow-check use of LHS into a test of OVF
     against zero; non-check uses keep using the REALPART_EXPR result.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      if (is_gimple_debug (use_stmt))
	continue;

      int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt);
      if (ovf_use == 0)
	continue;
      if (gimple_code (use_stmt) == GIMPLE_COND)
	{
	  gcond *cond_stmt = as_a <gcond *> (use_stmt);
	  gimple_cond_set_lhs (cond_stmt, ovf);
	  gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
	  gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
	}
      else
	{
	  gcc_checking_assert (is_gimple_assign (use_stmt));
	  if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
	    {
	      gimple_assign_set_rhs1 (use_stmt, ovf);
	      gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
	      gimple_assign_set_rhs_code (use_stmt,
					  ovf_use == 1 ? NE_EXPR : EQ_EXPR);
	    }
	  else
	    {
	      /* COND_EXPR rhs: replace its embedded comparison tree.  */
	      gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
				   == COND_EXPR);
	      tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
				  boolean_type_node, ovf,
				  build_int_cst (type, 0));
	      gimple_assign_set_rhs1 (use_stmt, cond);
	    }
	}
      update_stmt (use_stmt);
    }
  return true;
}
324638fd1498Szrj
324738fd1498Szrj /* Return true if target has support for divmod. */
324838fd1498Szrj
324938fd1498Szrj static bool
target_supports_divmod_p(optab divmod_optab,optab div_optab,machine_mode mode)325038fd1498Szrj target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
325138fd1498Szrj {
325238fd1498Szrj /* If target supports hardware divmod insn, use it for divmod. */
325338fd1498Szrj if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
325438fd1498Szrj return true;
325538fd1498Szrj
325638fd1498Szrj /* Check if libfunc for divmod is available. */
325738fd1498Szrj rtx libfunc = optab_libfunc (divmod_optab, mode);
325838fd1498Szrj if (libfunc != NULL_RTX)
325938fd1498Szrj {
326038fd1498Szrj /* If optab_handler exists for div_optab, perhaps in a wider mode,
326138fd1498Szrj we don't want to use the libfunc even if it exists for given mode. */
326238fd1498Szrj machine_mode div_mode;
326338fd1498Szrj FOR_EACH_MODE_FROM (div_mode, mode)
326438fd1498Szrj if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
326538fd1498Szrj return false;
326638fd1498Szrj
326738fd1498Szrj return targetm.expand_divmod_libfunc != NULL;
326838fd1498Szrj }
326938fd1498Szrj
327038fd1498Szrj return false;
327138fd1498Szrj }
327238fd1498Szrj
327338fd1498Szrj /* Check if stmt is candidate for divmod transform. */
327438fd1498Szrj
327538fd1498Szrj static bool
divmod_candidate_p(gassign * stmt)327638fd1498Szrj divmod_candidate_p (gassign *stmt)
327738fd1498Szrj {
327838fd1498Szrj tree type = TREE_TYPE (gimple_assign_lhs (stmt));
327938fd1498Szrj machine_mode mode = TYPE_MODE (type);
328038fd1498Szrj optab divmod_optab, div_optab;
328138fd1498Szrj
328238fd1498Szrj if (TYPE_UNSIGNED (type))
328338fd1498Szrj {
328438fd1498Szrj divmod_optab = udivmod_optab;
328538fd1498Szrj div_optab = udiv_optab;
328638fd1498Szrj }
328738fd1498Szrj else
328838fd1498Szrj {
328938fd1498Szrj divmod_optab = sdivmod_optab;
329038fd1498Szrj div_optab = sdiv_optab;
329138fd1498Szrj }
329238fd1498Szrj
329338fd1498Szrj tree op1 = gimple_assign_rhs1 (stmt);
329438fd1498Szrj tree op2 = gimple_assign_rhs2 (stmt);
329538fd1498Szrj
329638fd1498Szrj /* Disable the transform if either is a constant, since division-by-constant
329738fd1498Szrj may have specialized expansion. */
329838fd1498Szrj if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2))
329938fd1498Szrj return false;
330038fd1498Szrj
330138fd1498Szrj /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
330238fd1498Szrj expand using the [su]divv optabs. */
330338fd1498Szrj if (TYPE_OVERFLOW_TRAPS (type))
330438fd1498Szrj return false;
330538fd1498Szrj
330638fd1498Szrj if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
330738fd1498Szrj return false;
330838fd1498Szrj
330938fd1498Szrj return true;
331038fd1498Szrj }
331138fd1498Szrj
/* This function looks for:
   t1 = a TRUNC_DIV_EXPR b;
   t2 = a TRUNC_MOD_EXPR b;
   and transforms it to the following sequence:
   complex_tmp = DIVMOD (a, b);
   t1 = REALPART_EXPR <complex_tmp>;
   t2 = IMAGPART_EXPR <complex_tmp>;
   For conditions enabling the transform see divmod_candidate_p().

   The pass has three parts:
   1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
      other trunc_div_expr and trunc_mod_expr stmts.
   2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
      to stmts vector.
   3) Insert DIVMOD call just before top_stmt and update entries in
      stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
      IMAGPART_EXPR for mod).  */

static bool
convert_to_divmod (gassign *stmt)
{
  if (stmt_can_throw_internal (stmt)
      || !divmod_candidate_p (stmt))
    return false;

  tree op1 = gimple_assign_rhs1 (stmt);
  tree op2 = gimple_assign_rhs2 (stmt);

  imm_use_iterator use_iter;
  gimple *use_stmt;
  auto_vec<gimple *> stmts;

  gimple *top_stmt = stmt;
  basic_block top_bb = gimple_bb (stmt);

  /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
     at-least stmt and possibly other trunc_div/trunc_mod stmts
     having same operands as stmt.  Candidates are found by walking the
     immediate uses of OP1 (every such stmt uses OP1).  */

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
    {
      if (is_gimple_assign (use_stmt)
	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
	  && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
	  && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
	{
	  if (stmt_can_throw_internal (use_stmt))
	    continue;

	  basic_block bb = gimple_bb (use_stmt);

	  /* Within a block, UIDs give statement order (the pass renumbers
	     them up front); across blocks, use dominance.  */
	  if (bb == top_bb)
	    {
	      if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
		top_stmt = use_stmt;
	    }
	  else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
	    {
	      top_bb = bb;
	      top_stmt = use_stmt;
	    }
	}
    }

  tree top_op1 = gimple_assign_rhs1 (top_stmt);
  tree top_op2 = gimple_assign_rhs2 (top_stmt);

  stmts.safe_push (top_stmt);
  bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);

  /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
     to stmts vector.  The 2nd loop will always add stmt to stmts vector, since
     gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
     2nd loop ends up adding at-least single trunc_mod_expr stmt.  */

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
    {
      if (is_gimple_assign (use_stmt)
	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
	  && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
	  && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
	{
	  if (use_stmt == top_stmt
	      || stmt_can_throw_internal (use_stmt)
	      || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
	    continue;

	  stmts.safe_push (use_stmt);
	  if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
	    div_seen = true;
	}
    }

  /* Replacing a lone mod (or lone div) with DIVMOD has no benefit.  */
  if (!div_seen)
    return false;

  /* Part 3: Create libcall to internal fn DIVMOD:
     divmod_tmp = DIVMOD (op1, op2).  */

  gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
  tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
				 call_stmt, "divmod_tmp");
  gimple_call_set_lhs (call_stmt, res);
  /* We rejected throwing statements above.  */
  gimple_call_set_nothrow (call_stmt, true);

  /* Insert the call before top_stmt.  */
  gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
  gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);

  widen_mul_stats.divmod_calls_inserted++;

  /* Update all statements in stmts vector:
     lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
     lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>.  */

  for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
    {
      tree new_rhs;

      switch (gimple_assign_rhs_code (use_stmt))
	{
	case TRUNC_DIV_EXPR:
	  new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
	  break;

	case TRUNC_MOD_EXPR:
	  new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
	  break;

	default:
	  gcc_unreachable ();
	}

      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
      update_stmt (use_stmt);
    }

  return true;
}
345538fd1498Szrj
345638fd1498Szrj /* Find integer multiplications where the operands are extended from
345738fd1498Szrj smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
345838fd1498Szrj where appropriate. */
345938fd1498Szrj
346038fd1498Szrj namespace {
346138fd1498Szrj
/* Static metadata describing the "widening_mul" GIMPLE pass.  */
const pass_data pass_data_optimize_widening_mul =
{
  GIMPLE_PASS, /* type */
  "widening_mul", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_WIDEN_MUL, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
347438fd1498Szrj
/* Pass wrapper: runs the widening-mul/FMA/overflow/divmod transformations
   implemented in this file.  */

class pass_optimize_widening_mul : public gimple_opt_pass
{
public:
  pass_optimize_widening_mul (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
  {}

  /* opt_pass methods: */
  /* Only run when -fexpensive-optimizations is on and optimizing.  */
  virtual bool gate (function *)
    {
      return flag_expensive_optimizations && optimize;
    }

  virtual unsigned int execute (function *);

}; // class pass_optimize_widening_mul
349138fd1498Szrj
/* Walker class to perform the transformation in reverse dominance order.  */

class math_opts_dom_walker : public dom_walker
{
public:
  /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
     if walking modifies the CFG.  */

  math_opts_dom_walker (bool *cfg_changed_p)
    : dom_walker (CDI_DOMINATORS), m_last_result_set (),
      m_cfg_changed_p (cfg_changed_p) {}

  /* The actual actions performed in the walk.  */

  virtual void after_dom_children (basic_block);

  /* Set of results of chains of multiply and add statement combinations that
     were not transformed into FMAs because of active deferring.  */
  hash_set<tree> m_last_result_set;

  /* Pointer to a flag of the user that needs to be set if CFG has been
     modified.  */
  bool *m_cfg_changed_p;
};
351638fd1498Szrj
/* Process all statements of BB: try the widening-mul, FMA, overflow-check
   and divmod transformations on each candidate statement, maintaining the
   per-BB FMA deferring state.  Called by the dom walker after BB's dominated
   children have been processed.  */
void
math_opts_dom_walker::after_dom_children (basic_block bb)
{
  gimple_stmt_iterator gsi;

  /* Deferring is enabled only when the avoid-fma-max-bits param is set.  */
  fma_deferring_state fma_state (PARAM_VALUE (PARAM_AVOID_FMA_MAX_BITS) > 0);

  /* Iterate manually: transformations that remove the current statement
     skip the gsi_next at the bottom via "continue".  */
  for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
    {
      gimple *stmt = gsi_stmt (gsi);
      enum tree_code code;

      if (is_gimple_assign (stmt))
	{
	  code = gimple_assign_rhs_code (stmt);
	  switch (code)
	    {
	    case MULT_EXPR:
	      /* Try widening first, then copysign expansion, then FMA;
		 only a successful FMA conversion deletes the stmt here.  */
	      if (!convert_mult_to_widen (stmt, &gsi)
		  && !convert_expand_mult_copysign (stmt, &gsi)
		  && convert_mult_to_fma (stmt,
					  gimple_assign_rhs1 (stmt),
					  gimple_assign_rhs2 (stmt),
					  &fma_state))
		{
		  gsi_remove (&gsi, true);
		  release_defs (stmt);
		  continue;
		}
	      break;

	    case PLUS_EXPR:
	    case MINUS_EXPR:
	      if (!convert_plusminus_to_widen (&gsi, stmt, code))
		match_uaddsub_overflow (&gsi, stmt, code);
	      break;

	    case TRUNC_MOD_EXPR:
	      convert_to_divmod (as_a<gassign *> (stmt));
	      break;

	    default:;
	    }
	}
      else if (is_gimple_call (stmt))
	{
	  tree fndecl = gimple_call_fndecl (stmt);
	  if (fndecl && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
	    {
	      switch (DECL_FUNCTION_CODE (fndecl))
		{
		case BUILT_IN_POWF:
		case BUILT_IN_POW:
		case BUILT_IN_POWL:
		  /* pow (x, 2.0) is x * x, which can feed an FMA just like
		     an explicit multiplication.  */
		  if (gimple_call_lhs (stmt)
		      && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
		      && real_equal
			   (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
			    &dconst2)
		      && convert_mult_to_fma (stmt,
					      gimple_call_arg (stmt, 0),
					      gimple_call_arg (stmt, 0),
					      &fma_state))
		    {
		      unlink_stmt_vdef (stmt);
		      if (gsi_remove (&gsi, true)
			  && gimple_purge_dead_eh_edges (bb))
			*m_cfg_changed_p = true;
		      release_defs (stmt);
		      continue;
		    }
		  break;

		default:;
		}
	    }
	  else
	    /* An unknown call ends any FMA chain being deferred.  */
	    cancel_fma_deferring (&fma_state);
	}
      gsi_next (&gsi);
    }
  /* If we ended the BB with candidates still deferred, keep them deferred
     only when the chain really feeds its initial PHI; otherwise emit the
     FMAs after all.  */
  if (fma_state.m_deferring_p
      && fma_state.m_initial_phi)
    {
      gcc_checking_assert (fma_state.m_last_result);
      if (!last_fma_candidate_feeds_initial_phi (&fma_state,
						 &m_last_result_set))
	cancel_fma_deferring (&fma_state);
      else
	m_last_result_set.add (fma_state.m_last_result);
    }
}
360938fd1498Szrj
361038fd1498Szrj
361138fd1498Szrj unsigned int
execute(function * fun)361238fd1498Szrj pass_optimize_widening_mul::execute (function *fun)
361338fd1498Szrj {
361438fd1498Szrj bool cfg_changed = false;
361538fd1498Szrj
361638fd1498Szrj memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
361738fd1498Szrj calculate_dominance_info (CDI_DOMINATORS);
361838fd1498Szrj renumber_gimple_stmt_uids ();
361938fd1498Szrj
362038fd1498Szrj math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
362138fd1498Szrj
362238fd1498Szrj statistics_counter_event (fun, "widening multiplications inserted",
362338fd1498Szrj widen_mul_stats.widen_mults_inserted);
362438fd1498Szrj statistics_counter_event (fun, "widening maccs inserted",
362538fd1498Szrj widen_mul_stats.maccs_inserted);
362638fd1498Szrj statistics_counter_event (fun, "fused multiply-adds inserted",
362738fd1498Szrj widen_mul_stats.fmas_inserted);
362838fd1498Szrj statistics_counter_event (fun, "divmod calls inserted",
362938fd1498Szrj widen_mul_stats.divmod_calls_inserted);
363038fd1498Szrj
363138fd1498Szrj return cfg_changed ? TODO_cleanup_cfg : 0;
363238fd1498Szrj }
363338fd1498Szrj
363438fd1498Szrj } // anon namespace
363538fd1498Szrj
363638fd1498Szrj gimple_opt_pass *
make_pass_optimize_widening_mul(gcc::context * ctxt)363738fd1498Szrj make_pass_optimize_widening_mul (gcc::context *ctxt)
363838fd1498Szrj {
363938fd1498Szrj return new pass_optimize_widening_mul (ctxt);
364038fd1498Szrj }
3641