138fd1498Szrj /* Global, SSA-based optimizations using mathematical identities.
238fd1498Szrj    Copyright (C) 2005-2018 Free Software Foundation, Inc.
338fd1498Szrj 
438fd1498Szrj This file is part of GCC.
538fd1498Szrj 
638fd1498Szrj GCC is free software; you can redistribute it and/or modify it
738fd1498Szrj under the terms of the GNU General Public License as published by the
838fd1498Szrj Free Software Foundation; either version 3, or (at your option) any
938fd1498Szrj later version.
1038fd1498Szrj 
1138fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT
1238fd1498Szrj ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1338fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1438fd1498Szrj for more details.
1538fd1498Szrj 
1638fd1498Szrj You should have received a copy of the GNU General Public License
1738fd1498Szrj along with GCC; see the file COPYING3.  If not see
1838fd1498Szrj <http://www.gnu.org/licenses/>.  */
1938fd1498Szrj 
2038fd1498Szrj /* Currently, the only mini-pass in this file tries to CSE reciprocal
2138fd1498Szrj    operations.  These are common in sequences such as this one:
2238fd1498Szrj 
2338fd1498Szrj 	modulus = sqrt(x*x + y*y + z*z);
2438fd1498Szrj 	x = x / modulus;
2538fd1498Szrj 	y = y / modulus;
2638fd1498Szrj 	z = z / modulus;
2738fd1498Szrj 
2838fd1498Szrj    that can be optimized to
2938fd1498Szrj 
3038fd1498Szrj 	modulus = sqrt(x*x + y*y + z*z);
3138fd1498Szrj         rmodulus = 1.0 / modulus;
3238fd1498Szrj 	x = x * rmodulus;
3338fd1498Szrj 	y = y * rmodulus;
3438fd1498Szrj 	z = z * rmodulus;
3538fd1498Szrj 
3638fd1498Szrj    We do this for loop invariant divisors, and with this pass whenever
3738fd1498Szrj    we notice that a division has the same divisor multiple times.
3838fd1498Szrj 
3938fd1498Szrj    Of course, like in PRE, we don't insert a division if a dominator
4038fd1498Szrj    already has one.  However, this cannot be done as an extension of
4138fd1498Szrj    PRE for several reasons.
4238fd1498Szrj 
4338fd1498Szrj    First of all, with some experiments it was found out that the
4438fd1498Szrj    transformation is not always useful if there are only two divisions
4538fd1498Szrj    by the same divisor.  This is probably because modern processors
4638fd1498Szrj    can pipeline the divisions; on older, in-order processors it should
4738fd1498Szrj    still be effective to optimize two divisions by the same number.
4838fd1498Szrj    We make this a param, and it shall be called N in the remainder of
4938fd1498Szrj    this comment.
5038fd1498Szrj 
5138fd1498Szrj    Second, if trapping math is active, we have less freedom on where
5238fd1498Szrj    to insert divisions: we can only do so in basic blocks that already
5338fd1498Szrj    contain one.  (If divisions don't trap, instead, we can insert
5438fd1498Szrj    divisions elsewhere, which will be in blocks that are common dominators
5538fd1498Szrj    of those that have the division).
5638fd1498Szrj 
5738fd1498Szrj    We really don't want to compute the reciprocal unless a division will
5838fd1498Szrj    be found.  To do this, we won't insert the division in a basic block
5938fd1498Szrj    that has less than N divisions *post-dominating* it.
6038fd1498Szrj 
6138fd1498Szrj    The algorithm constructs a subset of the dominator tree, holding the
6238fd1498Szrj    blocks containing the divisions and the common dominators to them,
6338fd1498Szrj    and walk it twice.  The first walk is in post-order, and it annotates
6438fd1498Szrj    each block with the number of divisions that post-dominate it: this
6538fd1498Szrj    gives information on where divisions can be inserted profitably.
6638fd1498Szrj    The second walk is in pre-order, and it inserts divisions as explained
6738fd1498Szrj    above, and replaces divisions by multiplications.
6838fd1498Szrj 
6938fd1498Szrj    In the best case, the cost of the pass is O(n_statements).  In the
7038fd1498Szrj    worst-case, the cost is due to creating the dominator tree subset,
7138fd1498Szrj    with a cost of O(n_basic_blocks ^ 2); however this can only happen
7238fd1498Szrj    for n_statements / n_basic_blocks statements.  So, the amortized cost
7338fd1498Szrj    of creating the dominator tree subset is O(n_basic_blocks) and the
7438fd1498Szrj    worst-case cost of the pass is O(n_statements * n_basic_blocks).
7538fd1498Szrj 
7638fd1498Szrj    More practically, the cost will be small because there are few
7738fd1498Szrj    divisions, and they tend to be in the same basic block, so insert_bb
7838fd1498Szrj    is called very few times.
7938fd1498Szrj 
8038fd1498Szrj    If we did this using domwalk.c, an efficient implementation would have
8138fd1498Szrj    to work on all the variables in a single pass, because we could not
8238fd1498Szrj    work on just a subset of the dominator tree, as we do now, and the
8338fd1498Szrj    cost would also be something like O(n_statements * n_basic_blocks).
8438fd1498Szrj    The data structures would be more complex in order to work on all the
8538fd1498Szrj    variables in a single pass.  */
8638fd1498Szrj 
8738fd1498Szrj #include "config.h"
8838fd1498Szrj #include "system.h"
8938fd1498Szrj #include "coretypes.h"
9038fd1498Szrj #include "backend.h"
9138fd1498Szrj #include "target.h"
9238fd1498Szrj #include "rtl.h"
9338fd1498Szrj #include "tree.h"
9438fd1498Szrj #include "gimple.h"
9538fd1498Szrj #include "predict.h"
9638fd1498Szrj #include "alloc-pool.h"
9738fd1498Szrj #include "tree-pass.h"
9838fd1498Szrj #include "ssa.h"
9938fd1498Szrj #include "optabs-tree.h"
10038fd1498Szrj #include "gimple-pretty-print.h"
10138fd1498Szrj #include "alias.h"
10238fd1498Szrj #include "fold-const.h"
10338fd1498Szrj #include "gimple-fold.h"
10438fd1498Szrj #include "gimple-iterator.h"
10538fd1498Szrj #include "gimplify.h"
10638fd1498Szrj #include "gimplify-me.h"
10738fd1498Szrj #include "stor-layout.h"
10838fd1498Szrj #include "tree-cfg.h"
10938fd1498Szrj #include "tree-dfa.h"
11038fd1498Szrj #include "tree-ssa.h"
11138fd1498Szrj #include "builtins.h"
11238fd1498Szrj #include "params.h"
11338fd1498Szrj #include "internal-fn.h"
11438fd1498Szrj #include "case-cfn-macros.h"
11538fd1498Szrj #include "optabs-libfuncs.h"
11638fd1498Szrj #include "tree-eh.h"
11738fd1498Szrj #include "targhooks.h"
11838fd1498Szrj #include "domwalk.h"
11938fd1498Szrj 
12038fd1498Szrj /* This structure represents one basic block that either computes a
12138fd1498Szrj    division, or is a common dominator for basic block that compute a
12238fd1498Szrj    division.  */
12338fd1498Szrj struct occurrence {
12438fd1498Szrj   /* The basic block represented by this structure.  */
12538fd1498Szrj   basic_block bb;
12638fd1498Szrj 
12738fd1498Szrj   /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
12838fd1498Szrj      inserted in BB.  */
12938fd1498Szrj   tree recip_def;
13038fd1498Szrj 
13138fd1498Szrj   /* If non-NULL, the SSA_NAME holding the definition for a squared
13238fd1498Szrj      reciprocal inserted in BB.  */
13338fd1498Szrj   tree square_recip_def;
13438fd1498Szrj 
13538fd1498Szrj   /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
13638fd1498Szrj      was inserted in BB.  */
13738fd1498Szrj   gimple *recip_def_stmt;
13838fd1498Szrj 
13938fd1498Szrj   /* Pointer to a list of "struct occurrence"s for blocks dominated
14038fd1498Szrj      by BB.  */
14138fd1498Szrj   struct occurrence *children;
14238fd1498Szrj 
14338fd1498Szrj   /* Pointer to the next "struct occurrence"s in the list of blocks
14438fd1498Szrj      sharing a common dominator.  */
14538fd1498Szrj   struct occurrence *next;
14638fd1498Szrj 
14738fd1498Szrj   /* The number of divisions that are in BB before compute_merit.  The
14838fd1498Szrj      number of divisions that are in BB or post-dominate it after
14938fd1498Szrj      compute_merit.  */
15038fd1498Szrj   int num_divisions;
15138fd1498Szrj 
15238fd1498Szrj   /* True if the basic block has a division, false if it is a common
15338fd1498Szrj      dominator for basic blocks that do.  If it is false and trapping
15438fd1498Szrj      math is active, BB is not a candidate for inserting a reciprocal.  */
15538fd1498Szrj   bool bb_has_division;
15638fd1498Szrj };
15738fd1498Szrj 
15838fd1498Szrj static struct
15938fd1498Szrj {
16038fd1498Szrj   /* Number of 1.0/X ops inserted.  */
16138fd1498Szrj   int rdivs_inserted;
16238fd1498Szrj 
16338fd1498Szrj   /* Number of 1.0/FUNC ops inserted.  */
16438fd1498Szrj   int rfuncs_inserted;
16538fd1498Szrj } reciprocal_stats;
16638fd1498Szrj 
16738fd1498Szrj static struct
16838fd1498Szrj {
16938fd1498Szrj   /* Number of cexpi calls inserted.  */
17038fd1498Szrj   int inserted;
17138fd1498Szrj } sincos_stats;
17238fd1498Szrj 
17338fd1498Szrj static struct
17438fd1498Szrj {
17538fd1498Szrj   /* Number of widening multiplication ops inserted.  */
17638fd1498Szrj   int widen_mults_inserted;
17738fd1498Szrj 
17838fd1498Szrj   /* Number of integer multiply-and-accumulate ops inserted.  */
17938fd1498Szrj   int maccs_inserted;
18038fd1498Szrj 
18138fd1498Szrj   /* Number of fp fused multiply-add ops inserted.  */
18238fd1498Szrj   int fmas_inserted;
18338fd1498Szrj 
18438fd1498Szrj   /* Number of divmod calls inserted.  */
18538fd1498Szrj   int divmod_calls_inserted;
18638fd1498Szrj } widen_mul_stats;
18738fd1498Szrj 
18838fd1498Szrj /* The instance of "struct occurrence" representing the highest
18938fd1498Szrj    interesting block in the dominator tree.  */
19038fd1498Szrj static struct occurrence *occ_head;
19138fd1498Szrj 
19238fd1498Szrj /* Allocation pool for getting instances of "struct occurrence".  */
19338fd1498Szrj static object_allocator<occurrence> *occ_pool;
19438fd1498Szrj 
19538fd1498Szrj 
19638fd1498Szrj 
19738fd1498Szrj /* Allocate and return a new struct occurrence for basic block BB, and
19838fd1498Szrj    whose children list is headed by CHILDREN.  */
19938fd1498Szrj static struct occurrence *
occ_new(basic_block bb,struct occurrence * children)20038fd1498Szrj occ_new (basic_block bb, struct occurrence *children)
20138fd1498Szrj {
20238fd1498Szrj   struct occurrence *occ;
20338fd1498Szrj 
20438fd1498Szrj   bb->aux = occ = occ_pool->allocate ();
20538fd1498Szrj   memset (occ, 0, sizeof (struct occurrence));
20638fd1498Szrj 
20738fd1498Szrj   occ->bb = bb;
20838fd1498Szrj   occ->children = children;
20938fd1498Szrj   return occ;
21038fd1498Szrj }
21138fd1498Szrj 
21238fd1498Szrj 
21338fd1498Szrj /* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
21438fd1498Szrj    list of "struct occurrence"s, one per basic block, having IDOM as
21538fd1498Szrj    their common dominator.
21638fd1498Szrj 
21738fd1498Szrj    We try to insert NEW_OCC as deep as possible in the tree, and we also
21838fd1498Szrj    insert any other block that is a common dominator for BB and one
21938fd1498Szrj    block already in the tree.  */
22038fd1498Szrj 
22138fd1498Szrj static void
insert_bb(struct occurrence * new_occ,basic_block idom,struct occurrence ** p_head)22238fd1498Szrj insert_bb (struct occurrence *new_occ, basic_block idom,
22338fd1498Szrj 	   struct occurrence **p_head)
22438fd1498Szrj {
22538fd1498Szrj   struct occurrence *occ, **p_occ;
22638fd1498Szrj 
22738fd1498Szrj   for (p_occ = p_head; (occ = *p_occ) != NULL; )
22838fd1498Szrj     {
22938fd1498Szrj       basic_block bb = new_occ->bb, occ_bb = occ->bb;
23038fd1498Szrj       basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
23138fd1498Szrj       if (dom == bb)
23238fd1498Szrj 	{
23338fd1498Szrj 	  /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
23438fd1498Szrj 	     from its list.  */
23538fd1498Szrj 	  *p_occ = occ->next;
23638fd1498Szrj 	  occ->next = new_occ->children;
23738fd1498Szrj 	  new_occ->children = occ;
23838fd1498Szrj 
23938fd1498Szrj 	  /* Try the next block (it may as well be dominated by BB).  */
24038fd1498Szrj 	}
24138fd1498Szrj 
24238fd1498Szrj       else if (dom == occ_bb)
24338fd1498Szrj 	{
24438fd1498Szrj 	  /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
24538fd1498Szrj 	  insert_bb (new_occ, dom, &occ->children);
24638fd1498Szrj 	  return;
24738fd1498Szrj 	}
24838fd1498Szrj 
24938fd1498Szrj       else if (dom != idom)
25038fd1498Szrj 	{
25138fd1498Szrj 	  gcc_assert (!dom->aux);
25238fd1498Szrj 
25338fd1498Szrj 	  /* There is a dominator between IDOM and BB, add it and make
25438fd1498Szrj 	     two children out of NEW_OCC and OCC.  First, remove OCC from
25538fd1498Szrj 	     its list.	*/
25638fd1498Szrj 	  *p_occ = occ->next;
25738fd1498Szrj 	  new_occ->next = occ;
25838fd1498Szrj 	  occ->next = NULL;
25938fd1498Szrj 
26038fd1498Szrj 	  /* None of the previous blocks has DOM as a dominator: if we tail
26138fd1498Szrj 	     recursed, we would reexamine them uselessly. Just switch BB with
26238fd1498Szrj 	     DOM, and go on looking for blocks dominated by DOM.  */
26338fd1498Szrj           new_occ = occ_new (dom, new_occ);
26438fd1498Szrj 	}
26538fd1498Szrj 
26638fd1498Szrj       else
26738fd1498Szrj 	{
26838fd1498Szrj 	  /* Nothing special, go on with the next element.  */
26938fd1498Szrj 	  p_occ = &occ->next;
27038fd1498Szrj 	}
27138fd1498Szrj     }
27238fd1498Szrj 
27338fd1498Szrj   /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
27438fd1498Szrj   new_occ->next = *p_head;
27538fd1498Szrj   *p_head = new_occ;
27638fd1498Szrj }
27738fd1498Szrj 
27838fd1498Szrj /* Register that we found a division in BB.
27938fd1498Szrj    IMPORTANCE is a measure of how much weighting to give
28038fd1498Szrj    that division.  Use IMPORTANCE = 2 to register a single
28138fd1498Szrj    division.  If the division is going to be found multiple
28238fd1498Szrj    times use 1 (as it is with squares).  */
28338fd1498Szrj 
28438fd1498Szrj static inline void
register_division_in(basic_block bb,int importance)28538fd1498Szrj register_division_in (basic_block bb, int importance)
28638fd1498Szrj {
28738fd1498Szrj   struct occurrence *occ;
28838fd1498Szrj 
28938fd1498Szrj   occ = (struct occurrence *) bb->aux;
29038fd1498Szrj   if (!occ)
29138fd1498Szrj     {
29238fd1498Szrj       occ = occ_new (bb, NULL);
29338fd1498Szrj       insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
29438fd1498Szrj     }
29538fd1498Szrj 
29638fd1498Szrj   occ->bb_has_division = true;
29738fd1498Szrj   occ->num_divisions += importance;
29838fd1498Szrj }
29938fd1498Szrj 
30038fd1498Szrj 
30138fd1498Szrj /* Compute the number of divisions that postdominate each block in OCC and
30238fd1498Szrj    its children.  */
30338fd1498Szrj 
30438fd1498Szrj static void
compute_merit(struct occurrence * occ)30538fd1498Szrj compute_merit (struct occurrence *occ)
30638fd1498Szrj {
30738fd1498Szrj   struct occurrence *occ_child;
30838fd1498Szrj   basic_block dom = occ->bb;
30938fd1498Szrj 
31038fd1498Szrj   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
31138fd1498Szrj     {
31238fd1498Szrj       basic_block bb;
31338fd1498Szrj       if (occ_child->children)
31438fd1498Szrj         compute_merit (occ_child);
31538fd1498Szrj 
31638fd1498Szrj       if (flag_exceptions)
31738fd1498Szrj 	bb = single_noncomplex_succ (dom);
31838fd1498Szrj       else
31938fd1498Szrj 	bb = dom;
32038fd1498Szrj 
32138fd1498Szrj       if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
32238fd1498Szrj         occ->num_divisions += occ_child->num_divisions;
32338fd1498Szrj     }
32438fd1498Szrj }
32538fd1498Szrj 
32638fd1498Szrj 
32738fd1498Szrj /* Return whether USE_STMT is a floating-point division by DEF.  */
32838fd1498Szrj static inline bool
is_division_by(gimple * use_stmt,tree def)32938fd1498Szrj is_division_by (gimple *use_stmt, tree def)
33038fd1498Szrj {
33138fd1498Szrj   return is_gimple_assign (use_stmt)
33238fd1498Szrj 	 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
33338fd1498Szrj 	 && gimple_assign_rhs2 (use_stmt) == def
33438fd1498Szrj 	 /* Do not recognize x / x as valid division, as we are getting
33538fd1498Szrj 	    confused later by replacing all immediate uses x in such
33638fd1498Szrj 	    a stmt.  */
33738fd1498Szrj 	 && gimple_assign_rhs1 (use_stmt) != def;
33838fd1498Szrj }
33938fd1498Szrj 
34038fd1498Szrj /* Return whether USE_STMT is DEF * DEF.  */
34138fd1498Szrj static inline bool
is_square_of(gimple * use_stmt,tree def)34238fd1498Szrj is_square_of (gimple *use_stmt, tree def)
34338fd1498Szrj {
34438fd1498Szrj   if (gimple_code (use_stmt) == GIMPLE_ASSIGN
34538fd1498Szrj       && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
34638fd1498Szrj     {
34738fd1498Szrj       tree op0 = gimple_assign_rhs1 (use_stmt);
34838fd1498Szrj       tree op1 = gimple_assign_rhs2 (use_stmt);
34938fd1498Szrj 
35038fd1498Szrj       return op0 == op1 && op0 == def;
35138fd1498Szrj     }
35238fd1498Szrj   return 0;
35338fd1498Szrj }
35438fd1498Szrj 
35538fd1498Szrj /* Return whether USE_STMT is a floating-point division by
35638fd1498Szrj    DEF * DEF.  */
35738fd1498Szrj static inline bool
is_division_by_square(gimple * use_stmt,tree def)35838fd1498Szrj is_division_by_square (gimple *use_stmt, tree def)
35938fd1498Szrj {
36038fd1498Szrj   if (gimple_code (use_stmt) == GIMPLE_ASSIGN
36138fd1498Szrj       && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
36238fd1498Szrj       && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt))
36338fd1498Szrj     {
36438fd1498Szrj       tree denominator = gimple_assign_rhs2 (use_stmt);
36538fd1498Szrj       if (TREE_CODE (denominator) == SSA_NAME)
36638fd1498Szrj 	{
36738fd1498Szrj 	  return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
36838fd1498Szrj 	}
36938fd1498Szrj     }
37038fd1498Szrj   return 0;
37138fd1498Szrj }
37238fd1498Szrj 
37338fd1498Szrj /* Walk the subset of the dominator tree rooted at OCC, setting the
37438fd1498Szrj    RECIP_DEF field to a definition of 1.0 / DEF that can be used in
37538fd1498Szrj    the given basic block.  The field may be left NULL, of course,
37638fd1498Szrj    if it is not possible or profitable to do the optimization.
37738fd1498Szrj 
37838fd1498Szrj    DEF_BSI is an iterator pointing at the statement defining DEF.
37938fd1498Szrj    If RECIP_DEF is set, a dominator already has a computation that can
38038fd1498Szrj    be used.
38138fd1498Szrj 
38238fd1498Szrj    If should_insert_square_recip is set, then this also inserts
38338fd1498Szrj    the square of the reciprocal immediately after the definition
38438fd1498Szrj    of the reciprocal.  */
38538fd1498Szrj 
38638fd1498Szrj static void
insert_reciprocals(gimple_stmt_iterator * def_gsi,struct occurrence * occ,tree def,tree recip_def,tree square_recip_def,int should_insert_square_recip,int threshold)38738fd1498Szrj insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
38838fd1498Szrj 		    tree def, tree recip_def, tree square_recip_def,
38938fd1498Szrj 		    int should_insert_square_recip, int threshold)
39038fd1498Szrj {
39138fd1498Szrj   tree type;
39238fd1498Szrj   gassign *new_stmt, *new_square_stmt;
39338fd1498Szrj   gimple_stmt_iterator gsi;
39438fd1498Szrj   struct occurrence *occ_child;
39538fd1498Szrj 
39638fd1498Szrj   if (!recip_def
39738fd1498Szrj       && (occ->bb_has_division || !flag_trapping_math)
39838fd1498Szrj       /* Divide by two as all divisions are counted twice in
39938fd1498Szrj 	 the costing loop.  */
40038fd1498Szrj       && occ->num_divisions / 2 >= threshold)
40138fd1498Szrj     {
40238fd1498Szrj       /* Make a variable with the replacement and substitute it.  */
40338fd1498Szrj       type = TREE_TYPE (def);
40438fd1498Szrj       recip_def = create_tmp_reg (type, "reciptmp");
40538fd1498Szrj       new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
40638fd1498Szrj 				      build_one_cst (type), def);
40738fd1498Szrj 
40838fd1498Szrj       if (should_insert_square_recip)
40938fd1498Szrj 	{
41038fd1498Szrj 	  square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
41138fd1498Szrj 	  new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
41238fd1498Szrj 						 recip_def, recip_def);
41338fd1498Szrj 	}
41438fd1498Szrj 
41538fd1498Szrj       if (occ->bb_has_division)
41638fd1498Szrj 	{
41738fd1498Szrj 	  /* Case 1: insert before an existing division.  */
41838fd1498Szrj 	  gsi = gsi_after_labels (occ->bb);
41938fd1498Szrj 	  while (!gsi_end_p (gsi)
42038fd1498Szrj 		 && (!is_division_by (gsi_stmt (gsi), def))
42138fd1498Szrj 		 && (!is_division_by_square (gsi_stmt (gsi), def)))
42238fd1498Szrj 	    gsi_next (&gsi);
42338fd1498Szrj 
42438fd1498Szrj 	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
42558e805e6Szrj 	  if (should_insert_square_recip)
42658e805e6Szrj 	    gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
42738fd1498Szrj 	}
42838fd1498Szrj       else if (def_gsi && occ->bb == def_gsi->bb)
42938fd1498Szrj 	{
43038fd1498Szrj 	  /* Case 2: insert right after the definition.  Note that this will
43138fd1498Szrj 	     never happen if the definition statement can throw, because in
43238fd1498Szrj 	     that case the sole successor of the statement's basic block will
43338fd1498Szrj 	     dominate all the uses as well.  */
43438fd1498Szrj 	  gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
43558e805e6Szrj 	  if (should_insert_square_recip)
43658e805e6Szrj 	    gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
43738fd1498Szrj 	}
43838fd1498Szrj       else
43938fd1498Szrj 	{
44038fd1498Szrj 	  /* Case 3: insert in a basic block not containing defs/uses.  */
44138fd1498Szrj 	  gsi = gsi_after_labels (occ->bb);
44238fd1498Szrj 	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
44338fd1498Szrj 	  if (should_insert_square_recip)
44438fd1498Szrj 	    gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
44558e805e6Szrj 	}
44638fd1498Szrj 
44738fd1498Szrj       reciprocal_stats.rdivs_inserted++;
44838fd1498Szrj 
44938fd1498Szrj       occ->recip_def_stmt = new_stmt;
45038fd1498Szrj     }
45138fd1498Szrj 
45238fd1498Szrj   occ->recip_def = recip_def;
45338fd1498Szrj   occ->square_recip_def = square_recip_def;
45438fd1498Szrj   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
45538fd1498Szrj     insert_reciprocals (def_gsi, occ_child, def, recip_def,
45638fd1498Szrj 			square_recip_def, should_insert_square_recip,
45738fd1498Szrj 			threshold);
45838fd1498Szrj }
45938fd1498Szrj 
46038fd1498Szrj /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
46138fd1498Szrj    Take as argument the use for (x * x).  */
46238fd1498Szrj static inline void
replace_reciprocal_squares(use_operand_p use_p)46338fd1498Szrj replace_reciprocal_squares (use_operand_p use_p)
46438fd1498Szrj {
46538fd1498Szrj   gimple *use_stmt = USE_STMT (use_p);
46638fd1498Szrj   basic_block bb = gimple_bb (use_stmt);
46738fd1498Szrj   struct occurrence *occ = (struct occurrence *) bb->aux;
46838fd1498Szrj 
46938fd1498Szrj   if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
47038fd1498Szrj       && occ->recip_def)
47138fd1498Szrj     {
47238fd1498Szrj       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
47338fd1498Szrj       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
47438fd1498Szrj       gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
47538fd1498Szrj       SET_USE (use_p, occ->square_recip_def);
47638fd1498Szrj       fold_stmt_inplace (&gsi);
47738fd1498Szrj       update_stmt (use_stmt);
47838fd1498Szrj     }
47938fd1498Szrj }
48038fd1498Szrj 
48138fd1498Szrj 
48238fd1498Szrj /* Replace the division at USE_P with a multiplication by the reciprocal, if
48338fd1498Szrj    possible.  */
48438fd1498Szrj 
48538fd1498Szrj static inline void
replace_reciprocal(use_operand_p use_p)48638fd1498Szrj replace_reciprocal (use_operand_p use_p)
48738fd1498Szrj {
48838fd1498Szrj   gimple *use_stmt = USE_STMT (use_p);
48938fd1498Szrj   basic_block bb = gimple_bb (use_stmt);
49038fd1498Szrj   struct occurrence *occ = (struct occurrence *) bb->aux;
49138fd1498Szrj 
49238fd1498Szrj   if (optimize_bb_for_speed_p (bb)
49338fd1498Szrj       && occ->recip_def && use_stmt != occ->recip_def_stmt)
49438fd1498Szrj     {
49538fd1498Szrj       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
49638fd1498Szrj       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
49738fd1498Szrj       SET_USE (use_p, occ->recip_def);
49838fd1498Szrj       fold_stmt_inplace (&gsi);
49938fd1498Szrj       update_stmt (use_stmt);
50038fd1498Szrj     }
50138fd1498Szrj }
50238fd1498Szrj 
50338fd1498Szrj 
50438fd1498Szrj /* Free OCC and return one more "struct occurrence" to be freed.  */
50538fd1498Szrj 
50638fd1498Szrj static struct occurrence *
free_bb(struct occurrence * occ)50738fd1498Szrj free_bb (struct occurrence *occ)
50838fd1498Szrj {
50938fd1498Szrj   struct occurrence *child, *next;
51038fd1498Szrj 
51138fd1498Szrj   /* First get the two pointers hanging off OCC.  */
51238fd1498Szrj   next = occ->next;
51338fd1498Szrj   child = occ->children;
51438fd1498Szrj   occ->bb->aux = NULL;
51538fd1498Szrj   occ_pool->remove (occ);
51638fd1498Szrj 
51738fd1498Szrj   /* Now ensure that we don't recurse unless it is necessary.  */
51838fd1498Szrj   if (!child)
51938fd1498Szrj     return next;
52038fd1498Szrj   else
52138fd1498Szrj     {
52238fd1498Szrj       while (next)
52338fd1498Szrj 	next = free_bb (next);
52438fd1498Szrj 
52538fd1498Szrj       return child;
52638fd1498Szrj     }
52738fd1498Szrj }
52838fd1498Szrj 
52938fd1498Szrj 
53038fd1498Szrj /* Look for floating-point divisions among DEF's uses, and try to
53138fd1498Szrj    replace them by multiplications with the reciprocal.  Add
53238fd1498Szrj    as many statements computing the reciprocal as needed.
53338fd1498Szrj 
53438fd1498Szrj    DEF must be a GIMPLE register of a floating-point type.  */
53538fd1498Szrj 
53638fd1498Szrj static void
execute_cse_reciprocals_1(gimple_stmt_iterator * def_gsi,tree def)53738fd1498Szrj execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
53838fd1498Szrj {
53938fd1498Szrj   use_operand_p use_p, square_use_p;
54038fd1498Szrj   imm_use_iterator use_iter, square_use_iter;
54138fd1498Szrj   tree square_def;
54238fd1498Szrj   struct occurrence *occ;
54338fd1498Szrj   int count = 0;
54438fd1498Szrj   int threshold;
54538fd1498Szrj   int square_recip_count = 0;
54638fd1498Szrj   int sqrt_recip_count = 0;
54738fd1498Szrj 
54838fd1498Szrj   gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
54938fd1498Szrj   threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
55038fd1498Szrj 
55138fd1498Szrj   /* If DEF is a square (x * x), count the number of divisions by x.
55238fd1498Szrj      If there are more divisions by x than by (DEF * DEF), prefer to optimize
55338fd1498Szrj      the reciprocal of x instead of DEF.  This improves cases like:
55438fd1498Szrj        def = x * x
55538fd1498Szrj        t0 = a / def
55638fd1498Szrj        t1 = b / def
55738fd1498Szrj        t2 = c / x
55838fd1498Szrj      Reciprocal optimization of x results in 1 division rather than 2 or 3.  */
55938fd1498Szrj   gimple *def_stmt = SSA_NAME_DEF_STMT (def);
56038fd1498Szrj 
56138fd1498Szrj   if (is_gimple_assign (def_stmt)
56238fd1498Szrj       && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
56338fd1498Szrj       && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
56438fd1498Szrj       && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
56538fd1498Szrj     {
56638fd1498Szrj       tree op0 = gimple_assign_rhs1 (def_stmt);
56738fd1498Szrj 
56838fd1498Szrj       FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
56938fd1498Szrj 	{
57038fd1498Szrj 	  gimple *use_stmt = USE_STMT (use_p);
57138fd1498Szrj 	  if (is_division_by (use_stmt, op0))
57238fd1498Szrj 	    sqrt_recip_count++;
57338fd1498Szrj 	}
57438fd1498Szrj     }
57538fd1498Szrj 
57638fd1498Szrj   FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
57738fd1498Szrj     {
57838fd1498Szrj       gimple *use_stmt = USE_STMT (use_p);
57938fd1498Szrj       if (is_division_by (use_stmt, def))
58038fd1498Szrj 	{
58138fd1498Szrj 	  register_division_in (gimple_bb (use_stmt), 2);
58238fd1498Szrj 	  count++;
58338fd1498Szrj 	}
58438fd1498Szrj 
58538fd1498Szrj       if (is_square_of (use_stmt, def))
58638fd1498Szrj 	{
58738fd1498Szrj 	  square_def = gimple_assign_lhs (use_stmt);
58838fd1498Szrj 	  FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
58938fd1498Szrj 	    {
59038fd1498Szrj 	      gimple *square_use_stmt = USE_STMT (square_use_p);
59138fd1498Szrj 	      if (is_division_by (square_use_stmt, square_def))
59238fd1498Szrj 		{
59338fd1498Szrj 		  /* This is executed twice for each division by a square.  */
59438fd1498Szrj 		  register_division_in (gimple_bb (square_use_stmt), 1);
59538fd1498Szrj 		  square_recip_count++;
59638fd1498Szrj 		}
59738fd1498Szrj 	    }
59838fd1498Szrj 	}
59938fd1498Szrj     }
60038fd1498Szrj 
60138fd1498Szrj   /* Square reciprocals were counted twice above.  */
60238fd1498Szrj   square_recip_count /= 2;
60338fd1498Szrj 
60438fd1498Szrj   /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x).  */
60538fd1498Szrj   if (sqrt_recip_count > square_recip_count)
606*e215fc28Szrj     goto out;
60738fd1498Szrj 
60838fd1498Szrj   /* Do the expensive part only if we can hope to optimize something.  */
60938fd1498Szrj   if (count + square_recip_count >= threshold && count >= 1)
61038fd1498Szrj     {
61138fd1498Szrj       gimple *use_stmt;
61238fd1498Szrj       for (occ = occ_head; occ; occ = occ->next)
61338fd1498Szrj 	{
61438fd1498Szrj 	  compute_merit (occ);
61538fd1498Szrj 	  insert_reciprocals (def_gsi, occ, def, NULL, NULL,
61638fd1498Szrj 			      square_recip_count, threshold);
61738fd1498Szrj 	}
61838fd1498Szrj 
61938fd1498Szrj       FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
62038fd1498Szrj 	{
62138fd1498Szrj 	  if (is_division_by (use_stmt, def))
62238fd1498Szrj 	    {
62338fd1498Szrj 	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
62438fd1498Szrj 		replace_reciprocal (use_p);
62538fd1498Szrj 	    }
62638fd1498Szrj 	  else if (square_recip_count > 0 && is_square_of (use_stmt, def))
62738fd1498Szrj 	    {
62838fd1498Szrj 	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
62938fd1498Szrj 		{
63038fd1498Szrj 		  /* Find all uses of the square that are divisions and
63138fd1498Szrj 		   * replace them by multiplications with the inverse.  */
63238fd1498Szrj 		  imm_use_iterator square_iterator;
63338fd1498Szrj 		  gimple *powmult_use_stmt = USE_STMT (use_p);
63438fd1498Szrj 		  tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
63538fd1498Szrj 
63638fd1498Szrj 		  FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
63738fd1498Szrj 					 square_iterator, powmult_def_name)
63838fd1498Szrj 		    FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
63938fd1498Szrj 		      {
64038fd1498Szrj 			gimple *powmult_use_stmt = USE_STMT (square_use_p);
64138fd1498Szrj 			if (is_division_by (powmult_use_stmt, powmult_def_name))
64238fd1498Szrj 			  replace_reciprocal_squares (square_use_p);
64338fd1498Szrj 		      }
64438fd1498Szrj 		}
64538fd1498Szrj 	    }
64638fd1498Szrj 	}
64738fd1498Szrj     }
64838fd1498Szrj 
649*e215fc28Szrj out:
65038fd1498Szrj   for (occ = occ_head; occ; )
65138fd1498Szrj     occ = free_bb (occ);
65238fd1498Szrj 
65338fd1498Szrj   occ_head = NULL;
65438fd1498Szrj }
65538fd1498Szrj 
65638fd1498Szrj /* Return an internal function that implements the reciprocal of CALL,
65738fd1498Szrj    or IFN_LAST if there is no such function that the target supports.  */
65838fd1498Szrj 
65938fd1498Szrj internal_fn
internal_fn_reciprocal(gcall * call)66038fd1498Szrj internal_fn_reciprocal (gcall *call)
66138fd1498Szrj {
66238fd1498Szrj   internal_fn ifn;
66338fd1498Szrj 
66438fd1498Szrj   switch (gimple_call_combined_fn (call))
66538fd1498Szrj     {
66638fd1498Szrj     CASE_CFN_SQRT:
66738fd1498Szrj     CASE_CFN_SQRT_FN:
66838fd1498Szrj       ifn = IFN_RSQRT;
66938fd1498Szrj       break;
67038fd1498Szrj 
67138fd1498Szrj     default:
67238fd1498Szrj       return IFN_LAST;
67338fd1498Szrj     }
67438fd1498Szrj 
67538fd1498Szrj   tree_pair types = direct_internal_fn_types (ifn, call);
67638fd1498Szrj   if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
67738fd1498Szrj     return IFN_LAST;
67838fd1498Szrj 
67938fd1498Szrj   return ifn;
68038fd1498Szrj }
68138fd1498Szrj 
68238fd1498Szrj /* Go through all the floating-point SSA_NAMEs, and call
68338fd1498Szrj    execute_cse_reciprocals_1 on each of them.  */
68438fd1498Szrj namespace {
68538fd1498Szrj 
68638fd1498Szrj const pass_data pass_data_cse_reciprocals =
68738fd1498Szrj {
68838fd1498Szrj   GIMPLE_PASS, /* type */
68938fd1498Szrj   "recip", /* name */
69038fd1498Szrj   OPTGROUP_NONE, /* optinfo_flags */
69138fd1498Szrj   TV_TREE_RECIP, /* tv_id */
69238fd1498Szrj   PROP_ssa, /* properties_required */
69338fd1498Szrj   0, /* properties_provided */
69438fd1498Szrj   0, /* properties_destroyed */
69538fd1498Szrj   0, /* todo_flags_start */
69638fd1498Szrj   TODO_update_ssa, /* todo_flags_finish */
69738fd1498Szrj };
69838fd1498Szrj 
69938fd1498Szrj class pass_cse_reciprocals : public gimple_opt_pass
70038fd1498Szrj {
70138fd1498Szrj public:
pass_cse_reciprocals(gcc::context * ctxt)70238fd1498Szrj   pass_cse_reciprocals (gcc::context *ctxt)
70338fd1498Szrj     : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
70438fd1498Szrj   {}
70538fd1498Szrj 
70638fd1498Szrj   /* opt_pass methods: */
gate(function *)70738fd1498Szrj   virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
70838fd1498Szrj   virtual unsigned int execute (function *);
70938fd1498Szrj 
71038fd1498Szrj }; // class pass_cse_reciprocals
71138fd1498Szrj 
71238fd1498Szrj unsigned int
execute(function * fun)71338fd1498Szrj pass_cse_reciprocals::execute (function *fun)
71438fd1498Szrj {
71538fd1498Szrj   basic_block bb;
71638fd1498Szrj   tree arg;
71738fd1498Szrj 
71838fd1498Szrj   occ_pool = new object_allocator<occurrence> ("dominators for recip");
71938fd1498Szrj 
72038fd1498Szrj   memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
72138fd1498Szrj   calculate_dominance_info (CDI_DOMINATORS);
72238fd1498Szrj   calculate_dominance_info (CDI_POST_DOMINATORS);
72338fd1498Szrj 
72438fd1498Szrj   if (flag_checking)
72538fd1498Szrj     FOR_EACH_BB_FN (bb, fun)
72638fd1498Szrj       gcc_assert (!bb->aux);
72738fd1498Szrj 
72838fd1498Szrj   for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
72938fd1498Szrj     if (FLOAT_TYPE_P (TREE_TYPE (arg))
73038fd1498Szrj 	&& is_gimple_reg (arg))
73138fd1498Szrj       {
73238fd1498Szrj 	tree name = ssa_default_def (fun, arg);
73338fd1498Szrj 	if (name)
73438fd1498Szrj 	  execute_cse_reciprocals_1 (NULL, name);
73538fd1498Szrj       }
73638fd1498Szrj 
73738fd1498Szrj   FOR_EACH_BB_FN (bb, fun)
73838fd1498Szrj     {
73938fd1498Szrj       tree def;
74038fd1498Szrj 
74138fd1498Szrj       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
74238fd1498Szrj 	   gsi_next (&gsi))
74338fd1498Szrj 	{
74438fd1498Szrj 	  gphi *phi = gsi.phi ();
74538fd1498Szrj 	  def = PHI_RESULT (phi);
74638fd1498Szrj 	  if (! virtual_operand_p (def)
74738fd1498Szrj 	      && FLOAT_TYPE_P (TREE_TYPE (def)))
74838fd1498Szrj 	    execute_cse_reciprocals_1 (NULL, def);
74938fd1498Szrj 	}
75038fd1498Szrj 
75138fd1498Szrj       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
75238fd1498Szrj 	   gsi_next (&gsi))
75338fd1498Szrj         {
75438fd1498Szrj 	  gimple *stmt = gsi_stmt (gsi);
75538fd1498Szrj 
75638fd1498Szrj 	  if (gimple_has_lhs (stmt)
75738fd1498Szrj 	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
75838fd1498Szrj 	      && FLOAT_TYPE_P (TREE_TYPE (def))
75938fd1498Szrj 	      && TREE_CODE (def) == SSA_NAME)
76038fd1498Szrj 	    execute_cse_reciprocals_1 (&gsi, def);
76138fd1498Szrj 	}
76238fd1498Szrj 
76338fd1498Szrj       if (optimize_bb_for_size_p (bb))
76438fd1498Szrj         continue;
76538fd1498Szrj 
76638fd1498Szrj       /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
76738fd1498Szrj       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
76838fd1498Szrj 	   gsi_next (&gsi))
76938fd1498Szrj         {
77038fd1498Szrj 	  gimple *stmt = gsi_stmt (gsi);
77138fd1498Szrj 
77238fd1498Szrj 	  if (is_gimple_assign (stmt)
77338fd1498Szrj 	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
77438fd1498Szrj 	    {
77538fd1498Szrj 	      tree arg1 = gimple_assign_rhs2 (stmt);
77638fd1498Szrj 	      gimple *stmt1;
77738fd1498Szrj 
77838fd1498Szrj 	      if (TREE_CODE (arg1) != SSA_NAME)
77938fd1498Szrj 		continue;
78038fd1498Szrj 
78138fd1498Szrj 	      stmt1 = SSA_NAME_DEF_STMT (arg1);
78238fd1498Szrj 
78338fd1498Szrj 	      if (is_gimple_call (stmt1)
78438fd1498Szrj 		  && gimple_call_lhs (stmt1))
78538fd1498Szrj 		{
78638fd1498Szrj 		  bool fail;
78738fd1498Szrj 		  imm_use_iterator ui;
78838fd1498Szrj 		  use_operand_p use_p;
78938fd1498Szrj 		  tree fndecl = NULL_TREE;
79038fd1498Szrj 
79138fd1498Szrj 		  gcall *call = as_a <gcall *> (stmt1);
79238fd1498Szrj 		  internal_fn ifn = internal_fn_reciprocal (call);
79338fd1498Szrj 		  if (ifn == IFN_LAST)
79438fd1498Szrj 		    {
79538fd1498Szrj 		      fndecl = gimple_call_fndecl (call);
79638fd1498Szrj 		      if (!fndecl
79738fd1498Szrj 			  || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_MD)
79838fd1498Szrj 			continue;
79938fd1498Szrj 		      fndecl = targetm.builtin_reciprocal (fndecl);
80038fd1498Szrj 		      if (!fndecl)
80138fd1498Szrj 			continue;
80238fd1498Szrj 		    }
80338fd1498Szrj 
80438fd1498Szrj 		  /* Check that all uses of the SSA name are divisions,
80538fd1498Szrj 		     otherwise replacing the defining statement will do
80638fd1498Szrj 		     the wrong thing.  */
80738fd1498Szrj 		  fail = false;
80838fd1498Szrj 		  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
80938fd1498Szrj 		    {
81038fd1498Szrj 		      gimple *stmt2 = USE_STMT (use_p);
81138fd1498Szrj 		      if (is_gimple_debug (stmt2))
81238fd1498Szrj 			continue;
81338fd1498Szrj 		      if (!is_gimple_assign (stmt2)
81438fd1498Szrj 			  || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
81538fd1498Szrj 			  || gimple_assign_rhs1 (stmt2) == arg1
81638fd1498Szrj 			  || gimple_assign_rhs2 (stmt2) != arg1)
81738fd1498Szrj 			{
81838fd1498Szrj 			  fail = true;
81938fd1498Szrj 			  break;
82038fd1498Szrj 			}
82138fd1498Szrj 		    }
82238fd1498Szrj 		  if (fail)
82338fd1498Szrj 		    continue;
82438fd1498Szrj 
82538fd1498Szrj 		  gimple_replace_ssa_lhs (call, arg1);
82638fd1498Szrj 		  if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
82738fd1498Szrj 		    {
82838fd1498Szrj 		      auto_vec<tree, 4> args;
82938fd1498Szrj 		      for (unsigned int i = 0;
83038fd1498Szrj 			   i < gimple_call_num_args (call); i++)
83138fd1498Szrj 			args.safe_push (gimple_call_arg (call, i));
83238fd1498Szrj 		      gcall *stmt2;
83338fd1498Szrj 		      if (ifn == IFN_LAST)
83438fd1498Szrj 			stmt2 = gimple_build_call_vec (fndecl, args);
83538fd1498Szrj 		      else
83638fd1498Szrj 			stmt2 = gimple_build_call_internal_vec (ifn, args);
83738fd1498Szrj 		      gimple_call_set_lhs (stmt2, arg1);
83838fd1498Szrj 		      if (gimple_vdef (call))
83938fd1498Szrj 			{
84038fd1498Szrj 			  gimple_set_vdef (stmt2, gimple_vdef (call));
84138fd1498Szrj 			  SSA_NAME_DEF_STMT (gimple_vdef (stmt2)) = stmt2;
84238fd1498Szrj 			}
84338fd1498Szrj 		      gimple_call_set_nothrow (stmt2,
84438fd1498Szrj 					       gimple_call_nothrow_p (call));
84538fd1498Szrj 		      gimple_set_vuse (stmt2, gimple_vuse (call));
84638fd1498Szrj 		      gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
84738fd1498Szrj 		      gsi_replace (&gsi2, stmt2, true);
84838fd1498Szrj 		    }
84938fd1498Szrj 		  else
85038fd1498Szrj 		    {
85138fd1498Szrj 		      if (ifn == IFN_LAST)
85238fd1498Szrj 			gimple_call_set_fndecl (call, fndecl);
85338fd1498Szrj 		      else
85438fd1498Szrj 			gimple_call_set_internal_fn (call, ifn);
85538fd1498Szrj 		      update_stmt (call);
85638fd1498Szrj 		    }
85738fd1498Szrj 		  reciprocal_stats.rfuncs_inserted++;
85838fd1498Szrj 
85938fd1498Szrj 		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
86038fd1498Szrj 		    {
86138fd1498Szrj 		      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
86238fd1498Szrj 		      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
86338fd1498Szrj 		      fold_stmt_inplace (&gsi);
86438fd1498Szrj 		      update_stmt (stmt);
86538fd1498Szrj 		    }
86638fd1498Szrj 		}
86738fd1498Szrj 	    }
86838fd1498Szrj 	}
86938fd1498Szrj     }
87038fd1498Szrj 
87138fd1498Szrj   statistics_counter_event (fun, "reciprocal divs inserted",
87238fd1498Szrj 			    reciprocal_stats.rdivs_inserted);
87338fd1498Szrj   statistics_counter_event (fun, "reciprocal functions inserted",
87438fd1498Szrj 			    reciprocal_stats.rfuncs_inserted);
87538fd1498Szrj 
87638fd1498Szrj   free_dominance_info (CDI_DOMINATORS);
87738fd1498Szrj   free_dominance_info (CDI_POST_DOMINATORS);
87838fd1498Szrj   delete occ_pool;
87938fd1498Szrj   return 0;
88038fd1498Szrj }
88138fd1498Szrj 
88238fd1498Szrj } // anon namespace
88338fd1498Szrj 
88438fd1498Szrj gimple_opt_pass *
make_pass_cse_reciprocals(gcc::context * ctxt)88538fd1498Szrj make_pass_cse_reciprocals (gcc::context *ctxt)
88638fd1498Szrj {
88738fd1498Szrj   return new pass_cse_reciprocals (ctxt);
88838fd1498Szrj }
88938fd1498Szrj 
89038fd1498Szrj /* Records an occurrence at statement USE_STMT in the vector of trees
89138fd1498Szrj    STMTS if it is dominated by *TOP_BB or dominates it or this basic block
89238fd1498Szrj    is not yet initialized.  Returns true if the occurrence was pushed on
89338fd1498Szrj    the vector.  Adjusts *TOP_BB to be the basic block dominating all
89438fd1498Szrj    statements in the vector.  */
89538fd1498Szrj 
89638fd1498Szrj static bool
maybe_record_sincos(vec<gimple * > * stmts,basic_block * top_bb,gimple * use_stmt)89738fd1498Szrj maybe_record_sincos (vec<gimple *> *stmts,
89838fd1498Szrj 		     basic_block *top_bb, gimple *use_stmt)
89938fd1498Szrj {
90038fd1498Szrj   basic_block use_bb = gimple_bb (use_stmt);
90138fd1498Szrj   if (*top_bb
90238fd1498Szrj       && (*top_bb == use_bb
90338fd1498Szrj 	  || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
90438fd1498Szrj     stmts->safe_push (use_stmt);
90538fd1498Szrj   else if (!*top_bb
90638fd1498Szrj 	   || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
90738fd1498Szrj     {
90838fd1498Szrj       stmts->safe_push (use_stmt);
90938fd1498Szrj       *top_bb = use_bb;
91038fd1498Szrj     }
91138fd1498Szrj   else
91238fd1498Szrj     return false;
91338fd1498Szrj 
91438fd1498Szrj   return true;
91538fd1498Szrj }
91638fd1498Szrj 
91738fd1498Szrj /* Look for sin, cos and cexpi calls with the same argument NAME and
91838fd1498Szrj    create a single call to cexpi CSEing the result in this case.
91938fd1498Szrj    We first walk over all immediate uses of the argument collecting
92038fd1498Szrj    statements that we can CSE in a vector and in a second pass replace
92138fd1498Szrj    the statement rhs with a REALPART or IMAGPART expression on the
92238fd1498Szrj    result of the cexpi call we insert before the use statement that
92338fd1498Szrj    dominates all other candidates.  */
92438fd1498Szrj 
92538fd1498Szrj static bool
execute_cse_sincos_1(tree name)92638fd1498Szrj execute_cse_sincos_1 (tree name)
92738fd1498Szrj {
92838fd1498Szrj   gimple_stmt_iterator gsi;
92938fd1498Szrj   imm_use_iterator use_iter;
93038fd1498Szrj   tree fndecl, res, type;
93138fd1498Szrj   gimple *def_stmt, *use_stmt, *stmt;
93238fd1498Szrj   int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
93338fd1498Szrj   auto_vec<gimple *> stmts;
93438fd1498Szrj   basic_block top_bb = NULL;
93538fd1498Szrj   int i;
93638fd1498Szrj   bool cfg_changed = false;
93738fd1498Szrj 
93838fd1498Szrj   type = TREE_TYPE (name);
93938fd1498Szrj   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
94038fd1498Szrj     {
94138fd1498Szrj       if (gimple_code (use_stmt) != GIMPLE_CALL
94238fd1498Szrj 	  || !gimple_call_lhs (use_stmt))
94338fd1498Szrj 	continue;
94438fd1498Szrj 
94538fd1498Szrj       switch (gimple_call_combined_fn (use_stmt))
94638fd1498Szrj 	{
94738fd1498Szrj 	CASE_CFN_COS:
94838fd1498Szrj 	  seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
94938fd1498Szrj 	  break;
95038fd1498Szrj 
95138fd1498Szrj 	CASE_CFN_SIN:
95238fd1498Szrj 	  seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
95338fd1498Szrj 	  break;
95438fd1498Szrj 
95538fd1498Szrj 	CASE_CFN_CEXPI:
95638fd1498Szrj 	  seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
95738fd1498Szrj 	  break;
95838fd1498Szrj 
95938fd1498Szrj 	default:;
96038fd1498Szrj 	}
96138fd1498Szrj     }
96238fd1498Szrj 
96338fd1498Szrj   if (seen_cos + seen_sin + seen_cexpi <= 1)
96438fd1498Szrj     return false;
96538fd1498Szrj 
96638fd1498Szrj   /* Simply insert cexpi at the beginning of top_bb but not earlier than
96738fd1498Szrj      the name def statement.  */
96838fd1498Szrj   fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
96938fd1498Szrj   if (!fndecl)
97038fd1498Szrj     return false;
97138fd1498Szrj   stmt = gimple_build_call (fndecl, 1, name);
97238fd1498Szrj   res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
97338fd1498Szrj   gimple_call_set_lhs (stmt, res);
97438fd1498Szrj 
97538fd1498Szrj   def_stmt = SSA_NAME_DEF_STMT (name);
97638fd1498Szrj   if (!SSA_NAME_IS_DEFAULT_DEF (name)
97738fd1498Szrj       && gimple_code (def_stmt) != GIMPLE_PHI
97838fd1498Szrj       && gimple_bb (def_stmt) == top_bb)
97938fd1498Szrj     {
98038fd1498Szrj       gsi = gsi_for_stmt (def_stmt);
98138fd1498Szrj       gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
98238fd1498Szrj     }
98338fd1498Szrj   else
98438fd1498Szrj     {
98538fd1498Szrj       gsi = gsi_after_labels (top_bb);
98638fd1498Szrj       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
98738fd1498Szrj     }
98838fd1498Szrj   sincos_stats.inserted++;
98938fd1498Szrj 
99038fd1498Szrj   /* And adjust the recorded old call sites.  */
99138fd1498Szrj   for (i = 0; stmts.iterate (i, &use_stmt); ++i)
99238fd1498Szrj     {
99338fd1498Szrj       tree rhs = NULL;
99438fd1498Szrj 
99538fd1498Szrj       switch (gimple_call_combined_fn (use_stmt))
99638fd1498Szrj 	{
99738fd1498Szrj 	CASE_CFN_COS:
99838fd1498Szrj 	  rhs = fold_build1 (REALPART_EXPR, type, res);
99938fd1498Szrj 	  break;
100038fd1498Szrj 
100138fd1498Szrj 	CASE_CFN_SIN:
100238fd1498Szrj 	  rhs = fold_build1 (IMAGPART_EXPR, type, res);
100338fd1498Szrj 	  break;
100438fd1498Szrj 
100538fd1498Szrj 	CASE_CFN_CEXPI:
100638fd1498Szrj 	  rhs = res;
100738fd1498Szrj 	  break;
100838fd1498Szrj 
100938fd1498Szrj 	default:;
101038fd1498Szrj 	  gcc_unreachable ();
101138fd1498Szrj 	}
101238fd1498Szrj 
101338fd1498Szrj 	/* Replace call with a copy.  */
101438fd1498Szrj 	stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
101538fd1498Szrj 
101638fd1498Szrj 	gsi = gsi_for_stmt (use_stmt);
101738fd1498Szrj 	gsi_replace (&gsi, stmt, true);
101838fd1498Szrj 	if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
101938fd1498Szrj 	  cfg_changed = true;
102038fd1498Szrj     }
102138fd1498Szrj 
102238fd1498Szrj   return cfg_changed;
102338fd1498Szrj }
102438fd1498Szrj 
102538fd1498Szrj /* To evaluate powi(x,n), the floating point value x raised to the
102638fd1498Szrj    constant integer exponent n, we use a hybrid algorithm that
102738fd1498Szrj    combines the "window method" with look-up tables.  For an
102838fd1498Szrj    introduction to exponentiation algorithms and "addition chains",
102938fd1498Szrj    see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
103038fd1498Szrj    "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
103138fd1498Szrj    3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
103238fd1498Szrj    Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */
103338fd1498Szrj 
103438fd1498Szrj /* Provide a default value for POWI_MAX_MULTS, the maximum number of
103538fd1498Szrj    multiplications to inline before calling the system library's pow
103638fd1498Szrj    function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
103738fd1498Szrj    so this default never requires calling pow, powf or powl.  */
103838fd1498Szrj 
103938fd1498Szrj #ifndef POWI_MAX_MULTS
104038fd1498Szrj #define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
104138fd1498Szrj #endif
104238fd1498Szrj 
104338fd1498Szrj /* The size of the "optimal power tree" lookup table.  All
104438fd1498Szrj    exponents less than this value are simply looked up in the
104538fd1498Szrj    powi_table below.  This threshold is also used to size the
104638fd1498Szrj    cache of pseudo registers that hold intermediate results.  */
104738fd1498Szrj #define POWI_TABLE_SIZE 256
104838fd1498Szrj 
104938fd1498Szrj /* The size, in bits of the window, used in the "window method"
105038fd1498Szrj    exponentiation algorithm.  This is equivalent to a radix of
105138fd1498Szrj    (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
105238fd1498Szrj #define POWI_WINDOW_SIZE 3
105338fd1498Szrj 
105438fd1498Szrj /* The following table is an efficient representation of an
105538fd1498Szrj    "optimal power tree".  For each value, i, the corresponding
105638fd1498Szrj    value, j, in the table states than an optimal evaluation
105738fd1498Szrj    sequence for calculating pow(x,i) can be found by evaluating
105838fd1498Szrj    pow(x,j)*pow(x,i-j).  An optimal power tree for the first
105938fd1498Szrj    100 integers is given in Knuth's "Seminumerical algorithms".  */
106038fd1498Szrj 
106138fd1498Szrj static const unsigned char powi_table[POWI_TABLE_SIZE] =
106238fd1498Szrj   {
106338fd1498Szrj       0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
106438fd1498Szrj       4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
106538fd1498Szrj       8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
106638fd1498Szrj      12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
106738fd1498Szrj      16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
106838fd1498Szrj      20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
106938fd1498Szrj      24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
107038fd1498Szrj      28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
107138fd1498Szrj      32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
107238fd1498Szrj      36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
107338fd1498Szrj      40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
107438fd1498Szrj      44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
107538fd1498Szrj      48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
107638fd1498Szrj      52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
107738fd1498Szrj      56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
107838fd1498Szrj      60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
107938fd1498Szrj      64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
108038fd1498Szrj      68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
108138fd1498Szrj      72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
108238fd1498Szrj      76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
108338fd1498Szrj      80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
108438fd1498Szrj      84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
108538fd1498Szrj      88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
108638fd1498Szrj      92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
108738fd1498Szrj      96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
108838fd1498Szrj     100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
108938fd1498Szrj     104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
109038fd1498Szrj     108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
109138fd1498Szrj     112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
109238fd1498Szrj     116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
109338fd1498Szrj     120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
109438fd1498Szrj     124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
109538fd1498Szrj   };
109638fd1498Szrj 
109738fd1498Szrj 
109838fd1498Szrj /* Return the number of multiplications required to calculate
109938fd1498Szrj    powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
110038fd1498Szrj    subroutine of powi_cost.  CACHE is an array indicating
110138fd1498Szrj    which exponents have already been calculated.  */
110238fd1498Szrj 
110338fd1498Szrj static int
powi_lookup_cost(unsigned HOST_WIDE_INT n,bool * cache)110438fd1498Szrj powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
110538fd1498Szrj {
110638fd1498Szrj   /* If we've already calculated this exponent, then this evaluation
110738fd1498Szrj      doesn't require any additional multiplications.  */
110838fd1498Szrj   if (cache[n])
110938fd1498Szrj     return 0;
111038fd1498Szrj 
111138fd1498Szrj   cache[n] = true;
111238fd1498Szrj   return powi_lookup_cost (n - powi_table[n], cache)
111338fd1498Szrj 	 + powi_lookup_cost (powi_table[n], cache) + 1;
111438fd1498Szrj }
111538fd1498Szrj 
111638fd1498Szrj /* Return the number of multiplications required to calculate
111738fd1498Szrj    powi(x,n) for an arbitrary x, given the exponent N.  This
111838fd1498Szrj    function needs to be kept in sync with powi_as_mults below.  */
111938fd1498Szrj 
112038fd1498Szrj static int
powi_cost(HOST_WIDE_INT n)112138fd1498Szrj powi_cost (HOST_WIDE_INT n)
112238fd1498Szrj {
112338fd1498Szrj   bool cache[POWI_TABLE_SIZE];
112438fd1498Szrj   unsigned HOST_WIDE_INT digit;
112538fd1498Szrj   unsigned HOST_WIDE_INT val;
112638fd1498Szrj   int result;
112738fd1498Szrj 
112838fd1498Szrj   if (n == 0)
112938fd1498Szrj     return 0;
113038fd1498Szrj 
113138fd1498Szrj   /* Ignore the reciprocal when calculating the cost.  */
113238fd1498Szrj   val = (n < 0) ? -n : n;
113338fd1498Szrj 
113438fd1498Szrj   /* Initialize the exponent cache.  */
113538fd1498Szrj   memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
113638fd1498Szrj   cache[1] = true;
113738fd1498Szrj 
113838fd1498Szrj   result = 0;
113938fd1498Szrj 
114038fd1498Szrj   while (val >= POWI_TABLE_SIZE)
114138fd1498Szrj     {
114238fd1498Szrj       if (val & 1)
114338fd1498Szrj 	{
114438fd1498Szrj 	  digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
114538fd1498Szrj 	  result += powi_lookup_cost (digit, cache)
114638fd1498Szrj 		    + POWI_WINDOW_SIZE + 1;
114738fd1498Szrj 	  val >>= POWI_WINDOW_SIZE;
114838fd1498Szrj 	}
114938fd1498Szrj       else
115038fd1498Szrj 	{
115138fd1498Szrj 	  val >>= 1;
115238fd1498Szrj 	  result++;
115338fd1498Szrj 	}
115438fd1498Szrj     }
115538fd1498Szrj 
115638fd1498Szrj   return result + powi_lookup_cost (val, cache);
115738fd1498Szrj }
115838fd1498Szrj 
115938fd1498Szrj /* Recursive subroutine of powi_as_mults.  This function takes the
116038fd1498Szrj    array, CACHE, of already calculated exponents and an exponent N and
116138fd1498Szrj    returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */
116238fd1498Szrj 
116338fd1498Szrj static tree
powi_as_mults_1(gimple_stmt_iterator * gsi,location_t loc,tree type,HOST_WIDE_INT n,tree * cache)116438fd1498Szrj powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
116538fd1498Szrj 		 HOST_WIDE_INT n, tree *cache)
116638fd1498Szrj {
116738fd1498Szrj   tree op0, op1, ssa_target;
116838fd1498Szrj   unsigned HOST_WIDE_INT digit;
116938fd1498Szrj   gassign *mult_stmt;
117038fd1498Szrj 
117138fd1498Szrj   if (n < POWI_TABLE_SIZE && cache[n])
117238fd1498Szrj     return cache[n];
117338fd1498Szrj 
117438fd1498Szrj   ssa_target = make_temp_ssa_name (type, NULL, "powmult");
117538fd1498Szrj 
117638fd1498Szrj   if (n < POWI_TABLE_SIZE)
117738fd1498Szrj     {
117838fd1498Szrj       cache[n] = ssa_target;
117938fd1498Szrj       op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
118038fd1498Szrj       op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
118138fd1498Szrj     }
118238fd1498Szrj   else if (n & 1)
118338fd1498Szrj     {
118438fd1498Szrj       digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
118538fd1498Szrj       op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
118638fd1498Szrj       op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
118738fd1498Szrj     }
118838fd1498Szrj   else
118938fd1498Szrj     {
119038fd1498Szrj       op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
119138fd1498Szrj       op1 = op0;
119238fd1498Szrj     }
119338fd1498Szrj 
119438fd1498Szrj   mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
119538fd1498Szrj   gimple_set_location (mult_stmt, loc);
119638fd1498Szrj   gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
119738fd1498Szrj 
119838fd1498Szrj   return ssa_target;
119938fd1498Szrj }
120038fd1498Szrj 
120138fd1498Szrj /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
120238fd1498Szrj    This function needs to be kept in sync with powi_cost above.  */
120338fd1498Szrj 
120438fd1498Szrj static tree
powi_as_mults(gimple_stmt_iterator * gsi,location_t loc,tree arg0,HOST_WIDE_INT n)120538fd1498Szrj powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
120638fd1498Szrj 	       tree arg0, HOST_WIDE_INT n)
120738fd1498Szrj {
120838fd1498Szrj   tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
120938fd1498Szrj   gassign *div_stmt;
121038fd1498Szrj   tree target;
121138fd1498Szrj 
121238fd1498Szrj   if (n == 0)
121338fd1498Szrj     return build_real (type, dconst1);
121438fd1498Szrj 
121538fd1498Szrj   memset (cache, 0,  sizeof (cache));
121638fd1498Szrj   cache[1] = arg0;
121738fd1498Szrj 
121838fd1498Szrj   result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
121938fd1498Szrj   if (n >= 0)
122038fd1498Szrj     return result;
122138fd1498Szrj 
122238fd1498Szrj   /* If the original exponent was negative, reciprocate the result.  */
122338fd1498Szrj   target = make_temp_ssa_name (type, NULL, "powmult");
122438fd1498Szrj   div_stmt = gimple_build_assign (target, RDIV_EXPR,
122538fd1498Szrj 				  build_real (type, dconst1), result);
122638fd1498Szrj   gimple_set_location (div_stmt, loc);
122738fd1498Szrj   gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
122838fd1498Szrj 
122938fd1498Szrj   return target;
123038fd1498Szrj }
123138fd1498Szrj 
123238fd1498Szrj /* ARG0 and N are the two arguments to a powi builtin in GSI with
123338fd1498Szrj    location info LOC.  If the arguments are appropriate, create an
123438fd1498Szrj    equivalent sequence of statements prior to GSI using an optimal
123538fd1498Szrj    number of multiplications, and return an expession holding the
123638fd1498Szrj    result.  */
123738fd1498Szrj 
123838fd1498Szrj static tree
gimple_expand_builtin_powi(gimple_stmt_iterator * gsi,location_t loc,tree arg0,HOST_WIDE_INT n)123938fd1498Szrj gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
124038fd1498Szrj 			    tree arg0, HOST_WIDE_INT n)
124138fd1498Szrj {
124238fd1498Szrj   /* Avoid largest negative number.  */
124338fd1498Szrj   if (n != -n
124438fd1498Szrj       && ((n >= -1 && n <= 2)
124538fd1498Szrj 	  || (optimize_function_for_speed_p (cfun)
124638fd1498Szrj 	      && powi_cost (n) <= POWI_MAX_MULTS)))
124738fd1498Szrj     return powi_as_mults (gsi, loc, arg0, n);
124838fd1498Szrj 
124938fd1498Szrj   return NULL_TREE;
125038fd1498Szrj }
125138fd1498Szrj 
125238fd1498Szrj /* Build a gimple call statement that calls FN with argument ARG.
125338fd1498Szrj    Set the lhs of the call statement to a fresh SSA name.  Insert the
125438fd1498Szrj    statement prior to GSI's current position, and return the fresh
125538fd1498Szrj    SSA name.  */
125638fd1498Szrj 
125738fd1498Szrj static tree
build_and_insert_call(gimple_stmt_iterator * gsi,location_t loc,tree fn,tree arg)125838fd1498Szrj build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
125938fd1498Szrj 		       tree fn, tree arg)
126038fd1498Szrj {
126138fd1498Szrj   gcall *call_stmt;
126238fd1498Szrj   tree ssa_target;
126338fd1498Szrj 
126438fd1498Szrj   call_stmt = gimple_build_call (fn, 1, arg);
126538fd1498Szrj   ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
126638fd1498Szrj   gimple_set_lhs (call_stmt, ssa_target);
126738fd1498Szrj   gimple_set_location (call_stmt, loc);
126838fd1498Szrj   gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
126938fd1498Szrj 
127038fd1498Szrj   return ssa_target;
127138fd1498Szrj }
127238fd1498Szrj 
127338fd1498Szrj /* Build a gimple binary operation with the given CODE and arguments
127438fd1498Szrj    ARG0, ARG1, assigning the result to a new SSA name for variable
127538fd1498Szrj    TARGET.  Insert the statement prior to GSI's current position, and
127638fd1498Szrj    return the fresh SSA name.*/
127738fd1498Szrj 
127838fd1498Szrj static tree
build_and_insert_binop(gimple_stmt_iterator * gsi,location_t loc,const char * name,enum tree_code code,tree arg0,tree arg1)127938fd1498Szrj build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
128038fd1498Szrj 			const char *name, enum tree_code code,
128138fd1498Szrj 			tree arg0, tree arg1)
128238fd1498Szrj {
128338fd1498Szrj   tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
128438fd1498Szrj   gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
128538fd1498Szrj   gimple_set_location (stmt, loc);
128638fd1498Szrj   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
128738fd1498Szrj   return result;
128838fd1498Szrj }
128938fd1498Szrj 
129038fd1498Szrj /* Build a gimple reference operation with the given CODE and argument
129138fd1498Szrj    ARG, assigning the result to a new SSA name of TYPE with NAME.
129238fd1498Szrj    Insert the statement prior to GSI's current position, and return
129338fd1498Szrj    the fresh SSA name.  */
129438fd1498Szrj 
129538fd1498Szrj static inline tree
build_and_insert_ref(gimple_stmt_iterator * gsi,location_t loc,tree type,const char * name,enum tree_code code,tree arg0)129638fd1498Szrj build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
129738fd1498Szrj 		      const char *name, enum tree_code code, tree arg0)
129838fd1498Szrj {
129938fd1498Szrj   tree result = make_temp_ssa_name (type, NULL, name);
130038fd1498Szrj   gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
130138fd1498Szrj   gimple_set_location (stmt, loc);
130238fd1498Szrj   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
130338fd1498Szrj   return result;
130438fd1498Szrj }
130538fd1498Szrj 
130638fd1498Szrj /* Build a gimple assignment to cast VAL to TYPE.  Insert the statement
130738fd1498Szrj    prior to GSI's current position, and return the fresh SSA name.  */
130838fd1498Szrj 
130938fd1498Szrj static tree
build_and_insert_cast(gimple_stmt_iterator * gsi,location_t loc,tree type,tree val)131038fd1498Szrj build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
131138fd1498Szrj 		       tree type, tree val)
131238fd1498Szrj {
131338fd1498Szrj   tree result = make_ssa_name (type);
131438fd1498Szrj   gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
131538fd1498Szrj   gimple_set_location (stmt, loc);
131638fd1498Szrj   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
131738fd1498Szrj   return result;
131838fd1498Szrj }
131938fd1498Szrj 
132038fd1498Szrj struct pow_synth_sqrt_info
132138fd1498Szrj {
132238fd1498Szrj   bool *factors;
132338fd1498Szrj   unsigned int deepest;
132438fd1498Szrj   unsigned int num_mults;
132538fd1498Szrj };
132638fd1498Szrj 
132738fd1498Szrj /* Return true iff the real value C can be represented as a
132838fd1498Szrj    sum of powers of 0.5 up to N.  That is:
132938fd1498Szrj    C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
133038fd1498Szrj    Record in INFO the various parameters of the synthesis algorithm such
133138fd1498Szrj    as the factors a[i], the maximum 0.5 power and the number of
133238fd1498Szrj    multiplications that will be required.  */
133338fd1498Szrj 
133438fd1498Szrj bool
representable_as_half_series_p(REAL_VALUE_TYPE c,unsigned n,struct pow_synth_sqrt_info * info)133538fd1498Szrj representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
133638fd1498Szrj 				 struct pow_synth_sqrt_info *info)
133738fd1498Szrj {
133838fd1498Szrj   REAL_VALUE_TYPE factor = dconsthalf;
133938fd1498Szrj   REAL_VALUE_TYPE remainder = c;
134038fd1498Szrj 
134138fd1498Szrj   info->deepest = 0;
134238fd1498Szrj   info->num_mults = 0;
134338fd1498Szrj   memset (info->factors, 0, n * sizeof (bool));
134438fd1498Szrj 
134538fd1498Szrj   for (unsigned i = 0; i < n; i++)
134638fd1498Szrj     {
134738fd1498Szrj       REAL_VALUE_TYPE res;
134838fd1498Szrj 
134938fd1498Szrj       /* If something inexact happened bail out now.  */
135038fd1498Szrj       if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
135138fd1498Szrj 	return false;
135238fd1498Szrj 
135338fd1498Szrj       /* We have hit zero.  The number is representable as a sum
135438fd1498Szrj          of powers of 0.5.  */
135538fd1498Szrj       if (real_equal (&res, &dconst0))
135638fd1498Szrj 	{
135738fd1498Szrj 	  info->factors[i] = true;
135838fd1498Szrj 	  info->deepest = i + 1;
135938fd1498Szrj 	  return true;
136038fd1498Szrj 	}
136138fd1498Szrj       else if (!REAL_VALUE_NEGATIVE (res))
136238fd1498Szrj 	{
136338fd1498Szrj 	  remainder = res;
136438fd1498Szrj 	  info->factors[i] = true;
136538fd1498Szrj 	  info->num_mults++;
136638fd1498Szrj 	}
136738fd1498Szrj       else
136838fd1498Szrj 	info->factors[i] = false;
136938fd1498Szrj 
137038fd1498Szrj       real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
137138fd1498Szrj     }
137238fd1498Szrj   return false;
137338fd1498Szrj }
137438fd1498Szrj 
137538fd1498Szrj /* Return the tree corresponding to FN being applied
137638fd1498Szrj    to ARG N times at GSI and LOC.
137738fd1498Szrj    Look up previous results from CACHE if need be.
137838fd1498Szrj    cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times.  */
137938fd1498Szrj 
138038fd1498Szrj static tree
get_fn_chain(tree arg,unsigned int n,gimple_stmt_iterator * gsi,tree fn,location_t loc,tree * cache)138138fd1498Szrj get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
138238fd1498Szrj 	      tree fn, location_t loc, tree *cache)
138338fd1498Szrj {
138438fd1498Szrj   tree res = cache[n];
138538fd1498Szrj   if (!res)
138638fd1498Szrj     {
138738fd1498Szrj       tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
138838fd1498Szrj       res = build_and_insert_call (gsi, loc, fn, prev);
138938fd1498Szrj       cache[n] = res;
139038fd1498Szrj     }
139138fd1498Szrj 
139238fd1498Szrj   return res;
139338fd1498Szrj }
139438fd1498Szrj 
139538fd1498Szrj /* Print to STREAM the repeated application of function FNAME to ARG
139638fd1498Szrj    N times.  So, for FNAME = "foo", ARG = "x", N = 2 it would print:
139738fd1498Szrj    "foo (foo (x))".  */
139838fd1498Szrj 
139938fd1498Szrj static void
print_nested_fn(FILE * stream,const char * fname,const char * arg,unsigned int n)140038fd1498Szrj print_nested_fn (FILE* stream, const char *fname, const char* arg,
140138fd1498Szrj 		 unsigned int n)
140238fd1498Szrj {
140338fd1498Szrj   if (n == 0)
140438fd1498Szrj     fprintf (stream, "%s", arg);
140538fd1498Szrj   else
140638fd1498Szrj     {
140738fd1498Szrj       fprintf (stream, "%s (", fname);
140838fd1498Szrj       print_nested_fn (stream, fname, arg, n - 1);
140938fd1498Szrj       fprintf (stream, ")");
141038fd1498Szrj     }
141138fd1498Szrj }
141238fd1498Szrj 
141338fd1498Szrj /* Print to STREAM the fractional sequence of sqrt chains
141438fd1498Szrj    applied to ARG, described by INFO.  Used for the dump file.  */
141538fd1498Szrj 
141638fd1498Szrj static void
dump_fractional_sqrt_sequence(FILE * stream,const char * arg,struct pow_synth_sqrt_info * info)141738fd1498Szrj dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
141838fd1498Szrj 			        struct pow_synth_sqrt_info *info)
141938fd1498Szrj {
142038fd1498Szrj   for (unsigned int i = 0; i < info->deepest; i++)
142138fd1498Szrj     {
142238fd1498Szrj       bool is_set = info->factors[i];
142338fd1498Szrj       if (is_set)
142438fd1498Szrj 	{
142538fd1498Szrj 	  print_nested_fn (stream, "sqrt", arg, i + 1);
142638fd1498Szrj 	  if (i != info->deepest - 1)
142738fd1498Szrj 	    fprintf (stream, " * ");
142838fd1498Szrj 	}
142938fd1498Szrj     }
143038fd1498Szrj }
143138fd1498Szrj 
143238fd1498Szrj /* Print to STREAM a representation of raising ARG to an integer
143338fd1498Szrj    power N.  Used for the dump file.  */
143438fd1498Szrj 
143538fd1498Szrj static void
dump_integer_part(FILE * stream,const char * arg,HOST_WIDE_INT n)143638fd1498Szrj dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
143738fd1498Szrj {
143838fd1498Szrj   if (n > 1)
143938fd1498Szrj     fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
144038fd1498Szrj   else if (n == 1)
144138fd1498Szrj     fprintf (stream, "%s", arg);
144238fd1498Szrj }
144338fd1498Szrj 
144438fd1498Szrj /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
144538fd1498Szrj    square roots.  Place at GSI and LOC.  Limit the maximum depth
144638fd1498Szrj    of the sqrt chains to MAX_DEPTH.  Return the tree holding the
144738fd1498Szrj    result of the expanded sequence or NULL_TREE if the expansion failed.
144838fd1498Szrj 
144938fd1498Szrj    This routine assumes that ARG1 is a real number with a fractional part
145038fd1498Szrj    (the integer exponent case will have been handled earlier in
145138fd1498Szrj    gimple_expand_builtin_pow).
145238fd1498Szrj 
145338fd1498Szrj    For ARG1 > 0.0:
145438fd1498Szrj    * For ARG1 composed of a whole part WHOLE_PART and a fractional part
145538fd1498Szrj      FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
145638fd1498Szrj                     FRAC_PART == ARG1 - WHOLE_PART:
145738fd1498Szrj      Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
145838fd1498Szrj      POW (ARG0, FRAC_PART) is expanded as a product of square root chains
145938fd1498Szrj      if it can be expressed as such, that is if FRAC_PART satisfies:
146038fd1498Szrj      FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
146138fd1498Szrj      where integer a[i] is either 0 or 1.
146238fd1498Szrj 
146338fd1498Szrj      Example:
146438fd1498Szrj      POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
146538fd1498Szrj        --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
146638fd1498Szrj 
146738fd1498Szrj    For ARG1 < 0.0 there are two approaches:
146838fd1498Szrj    * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
146938fd1498Szrj          is calculated as above.
147038fd1498Szrj 
147138fd1498Szrj      Example:
147238fd1498Szrj      POW (x, -5.625) == 1.0 / POW (x, 5.625)
147338fd1498Szrj        -->  1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
147438fd1498Szrj 
147538fd1498Szrj    * (B) : WHOLE_PART := - ceil (abs (ARG1))
147638fd1498Szrj            FRAC_PART  := ARG1 - WHOLE_PART
147738fd1498Szrj      and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
147838fd1498Szrj      Example:
147938fd1498Szrj      POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
148038fd1498Szrj        --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
148138fd1498Szrj 
148238fd1498Szrj    For ARG1 < 0.0 we choose between (A) and (B) depending on
148338fd1498Szrj    how many multiplications we'd have to do.
148438fd1498Szrj    So, for the example in (B): POW (x, -5.875), if we were to
148538fd1498Szrj    follow algorithm (A) we would produce:
148638fd1498Szrj    1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
148738fd1498Szrj    which contains more multiplications than approach (B).
148838fd1498Szrj 
148938fd1498Szrj    Hopefully, this approach will eliminate potentially expensive POW library
149038fd1498Szrj    calls when unsafe floating point math is enabled and allow the compiler to
149138fd1498Szrj    further optimise the multiplies, square roots and divides produced by this
149238fd1498Szrj    function.  */
149338fd1498Szrj 
149438fd1498Szrj static tree
expand_pow_as_sqrts(gimple_stmt_iterator * gsi,location_t loc,tree arg0,tree arg1,HOST_WIDE_INT max_depth)149538fd1498Szrj expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
149638fd1498Szrj 		     tree arg0, tree arg1, HOST_WIDE_INT max_depth)
149738fd1498Szrj {
149838fd1498Szrj   tree type = TREE_TYPE (arg0);
149938fd1498Szrj   machine_mode mode = TYPE_MODE (type);
150038fd1498Szrj   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
150138fd1498Szrj   bool one_over = true;
150238fd1498Szrj 
150338fd1498Szrj   if (!sqrtfn)
150438fd1498Szrj     return NULL_TREE;
150538fd1498Szrj 
150638fd1498Szrj   if (TREE_CODE (arg1) != REAL_CST)
150738fd1498Szrj     return NULL_TREE;
150838fd1498Szrj 
150938fd1498Szrj   REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
151038fd1498Szrj 
151138fd1498Szrj   gcc_assert (max_depth > 0);
151238fd1498Szrj   tree *cache = XALLOCAVEC (tree, max_depth + 1);
151338fd1498Szrj 
151438fd1498Szrj   struct pow_synth_sqrt_info synth_info;
151538fd1498Szrj   synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
151638fd1498Szrj   synth_info.deepest = 0;
151738fd1498Szrj   synth_info.num_mults = 0;
151838fd1498Szrj 
151938fd1498Szrj   bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
152038fd1498Szrj   REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
152138fd1498Szrj 
152238fd1498Szrj   /* The whole and fractional parts of exp.  */
152338fd1498Szrj   REAL_VALUE_TYPE whole_part;
152438fd1498Szrj   REAL_VALUE_TYPE frac_part;
152538fd1498Szrj 
152638fd1498Szrj   real_floor (&whole_part, mode, &exp);
152738fd1498Szrj   real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
152838fd1498Szrj 
152938fd1498Szrj 
153038fd1498Szrj   REAL_VALUE_TYPE ceil_whole = dconst0;
153138fd1498Szrj   REAL_VALUE_TYPE ceil_fract = dconst0;
153238fd1498Szrj 
153338fd1498Szrj   if (neg_exp)
153438fd1498Szrj     {
153538fd1498Szrj       real_ceil (&ceil_whole, mode, &exp);
153638fd1498Szrj       real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
153738fd1498Szrj     }
153838fd1498Szrj 
153938fd1498Szrj   if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
154038fd1498Szrj     return NULL_TREE;
154138fd1498Szrj 
154238fd1498Szrj   /* Check whether it's more profitable to not use 1.0 / ...  */
154338fd1498Szrj   if (neg_exp)
154438fd1498Szrj     {
154538fd1498Szrj       struct pow_synth_sqrt_info alt_synth_info;
154638fd1498Szrj       alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
154738fd1498Szrj       alt_synth_info.deepest = 0;
154838fd1498Szrj       alt_synth_info.num_mults = 0;
154938fd1498Szrj 
155038fd1498Szrj       if (representable_as_half_series_p (ceil_fract, max_depth,
155138fd1498Szrj 					   &alt_synth_info)
155238fd1498Szrj 	  && alt_synth_info.deepest <= synth_info.deepest
155338fd1498Szrj 	  && alt_synth_info.num_mults < synth_info.num_mults)
155438fd1498Szrj 	{
155538fd1498Szrj 	  whole_part = ceil_whole;
155638fd1498Szrj 	  frac_part = ceil_fract;
155738fd1498Szrj 	  synth_info.deepest = alt_synth_info.deepest;
155838fd1498Szrj 	  synth_info.num_mults = alt_synth_info.num_mults;
155938fd1498Szrj 	  memcpy (synth_info.factors, alt_synth_info.factors,
156038fd1498Szrj 		  (max_depth + 1) * sizeof (bool));
156138fd1498Szrj 	  one_over = false;
156238fd1498Szrj 	}
156338fd1498Szrj     }
156438fd1498Szrj 
156538fd1498Szrj   HOST_WIDE_INT n = real_to_integer (&whole_part);
156638fd1498Szrj   REAL_VALUE_TYPE cint;
156738fd1498Szrj   real_from_integer (&cint, VOIDmode, n, SIGNED);
156838fd1498Szrj 
156938fd1498Szrj   if (!real_identical (&whole_part, &cint))
157038fd1498Szrj     return NULL_TREE;
157138fd1498Szrj 
157238fd1498Szrj   if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
157338fd1498Szrj     return NULL_TREE;
157438fd1498Szrj 
157538fd1498Szrj   memset (cache, 0, (max_depth + 1) * sizeof (tree));
157638fd1498Szrj 
157738fd1498Szrj   tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
157838fd1498Szrj 
157938fd1498Szrj   /* Calculate the integer part of the exponent.  */
158038fd1498Szrj   if (n > 1)
158138fd1498Szrj     {
158238fd1498Szrj       integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
158338fd1498Szrj       if (!integer_res)
158438fd1498Szrj 	return NULL_TREE;
158538fd1498Szrj     }
158638fd1498Szrj 
158738fd1498Szrj   if (dump_file)
158838fd1498Szrj     {
158938fd1498Szrj       char string[64];
159038fd1498Szrj 
159138fd1498Szrj       real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
159238fd1498Szrj       fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
159338fd1498Szrj 
159438fd1498Szrj       if (neg_exp)
159538fd1498Szrj 	{
159638fd1498Szrj 	  if (one_over)
159738fd1498Szrj 	    {
159838fd1498Szrj 	      fprintf (dump_file, "1.0 / (");
159938fd1498Szrj 	      dump_integer_part (dump_file, "x", n);
160038fd1498Szrj 	      if (n > 0)
160138fd1498Szrj 	        fprintf (dump_file, " * ");
160238fd1498Szrj 	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
160338fd1498Szrj 	      fprintf (dump_file, ")");
160438fd1498Szrj 	    }
160538fd1498Szrj 	  else
160638fd1498Szrj 	    {
160738fd1498Szrj 	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
160838fd1498Szrj 	      fprintf (dump_file, " / (");
160938fd1498Szrj 	      dump_integer_part (dump_file, "x", n);
161038fd1498Szrj 	      fprintf (dump_file, ")");
161138fd1498Szrj 	    }
161238fd1498Szrj 	}
161338fd1498Szrj       else
161438fd1498Szrj 	{
161538fd1498Szrj 	  dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
161638fd1498Szrj 	  if (n > 0)
161738fd1498Szrj 	    fprintf (dump_file, " * ");
161838fd1498Szrj 	  dump_integer_part (dump_file, "x", n);
161938fd1498Szrj 	}
162038fd1498Szrj 
162138fd1498Szrj       fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
162238fd1498Szrj     }
162338fd1498Szrj 
162438fd1498Szrj 
162538fd1498Szrj   tree fract_res = NULL_TREE;
162638fd1498Szrj   cache[0] = arg0;
162738fd1498Szrj 
162838fd1498Szrj   /* Calculate the fractional part of the exponent.  */
162938fd1498Szrj   for (unsigned i = 0; i < synth_info.deepest; i++)
163038fd1498Szrj     {
163138fd1498Szrj       if (synth_info.factors[i])
163238fd1498Szrj 	{
163338fd1498Szrj 	  tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
163438fd1498Szrj 
163538fd1498Szrj 	  if (!fract_res)
163638fd1498Szrj 	      fract_res = sqrt_chain;
163738fd1498Szrj 
163838fd1498Szrj 	  else
163938fd1498Szrj 	    fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
164038fd1498Szrj 					   fract_res, sqrt_chain);
164138fd1498Szrj 	}
164238fd1498Szrj     }
164338fd1498Szrj 
164438fd1498Szrj   tree res = NULL_TREE;
164538fd1498Szrj 
164638fd1498Szrj   if (neg_exp)
164738fd1498Szrj     {
164838fd1498Szrj       if (one_over)
164938fd1498Szrj 	{
165038fd1498Szrj 	  if (n > 0)
165138fd1498Szrj 	    res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
165238fd1498Szrj 					   fract_res, integer_res);
165338fd1498Szrj 	  else
165438fd1498Szrj 	    res = fract_res;
165538fd1498Szrj 
165638fd1498Szrj 	  res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
165738fd1498Szrj 					  build_real (type, dconst1), res);
165838fd1498Szrj 	}
165938fd1498Szrj       else
166038fd1498Szrj 	{
166138fd1498Szrj 	  res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
166238fd1498Szrj 					 fract_res, integer_res);
166338fd1498Szrj 	}
166438fd1498Szrj     }
166538fd1498Szrj   else
166638fd1498Szrj     res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
166738fd1498Szrj 				   fract_res, integer_res);
166838fd1498Szrj   return res;
166938fd1498Szrj }
167038fd1498Szrj 
167138fd1498Szrj /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
167238fd1498Szrj    with location info LOC.  If possible, create an equivalent and
167338fd1498Szrj    less expensive sequence of statements prior to GSI, and return an
167438fd1498Szrj    expession holding the result.  */
167538fd1498Szrj 
167638fd1498Szrj static tree
gimple_expand_builtin_pow(gimple_stmt_iterator * gsi,location_t loc,tree arg0,tree arg1)167738fd1498Szrj gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
167838fd1498Szrj 			   tree arg0, tree arg1)
167938fd1498Szrj {
168038fd1498Szrj   REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
168138fd1498Szrj   REAL_VALUE_TYPE c2, dconst3;
168238fd1498Szrj   HOST_WIDE_INT n;
168338fd1498Szrj   tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
168438fd1498Szrj   machine_mode mode;
168538fd1498Szrj   bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
168638fd1498Szrj   bool hw_sqrt_exists, c_is_int, c2_is_int;
168738fd1498Szrj 
168838fd1498Szrj   dconst1_4 = dconst1;
168938fd1498Szrj   SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
169038fd1498Szrj 
169138fd1498Szrj   /* If the exponent isn't a constant, there's nothing of interest
169238fd1498Szrj      to be done.  */
169338fd1498Szrj   if (TREE_CODE (arg1) != REAL_CST)
169438fd1498Szrj     return NULL_TREE;
169538fd1498Szrj 
169638fd1498Szrj   /* Don't perform the operation if flag_signaling_nans is on
169738fd1498Szrj      and the operand is a signaling NaN.  */
169838fd1498Szrj   if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
169938fd1498Szrj       && ((TREE_CODE (arg0) == REAL_CST
170038fd1498Szrj 	   && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
170138fd1498Szrj 	  || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
170238fd1498Szrj     return NULL_TREE;
170338fd1498Szrj 
170438fd1498Szrj   /* If the exponent is equivalent to an integer, expand to an optimal
170538fd1498Szrj      multiplication sequence when profitable.  */
170638fd1498Szrj   c = TREE_REAL_CST (arg1);
170738fd1498Szrj   n = real_to_integer (&c);
170838fd1498Szrj   real_from_integer (&cint, VOIDmode, n, SIGNED);
170938fd1498Szrj   c_is_int = real_identical (&c, &cint);
171038fd1498Szrj 
171138fd1498Szrj   if (c_is_int
171238fd1498Szrj       && ((n >= -1 && n <= 2)
171338fd1498Szrj 	  || (flag_unsafe_math_optimizations
171438fd1498Szrj 	      && speed_p
171538fd1498Szrj 	      && powi_cost (n) <= POWI_MAX_MULTS)))
171638fd1498Szrj     return gimple_expand_builtin_powi (gsi, loc, arg0, n);
171738fd1498Szrj 
171838fd1498Szrj   /* Attempt various optimizations using sqrt and cbrt.  */
171938fd1498Szrj   type = TREE_TYPE (arg0);
172038fd1498Szrj   mode = TYPE_MODE (type);
172138fd1498Szrj   sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
172238fd1498Szrj 
172338fd1498Szrj   /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
172438fd1498Szrj      unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
172538fd1498Szrj      sqrt(-0) = -0.  */
172638fd1498Szrj   if (sqrtfn
172738fd1498Szrj       && real_equal (&c, &dconsthalf)
172838fd1498Szrj       && !HONOR_SIGNED_ZEROS (mode))
172938fd1498Szrj     return build_and_insert_call (gsi, loc, sqrtfn, arg0);
173038fd1498Szrj 
173138fd1498Szrj   hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
173238fd1498Szrj 
173338fd1498Szrj   /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
173438fd1498Szrj      optimizations since 1./3. is not exactly representable.  If x
173538fd1498Szrj      is negative and finite, the correct value of pow(x,1./3.) is
173638fd1498Szrj      a NaN with the "invalid" exception raised, because the value
173738fd1498Szrj      of 1./3. actually has an even denominator.  The correct value
173838fd1498Szrj      of cbrt(x) is a negative real value.  */
173938fd1498Szrj   cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
174038fd1498Szrj   dconst1_3 = real_value_truncate (mode, dconst_third ());
174138fd1498Szrj 
174238fd1498Szrj   if (flag_unsafe_math_optimizations
174338fd1498Szrj       && cbrtfn
174438fd1498Szrj       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
174538fd1498Szrj       && real_equal (&c, &dconst1_3))
174638fd1498Szrj     return build_and_insert_call (gsi, loc, cbrtfn, arg0);
174738fd1498Szrj 
174838fd1498Szrj   /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
174938fd1498Szrj      if we don't have a hardware sqrt insn.  */
175038fd1498Szrj   dconst1_6 = dconst1_3;
175138fd1498Szrj   SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
175238fd1498Szrj 
175338fd1498Szrj   if (flag_unsafe_math_optimizations
175438fd1498Szrj       && sqrtfn
175538fd1498Szrj       && cbrtfn
175638fd1498Szrj       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
175738fd1498Szrj       && speed_p
175838fd1498Szrj       && hw_sqrt_exists
175938fd1498Szrj       && real_equal (&c, &dconst1_6))
176038fd1498Szrj     {
176138fd1498Szrj       /* sqrt(x)  */
176238fd1498Szrj       sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
176338fd1498Szrj 
176438fd1498Szrj       /* cbrt(sqrt(x))  */
176538fd1498Szrj       return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
176638fd1498Szrj     }
176738fd1498Szrj 
176838fd1498Szrj 
176938fd1498Szrj   /* Attempt to expand the POW as a product of square root chains.
177038fd1498Szrj      Expand the 0.25 case even when otpimising for size.  */
177138fd1498Szrj   if (flag_unsafe_math_optimizations
177238fd1498Szrj       && sqrtfn
177338fd1498Szrj       && hw_sqrt_exists
177438fd1498Szrj       && (speed_p || real_equal (&c, &dconst1_4))
177538fd1498Szrj       && !HONOR_SIGNED_ZEROS (mode))
177638fd1498Szrj     {
177738fd1498Szrj       unsigned int max_depth = speed_p
177838fd1498Szrj 				? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH)
177938fd1498Szrj 				: 2;
178038fd1498Szrj 
178138fd1498Szrj       tree expand_with_sqrts
178238fd1498Szrj 	= expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
178338fd1498Szrj 
178438fd1498Szrj       if (expand_with_sqrts)
178538fd1498Szrj 	return expand_with_sqrts;
178638fd1498Szrj     }
178738fd1498Szrj 
178838fd1498Szrj   real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
178938fd1498Szrj   n = real_to_integer (&c2);
179038fd1498Szrj   real_from_integer (&cint, VOIDmode, n, SIGNED);
179138fd1498Szrj   c2_is_int = real_identical (&c2, &cint);
179238fd1498Szrj 
179338fd1498Szrj   /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
179438fd1498Szrj 
179538fd1498Szrj      powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
179638fd1498Szrj      1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.
179738fd1498Szrj 
179838fd1498Szrj      Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
179938fd1498Szrj      different from pow(x, 1./3.) due to rounding and behavior with
180038fd1498Szrj      negative x, we need to constrain this transformation to unsafe
180138fd1498Szrj      math and positive x or finite math.  */
180238fd1498Szrj   real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
180338fd1498Szrj   real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
180438fd1498Szrj   real_round (&c2, mode, &c2);
180538fd1498Szrj   n = real_to_integer (&c2);
180638fd1498Szrj   real_from_integer (&cint, VOIDmode, n, SIGNED);
180738fd1498Szrj   real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
180838fd1498Szrj   real_convert (&c2, mode, &c2);
180938fd1498Szrj 
181038fd1498Szrj   if (flag_unsafe_math_optimizations
181138fd1498Szrj       && cbrtfn
181238fd1498Szrj       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
181338fd1498Szrj       && real_identical (&c2, &c)
181438fd1498Szrj       && !c2_is_int
181538fd1498Szrj       && optimize_function_for_speed_p (cfun)
181638fd1498Szrj       && powi_cost (n / 3) <= POWI_MAX_MULTS)
181738fd1498Szrj     {
181838fd1498Szrj       tree powi_x_ndiv3 = NULL_TREE;
181938fd1498Szrj 
182038fd1498Szrj       /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
182138fd1498Szrj          possible or profitable, give up.  Skip the degenerate case when
182238fd1498Szrj          abs(n) < 3, where the result is always 1.  */
182338fd1498Szrj       if (absu_hwi (n) >= 3)
182438fd1498Szrj 	{
182538fd1498Szrj 	  powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
182638fd1498Szrj 						     abs_hwi (n / 3));
182738fd1498Szrj 	  if (!powi_x_ndiv3)
182838fd1498Szrj 	    return NULL_TREE;
182938fd1498Szrj 	}
183038fd1498Szrj 
183138fd1498Szrj       /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
183238fd1498Szrj          as that creates an unnecessary variable.  Instead, just produce
183338fd1498Szrj          either cbrt(x) or cbrt(x) * cbrt(x).  */
183438fd1498Szrj       cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
183538fd1498Szrj 
183638fd1498Szrj       if (absu_hwi (n) % 3 == 1)
183738fd1498Szrj 	powi_cbrt_x = cbrt_x;
183838fd1498Szrj       else
183938fd1498Szrj 	powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
184038fd1498Szrj 					      cbrt_x, cbrt_x);
184138fd1498Szrj 
184238fd1498Szrj       /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
184338fd1498Szrj       if (absu_hwi (n) < 3)
184438fd1498Szrj 	result = powi_cbrt_x;
184538fd1498Szrj       else
184638fd1498Szrj 	result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
184738fd1498Szrj 					 powi_x_ndiv3, powi_cbrt_x);
184838fd1498Szrj 
184938fd1498Szrj       /* If n is negative, reciprocate the result.  */
185038fd1498Szrj       if (n < 0)
185138fd1498Szrj 	result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
185238fd1498Szrj 					 build_real (type, dconst1), result);
185338fd1498Szrj 
185438fd1498Szrj       return result;
185538fd1498Szrj     }
185638fd1498Szrj 
185738fd1498Szrj   /* No optimizations succeeded.  */
185838fd1498Szrj   return NULL_TREE;
185938fd1498Szrj }
186038fd1498Szrj 
186138fd1498Szrj /* ARG is the argument to a cabs builtin call in GSI with location info
186238fd1498Szrj    LOC.  Create a sequence of statements prior to GSI that calculates
186338fd1498Szrj    sqrt(R*R + I*I), where R and I are the real and imaginary components
186438fd1498Szrj    of ARG, respectively.  Return an expression holding the result.  */
186538fd1498Szrj 
186638fd1498Szrj static tree
gimple_expand_builtin_cabs(gimple_stmt_iterator * gsi,location_t loc,tree arg)186738fd1498Szrj gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
186838fd1498Szrj {
186938fd1498Szrj   tree real_part, imag_part, addend1, addend2, sum, result;
187038fd1498Szrj   tree type = TREE_TYPE (TREE_TYPE (arg));
187138fd1498Szrj   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
187238fd1498Szrj   machine_mode mode = TYPE_MODE (type);
187338fd1498Szrj 
187438fd1498Szrj   if (!flag_unsafe_math_optimizations
187538fd1498Szrj       || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
187638fd1498Szrj       || !sqrtfn
187738fd1498Szrj       || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
187838fd1498Szrj     return NULL_TREE;
187938fd1498Szrj 
188038fd1498Szrj   real_part = build_and_insert_ref (gsi, loc, type, "cabs",
188138fd1498Szrj 				    REALPART_EXPR, arg);
188238fd1498Szrj   addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
188338fd1498Szrj 				    real_part, real_part);
188438fd1498Szrj   imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
188538fd1498Szrj 				    IMAGPART_EXPR, arg);
188638fd1498Szrj   addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
188738fd1498Szrj 				    imag_part, imag_part);
188838fd1498Szrj   sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
188938fd1498Szrj   result = build_and_insert_call (gsi, loc, sqrtfn, sum);
189038fd1498Szrj 
189138fd1498Szrj   return result;
189238fd1498Szrj }
189338fd1498Szrj 
189438fd1498Szrj /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
189538fd1498Szrj    on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
189638fd1498Szrj    an optimal number of multiplies, when n is a constant.  */
189738fd1498Szrj 
189838fd1498Szrj namespace {
189938fd1498Szrj 
190038fd1498Szrj const pass_data pass_data_cse_sincos =
190138fd1498Szrj {
190238fd1498Szrj   GIMPLE_PASS, /* type */
190338fd1498Szrj   "sincos", /* name */
190438fd1498Szrj   OPTGROUP_NONE, /* optinfo_flags */
190538fd1498Szrj   TV_TREE_SINCOS, /* tv_id */
190638fd1498Szrj   PROP_ssa, /* properties_required */
190738fd1498Szrj   PROP_gimple_opt_math, /* properties_provided */
190838fd1498Szrj   0, /* properties_destroyed */
190938fd1498Szrj   0, /* todo_flags_start */
191038fd1498Szrj   TODO_update_ssa, /* todo_flags_finish */
191138fd1498Szrj };
191238fd1498Szrj 
191338fd1498Szrj class pass_cse_sincos : public gimple_opt_pass
191438fd1498Szrj {
191538fd1498Szrj public:
pass_cse_sincos(gcc::context * ctxt)191638fd1498Szrj   pass_cse_sincos (gcc::context *ctxt)
191738fd1498Szrj     : gimple_opt_pass (pass_data_cse_sincos, ctxt)
191838fd1498Szrj   {}
191938fd1498Szrj 
192038fd1498Szrj   /* opt_pass methods: */
gate(function *)192138fd1498Szrj   virtual bool gate (function *)
192238fd1498Szrj     {
192338fd1498Szrj       /* We no longer require either sincos or cexp, since powi expansion
192438fd1498Szrj 	 piggybacks on this pass.  */
192538fd1498Szrj       return optimize;
192638fd1498Szrj     }
192738fd1498Szrj 
192838fd1498Szrj   virtual unsigned int execute (function *);
192938fd1498Szrj 
193038fd1498Szrj }; // class pass_cse_sincos
193138fd1498Szrj 
193238fd1498Szrj unsigned int
execute(function * fun)193338fd1498Szrj pass_cse_sincos::execute (function *fun)
193438fd1498Szrj {
193538fd1498Szrj   basic_block bb;
193638fd1498Szrj   bool cfg_changed = false;
193738fd1498Szrj 
193838fd1498Szrj   calculate_dominance_info (CDI_DOMINATORS);
193938fd1498Szrj   memset (&sincos_stats, 0, sizeof (sincos_stats));
194038fd1498Szrj 
194138fd1498Szrj   FOR_EACH_BB_FN (bb, fun)
194238fd1498Szrj     {
194338fd1498Szrj       gimple_stmt_iterator gsi;
194438fd1498Szrj       bool cleanup_eh = false;
194538fd1498Szrj 
194638fd1498Szrj       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
194738fd1498Szrj         {
194838fd1498Szrj 	  gimple *stmt = gsi_stmt (gsi);
194938fd1498Szrj 
195038fd1498Szrj 	  /* Only the last stmt in a bb could throw, no need to call
195138fd1498Szrj 	     gimple_purge_dead_eh_edges if we change something in the middle
195238fd1498Szrj 	     of a basic block.  */
195338fd1498Szrj 	  cleanup_eh = false;
195438fd1498Szrj 
195538fd1498Szrj 	  if (is_gimple_call (stmt)
195638fd1498Szrj 	      && gimple_call_lhs (stmt))
195738fd1498Szrj 	    {
195838fd1498Szrj 	      tree arg, arg0, arg1, result;
195938fd1498Szrj 	      HOST_WIDE_INT n;
196038fd1498Szrj 	      location_t loc;
196138fd1498Szrj 
196238fd1498Szrj 	      switch (gimple_call_combined_fn (stmt))
196338fd1498Szrj 		{
196438fd1498Szrj 		CASE_CFN_COS:
196538fd1498Szrj 		CASE_CFN_SIN:
196638fd1498Szrj 		CASE_CFN_CEXPI:
196738fd1498Szrj 		  /* Make sure we have either sincos or cexp.  */
196838fd1498Szrj 		  if (!targetm.libc_has_function (function_c99_math_complex)
196938fd1498Szrj 		      && !targetm.libc_has_function (function_sincos))
197038fd1498Szrj 		    break;
197138fd1498Szrj 
197238fd1498Szrj 		  arg = gimple_call_arg (stmt, 0);
197338fd1498Szrj 		  if (TREE_CODE (arg) == SSA_NAME)
197438fd1498Szrj 		    cfg_changed |= execute_cse_sincos_1 (arg);
197538fd1498Szrj 		  break;
197638fd1498Szrj 
197738fd1498Szrj 		CASE_CFN_POW:
197838fd1498Szrj 		  arg0 = gimple_call_arg (stmt, 0);
197938fd1498Szrj 		  arg1 = gimple_call_arg (stmt, 1);
198038fd1498Szrj 
198138fd1498Szrj 		  loc = gimple_location (stmt);
198238fd1498Szrj 		  result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
198338fd1498Szrj 
198438fd1498Szrj 		  if (result)
198538fd1498Szrj 		    {
198638fd1498Szrj 		      tree lhs = gimple_get_lhs (stmt);
198738fd1498Szrj 		      gassign *new_stmt = gimple_build_assign (lhs, result);
198838fd1498Szrj 		      gimple_set_location (new_stmt, loc);
198938fd1498Szrj 		      unlink_stmt_vdef (stmt);
199038fd1498Szrj 		      gsi_replace (&gsi, new_stmt, true);
199138fd1498Szrj 		      cleanup_eh = true;
199238fd1498Szrj 		      if (gimple_vdef (stmt))
199338fd1498Szrj 			release_ssa_name (gimple_vdef (stmt));
199438fd1498Szrj 		    }
199538fd1498Szrj 		  break;
199638fd1498Szrj 
199738fd1498Szrj 		CASE_CFN_POWI:
199838fd1498Szrj 		  arg0 = gimple_call_arg (stmt, 0);
199938fd1498Szrj 		  arg1 = gimple_call_arg (stmt, 1);
200038fd1498Szrj 		  loc = gimple_location (stmt);
200138fd1498Szrj 
200238fd1498Szrj 		  if (real_minus_onep (arg0))
200338fd1498Szrj 		    {
200438fd1498Szrj                       tree t0, t1, cond, one, minus_one;
200538fd1498Szrj 		      gassign *stmt;
200638fd1498Szrj 
200738fd1498Szrj 		      t0 = TREE_TYPE (arg0);
200838fd1498Szrj 		      t1 = TREE_TYPE (arg1);
200938fd1498Szrj 		      one = build_real (t0, dconst1);
201038fd1498Szrj 		      minus_one = build_real (t0, dconstm1);
201138fd1498Szrj 
201238fd1498Szrj 		      cond = make_temp_ssa_name (t1, NULL, "powi_cond");
201338fd1498Szrj 		      stmt = gimple_build_assign (cond, BIT_AND_EXPR,
201438fd1498Szrj 						  arg1, build_int_cst (t1, 1));
201538fd1498Szrj 		      gimple_set_location (stmt, loc);
201638fd1498Szrj 		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
201738fd1498Szrj 
201838fd1498Szrj 		      result = make_temp_ssa_name (t0, NULL, "powi");
201938fd1498Szrj 		      stmt = gimple_build_assign (result, COND_EXPR, cond,
202038fd1498Szrj 						  minus_one, one);
202138fd1498Szrj 		      gimple_set_location (stmt, loc);
202238fd1498Szrj 		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
202338fd1498Szrj 		    }
202438fd1498Szrj 		  else
202538fd1498Szrj 		    {
202638fd1498Szrj 		      if (!tree_fits_shwi_p (arg1))
202738fd1498Szrj 			break;
202838fd1498Szrj 
202938fd1498Szrj 		      n = tree_to_shwi (arg1);
203038fd1498Szrj 		      result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
203138fd1498Szrj 		    }
203238fd1498Szrj 
203338fd1498Szrj 		  if (result)
203438fd1498Szrj 		    {
203538fd1498Szrj 		      tree lhs = gimple_get_lhs (stmt);
203638fd1498Szrj 		      gassign *new_stmt = gimple_build_assign (lhs, result);
203738fd1498Szrj 		      gimple_set_location (new_stmt, loc);
203838fd1498Szrj 		      unlink_stmt_vdef (stmt);
203938fd1498Szrj 		      gsi_replace (&gsi, new_stmt, true);
204038fd1498Szrj 		      cleanup_eh = true;
204138fd1498Szrj 		      if (gimple_vdef (stmt))
204238fd1498Szrj 			release_ssa_name (gimple_vdef (stmt));
204338fd1498Szrj 		    }
204438fd1498Szrj 		  break;
204538fd1498Szrj 
204638fd1498Szrj 		CASE_CFN_CABS:
204738fd1498Szrj 		  arg0 = gimple_call_arg (stmt, 0);
204838fd1498Szrj 		  loc = gimple_location (stmt);
204938fd1498Szrj 		  result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
205038fd1498Szrj 
205138fd1498Szrj 		  if (result)
205238fd1498Szrj 		    {
205338fd1498Szrj 		      tree lhs = gimple_get_lhs (stmt);
205438fd1498Szrj 		      gassign *new_stmt = gimple_build_assign (lhs, result);
205538fd1498Szrj 		      gimple_set_location (new_stmt, loc);
205638fd1498Szrj 		      unlink_stmt_vdef (stmt);
205738fd1498Szrj 		      gsi_replace (&gsi, new_stmt, true);
205838fd1498Szrj 		      cleanup_eh = true;
205938fd1498Szrj 		      if (gimple_vdef (stmt))
206038fd1498Szrj 			release_ssa_name (gimple_vdef (stmt));
206138fd1498Szrj 		    }
206238fd1498Szrj 		  break;
206338fd1498Szrj 
206438fd1498Szrj 		default:;
206538fd1498Szrj 		}
206638fd1498Szrj 	    }
206738fd1498Szrj 	}
206838fd1498Szrj       if (cleanup_eh)
206938fd1498Szrj 	cfg_changed |= gimple_purge_dead_eh_edges (bb);
207038fd1498Szrj     }
207138fd1498Szrj 
207238fd1498Szrj   statistics_counter_event (fun, "sincos statements inserted",
207338fd1498Szrj 			    sincos_stats.inserted);
207438fd1498Szrj 
207538fd1498Szrj   return cfg_changed ? TODO_cleanup_cfg : 0;
207638fd1498Szrj }
207738fd1498Szrj 
207838fd1498Szrj } // anon namespace
207938fd1498Szrj 
208038fd1498Szrj gimple_opt_pass *
make_pass_cse_sincos(gcc::context * ctxt)208138fd1498Szrj make_pass_cse_sincos (gcc::context *ctxt)
208238fd1498Szrj {
208338fd1498Szrj   return new pass_cse_sincos (ctxt);
208438fd1498Szrj }
208538fd1498Szrj 
208638fd1498Szrj /* Return true if stmt is a type conversion operation that can be stripped
208738fd1498Szrj    when used in a widening multiply operation.  */
208838fd1498Szrj static bool
widening_mult_conversion_strippable_p(tree result_type,gimple * stmt)208938fd1498Szrj widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
209038fd1498Szrj {
209138fd1498Szrj   enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
209238fd1498Szrj 
209338fd1498Szrj   if (TREE_CODE (result_type) == INTEGER_TYPE)
209438fd1498Szrj     {
209538fd1498Szrj       tree op_type;
209638fd1498Szrj       tree inner_op_type;
209738fd1498Szrj 
209838fd1498Szrj       if (!CONVERT_EXPR_CODE_P (rhs_code))
209938fd1498Szrj 	return false;
210038fd1498Szrj 
210138fd1498Szrj       op_type = TREE_TYPE (gimple_assign_lhs (stmt));
210238fd1498Szrj 
210338fd1498Szrj       /* If the type of OP has the same precision as the result, then
210438fd1498Szrj 	 we can strip this conversion.  The multiply operation will be
210538fd1498Szrj 	 selected to create the correct extension as a by-product.  */
210638fd1498Szrj       if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
210738fd1498Szrj 	return true;
210838fd1498Szrj 
210938fd1498Szrj       /* We can also strip a conversion if it preserves the signed-ness of
211038fd1498Szrj 	 the operation and doesn't narrow the range.  */
211138fd1498Szrj       inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
211238fd1498Szrj 
211338fd1498Szrj       /* If the inner-most type is unsigned, then we can strip any
211438fd1498Szrj 	 intermediate widening operation.  If it's signed, then the
211538fd1498Szrj 	 intermediate widening operation must also be signed.  */
211638fd1498Szrj       if ((TYPE_UNSIGNED (inner_op_type)
211738fd1498Szrj 	   || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
211838fd1498Szrj 	  && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
211938fd1498Szrj 	return true;
212038fd1498Szrj 
212138fd1498Szrj       return false;
212238fd1498Szrj     }
212338fd1498Szrj 
212438fd1498Szrj   return rhs_code == FIXED_CONVERT_EXPR;
212538fd1498Szrj }
212638fd1498Szrj 
212738fd1498Szrj /* Return true if RHS is a suitable operand for a widening multiplication,
212838fd1498Szrj    assuming a target type of TYPE.
212938fd1498Szrj    There are two cases:
213038fd1498Szrj 
213138fd1498Szrj      - RHS makes some value at least twice as wide.  Store that value
213238fd1498Szrj        in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
213338fd1498Szrj 
213438fd1498Szrj      - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
213538fd1498Szrj        but leave *TYPE_OUT untouched.  */
213638fd1498Szrj 
213738fd1498Szrj static bool
is_widening_mult_rhs_p(tree type,tree rhs,tree * type_out,tree * new_rhs_out)213838fd1498Szrj is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
213938fd1498Szrj 			tree *new_rhs_out)
214038fd1498Szrj {
214138fd1498Szrj   gimple *stmt;
214238fd1498Szrj   tree type1, rhs1;
214338fd1498Szrj 
214438fd1498Szrj   if (TREE_CODE (rhs) == SSA_NAME)
214538fd1498Szrj     {
214638fd1498Szrj       stmt = SSA_NAME_DEF_STMT (rhs);
214738fd1498Szrj       if (is_gimple_assign (stmt))
214838fd1498Szrj 	{
214938fd1498Szrj 	  if (! widening_mult_conversion_strippable_p (type, stmt))
215038fd1498Szrj 	    rhs1 = rhs;
215138fd1498Szrj 	  else
215238fd1498Szrj 	    {
215338fd1498Szrj 	      rhs1 = gimple_assign_rhs1 (stmt);
215438fd1498Szrj 
215538fd1498Szrj 	      if (TREE_CODE (rhs1) == INTEGER_CST)
215638fd1498Szrj 		{
215738fd1498Szrj 		  *new_rhs_out = rhs1;
215838fd1498Szrj 		  *type_out = NULL;
215938fd1498Szrj 		  return true;
216038fd1498Szrj 		}
216138fd1498Szrj 	    }
216238fd1498Szrj 	}
216338fd1498Szrj       else
216438fd1498Szrj 	rhs1 = rhs;
216538fd1498Szrj 
216638fd1498Szrj       type1 = TREE_TYPE (rhs1);
216738fd1498Szrj 
216838fd1498Szrj       if (TREE_CODE (type1) != TREE_CODE (type)
216938fd1498Szrj 	  || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
217038fd1498Szrj 	return false;
217138fd1498Szrj 
217238fd1498Szrj       *new_rhs_out = rhs1;
217338fd1498Szrj       *type_out = type1;
217438fd1498Szrj       return true;
217538fd1498Szrj     }
217638fd1498Szrj 
217738fd1498Szrj   if (TREE_CODE (rhs) == INTEGER_CST)
217838fd1498Szrj     {
217938fd1498Szrj       *new_rhs_out = rhs;
218038fd1498Szrj       *type_out = NULL;
218138fd1498Szrj       return true;
218238fd1498Szrj     }
218338fd1498Szrj 
218438fd1498Szrj   return false;
218538fd1498Szrj }
218638fd1498Szrj 
218738fd1498Szrj /* Return true if STMT performs a widening multiplication, assuming the
218838fd1498Szrj    output type is TYPE.  If so, store the unwidened types of the operands
218938fd1498Szrj    in *TYPE1_OUT and *TYPE2_OUT respectively.  Also fill *RHS1_OUT and
219038fd1498Szrj    *RHS2_OUT such that converting those operands to types *TYPE1_OUT
219138fd1498Szrj    and *TYPE2_OUT would give the operands of the multiplication.  */
219238fd1498Szrj 
219338fd1498Szrj static bool
is_widening_mult_p(gimple * stmt,tree * type1_out,tree * rhs1_out,tree * type2_out,tree * rhs2_out)219438fd1498Szrj is_widening_mult_p (gimple *stmt,
219538fd1498Szrj 		    tree *type1_out, tree *rhs1_out,
219638fd1498Szrj 		    tree *type2_out, tree *rhs2_out)
219738fd1498Szrj {
219838fd1498Szrj   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
219938fd1498Szrj 
220038fd1498Szrj   if (TREE_CODE (type) == INTEGER_TYPE)
220138fd1498Szrj     {
220238fd1498Szrj       if (TYPE_OVERFLOW_TRAPS (type))
220338fd1498Szrj 	return false;
220438fd1498Szrj     }
220538fd1498Szrj   else if (TREE_CODE (type) != FIXED_POINT_TYPE)
220638fd1498Szrj     return false;
220738fd1498Szrj 
220838fd1498Szrj   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
220938fd1498Szrj 			       rhs1_out))
221038fd1498Szrj     return false;
221138fd1498Szrj 
221238fd1498Szrj   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
221338fd1498Szrj 			       rhs2_out))
221438fd1498Szrj     return false;
221538fd1498Szrj 
221638fd1498Szrj   if (*type1_out == NULL)
221738fd1498Szrj     {
221838fd1498Szrj       if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
221938fd1498Szrj 	return false;
222038fd1498Szrj       *type1_out = *type2_out;
222138fd1498Szrj     }
222238fd1498Szrj 
222338fd1498Szrj   if (*type2_out == NULL)
222438fd1498Szrj     {
222538fd1498Szrj       if (!int_fits_type_p (*rhs2_out, *type1_out))
222638fd1498Szrj 	return false;
222738fd1498Szrj       *type2_out = *type1_out;
222838fd1498Szrj     }
222938fd1498Szrj 
223038fd1498Szrj   /* Ensure that the larger of the two operands comes first. */
223138fd1498Szrj   if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
223238fd1498Szrj     {
223338fd1498Szrj       std::swap (*type1_out, *type2_out);
223438fd1498Szrj       std::swap (*rhs1_out, *rhs2_out);
223538fd1498Szrj     }
223638fd1498Szrj 
223738fd1498Szrj   return true;
223838fd1498Szrj }
223938fd1498Szrj 
224038fd1498Szrj /* Check to see if the CALL statement is an invocation of copysign
224138fd1498Szrj    with 1. being the first argument.  */
224238fd1498Szrj static bool
is_copysign_call_with_1(gimple * call)224338fd1498Szrj is_copysign_call_with_1 (gimple *call)
224438fd1498Szrj {
224538fd1498Szrj   gcall *c = dyn_cast <gcall *> (call);
224638fd1498Szrj   if (! c)
224738fd1498Szrj     return false;
224838fd1498Szrj 
224938fd1498Szrj   enum combined_fn code = gimple_call_combined_fn (c);
225038fd1498Szrj 
225138fd1498Szrj   if (code == CFN_LAST)
225238fd1498Szrj     return false;
225338fd1498Szrj 
225438fd1498Szrj   if (builtin_fn_p (code))
225538fd1498Szrj     {
225638fd1498Szrj       switch (as_builtin_fn (code))
225738fd1498Szrj 	{
225838fd1498Szrj 	CASE_FLT_FN (BUILT_IN_COPYSIGN):
225938fd1498Szrj 	CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
226038fd1498Szrj 	  return real_onep (gimple_call_arg (c, 0));
226138fd1498Szrj 	default:
226238fd1498Szrj 	  return false;
226338fd1498Szrj 	}
226438fd1498Szrj     }
226538fd1498Szrj 
226638fd1498Szrj   if (internal_fn_p (code))
226738fd1498Szrj     {
226838fd1498Szrj       switch (as_internal_fn (code))
226938fd1498Szrj 	{
227038fd1498Szrj 	case IFN_COPYSIGN:
227138fd1498Szrj 	  return real_onep (gimple_call_arg (c, 0));
227238fd1498Szrj 	default:
227338fd1498Szrj 	  return false;
227438fd1498Szrj 	}
227538fd1498Szrj     }
227638fd1498Szrj 
227738fd1498Szrj    return false;
227838fd1498Szrj }
227938fd1498Szrj 
228038fd1498Szrj /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
228138fd1498Szrj    This only happens when the the xorsign optab is defined, if the
228238fd1498Szrj    pattern is not a xorsign pattern or if expansion fails FALSE is
228338fd1498Szrj    returned, otherwise TRUE is returned.  */
228438fd1498Szrj static bool
convert_expand_mult_copysign(gimple * stmt,gimple_stmt_iterator * gsi)228538fd1498Szrj convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
228638fd1498Szrj {
228738fd1498Szrj   tree treeop0, treeop1, lhs, type;
228838fd1498Szrj   location_t loc = gimple_location (stmt);
228938fd1498Szrj   lhs = gimple_assign_lhs (stmt);
229038fd1498Szrj   treeop0 = gimple_assign_rhs1 (stmt);
229138fd1498Szrj   treeop1 = gimple_assign_rhs2 (stmt);
229238fd1498Szrj   type = TREE_TYPE (lhs);
229338fd1498Szrj   machine_mode mode = TYPE_MODE (type);
229438fd1498Szrj 
229538fd1498Szrj   if (HONOR_SNANS (type))
229638fd1498Szrj     return false;
229738fd1498Szrj 
229838fd1498Szrj   if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
229938fd1498Szrj     {
230038fd1498Szrj       gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
230138fd1498Szrj       if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
230238fd1498Szrj 	{
230338fd1498Szrj 	  call0 = SSA_NAME_DEF_STMT (treeop1);
230438fd1498Szrj 	  if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
230538fd1498Szrj 	    return false;
230638fd1498Szrj 
230738fd1498Szrj 	  treeop1 = treeop0;
230838fd1498Szrj 	}
230938fd1498Szrj 	if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
231038fd1498Szrj 	  return false;
231138fd1498Szrj 
231238fd1498Szrj 	gcall *c = as_a<gcall*> (call0);
231338fd1498Szrj 	treeop0 = gimple_call_arg (c, 1);
231438fd1498Szrj 
231538fd1498Szrj 	gcall *call_stmt
231638fd1498Szrj 	  = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
231738fd1498Szrj 	gimple_set_lhs (call_stmt, lhs);
231838fd1498Szrj 	gimple_set_location (call_stmt, loc);
231938fd1498Szrj 	gsi_replace (gsi, call_stmt, true);
232038fd1498Szrj 	return true;
232138fd1498Szrj     }
232238fd1498Szrj 
232338fd1498Szrj   return false;
232438fd1498Szrj }
232538fd1498Szrj 
232638fd1498Szrj /* Process a single gimple statement STMT, which has a MULT_EXPR as
232738fd1498Szrj    its rhs, and try to convert it into a WIDEN_MULT_EXPR.  The return
232838fd1498Szrj    value is true iff we converted the statement.  */
232938fd1498Szrj 
233038fd1498Szrj static bool
convert_mult_to_widen(gimple * stmt,gimple_stmt_iterator * gsi)233138fd1498Szrj convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
233238fd1498Szrj {
233338fd1498Szrj   tree lhs, rhs1, rhs2, type, type1, type2;
233438fd1498Szrj   enum insn_code handler;
233538fd1498Szrj   scalar_int_mode to_mode, from_mode, actual_mode;
233638fd1498Szrj   optab op;
233738fd1498Szrj   int actual_precision;
233838fd1498Szrj   location_t loc = gimple_location (stmt);
233938fd1498Szrj   bool from_unsigned1, from_unsigned2;
234038fd1498Szrj 
234138fd1498Szrj   lhs = gimple_assign_lhs (stmt);
234238fd1498Szrj   type = TREE_TYPE (lhs);
234338fd1498Szrj   if (TREE_CODE (type) != INTEGER_TYPE)
234438fd1498Szrj     return false;
234538fd1498Szrj 
234638fd1498Szrj   if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
234738fd1498Szrj     return false;
234838fd1498Szrj 
234938fd1498Szrj   to_mode = SCALAR_INT_TYPE_MODE (type);
235038fd1498Szrj   from_mode = SCALAR_INT_TYPE_MODE (type1);
235138fd1498Szrj   if (to_mode == from_mode)
235238fd1498Szrj     return false;
235338fd1498Szrj 
235438fd1498Szrj   from_unsigned1 = TYPE_UNSIGNED (type1);
235538fd1498Szrj   from_unsigned2 = TYPE_UNSIGNED (type2);
235638fd1498Szrj 
235738fd1498Szrj   if (from_unsigned1 && from_unsigned2)
235838fd1498Szrj     op = umul_widen_optab;
235938fd1498Szrj   else if (!from_unsigned1 && !from_unsigned2)
236038fd1498Szrj     op = smul_widen_optab;
236138fd1498Szrj   else
236238fd1498Szrj     op = usmul_widen_optab;
236338fd1498Szrj 
236438fd1498Szrj   handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
236538fd1498Szrj 						  &actual_mode);
236638fd1498Szrj 
236738fd1498Szrj   if (handler == CODE_FOR_nothing)
236838fd1498Szrj     {
236938fd1498Szrj       if (op != smul_widen_optab)
237038fd1498Szrj 	{
237138fd1498Szrj 	  /* We can use a signed multiply with unsigned types as long as
237238fd1498Szrj 	     there is a wider mode to use, or it is the smaller of the two
237338fd1498Szrj 	     types that is unsigned.  Note that type1 >= type2, always.  */
237438fd1498Szrj 	  if ((TYPE_UNSIGNED (type1)
237538fd1498Szrj 	       && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
237638fd1498Szrj 	      || (TYPE_UNSIGNED (type2)
237738fd1498Szrj 		  && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
237838fd1498Szrj 	    {
237938fd1498Szrj 	      if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
238038fd1498Szrj 		  || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
238138fd1498Szrj 		return false;
238238fd1498Szrj 	    }
238338fd1498Szrj 
238438fd1498Szrj 	  op = smul_widen_optab;
238538fd1498Szrj 	  handler = find_widening_optab_handler_and_mode (op, to_mode,
238638fd1498Szrj 							  from_mode,
238738fd1498Szrj 							  &actual_mode);
238838fd1498Szrj 
238938fd1498Szrj 	  if (handler == CODE_FOR_nothing)
239038fd1498Szrj 	    return false;
239138fd1498Szrj 
239238fd1498Szrj 	  from_unsigned1 = from_unsigned2 = false;
239338fd1498Szrj 	}
239438fd1498Szrj       else
239538fd1498Szrj 	return false;
239638fd1498Szrj     }
239738fd1498Szrj 
239838fd1498Szrj   /* Ensure that the inputs to the handler are in the correct precison
239938fd1498Szrj      for the opcode.  This will be the full mode size.  */
240038fd1498Szrj   actual_precision = GET_MODE_PRECISION (actual_mode);
240138fd1498Szrj   if (2 * actual_precision > TYPE_PRECISION (type))
240238fd1498Szrj     return false;
240338fd1498Szrj   if (actual_precision != TYPE_PRECISION (type1)
240438fd1498Szrj       || from_unsigned1 != TYPE_UNSIGNED (type1))
240538fd1498Szrj     rhs1 = build_and_insert_cast (gsi, loc,
240638fd1498Szrj 				  build_nonstandard_integer_type
240738fd1498Szrj 				    (actual_precision, from_unsigned1), rhs1);
240838fd1498Szrj   if (actual_precision != TYPE_PRECISION (type2)
240938fd1498Szrj       || from_unsigned2 != TYPE_UNSIGNED (type2))
241038fd1498Szrj     rhs2 = build_and_insert_cast (gsi, loc,
241138fd1498Szrj 				  build_nonstandard_integer_type
241238fd1498Szrj 				    (actual_precision, from_unsigned2), rhs2);
241338fd1498Szrj 
241438fd1498Szrj   /* Handle constants.  */
241538fd1498Szrj   if (TREE_CODE (rhs1) == INTEGER_CST)
241638fd1498Szrj     rhs1 = fold_convert (type1, rhs1);
241738fd1498Szrj   if (TREE_CODE (rhs2) == INTEGER_CST)
241838fd1498Szrj     rhs2 = fold_convert (type2, rhs2);
241938fd1498Szrj 
242038fd1498Szrj   gimple_assign_set_rhs1 (stmt, rhs1);
242138fd1498Szrj   gimple_assign_set_rhs2 (stmt, rhs2);
242238fd1498Szrj   gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
242338fd1498Szrj   update_stmt (stmt);
242438fd1498Szrj   widen_mul_stats.widen_mults_inserted++;
242538fd1498Szrj   return true;
242638fd1498Szrj }
242738fd1498Szrj 
242838fd1498Szrj /* Process a single gimple statement STMT, which is found at the
242938fd1498Szrj    iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
243038fd1498Szrj    rhs (given by CODE), and try to convert it into a
243138fd1498Szrj    WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
243238fd1498Szrj    is true iff we converted the statement.  */
243338fd1498Szrj 
243438fd1498Szrj static bool
convert_plusminus_to_widen(gimple_stmt_iterator * gsi,gimple * stmt,enum tree_code code)243538fd1498Szrj convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
243638fd1498Szrj 			    enum tree_code code)
243738fd1498Szrj {
243838fd1498Szrj   gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
243938fd1498Szrj   gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
244038fd1498Szrj   tree type, type1, type2, optype;
244138fd1498Szrj   tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
244238fd1498Szrj   enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
244338fd1498Szrj   optab this_optab;
244438fd1498Szrj   enum tree_code wmult_code;
244538fd1498Szrj   enum insn_code handler;
244638fd1498Szrj   scalar_mode to_mode, from_mode, actual_mode;
244738fd1498Szrj   location_t loc = gimple_location (stmt);
244838fd1498Szrj   int actual_precision;
244938fd1498Szrj   bool from_unsigned1, from_unsigned2;
245038fd1498Szrj 
245138fd1498Szrj   lhs = gimple_assign_lhs (stmt);
245238fd1498Szrj   type = TREE_TYPE (lhs);
245338fd1498Szrj   if (TREE_CODE (type) != INTEGER_TYPE
245438fd1498Szrj       && TREE_CODE (type) != FIXED_POINT_TYPE)
245538fd1498Szrj     return false;
245638fd1498Szrj 
245738fd1498Szrj   if (code == MINUS_EXPR)
245838fd1498Szrj     wmult_code = WIDEN_MULT_MINUS_EXPR;
245938fd1498Szrj   else
246038fd1498Szrj     wmult_code = WIDEN_MULT_PLUS_EXPR;
246138fd1498Szrj 
246238fd1498Szrj   rhs1 = gimple_assign_rhs1 (stmt);
246338fd1498Szrj   rhs2 = gimple_assign_rhs2 (stmt);
246438fd1498Szrj 
246538fd1498Szrj   if (TREE_CODE (rhs1) == SSA_NAME)
246638fd1498Szrj     {
246738fd1498Szrj       rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
246838fd1498Szrj       if (is_gimple_assign (rhs1_stmt))
246938fd1498Szrj 	rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
247038fd1498Szrj     }
247138fd1498Szrj 
247238fd1498Szrj   if (TREE_CODE (rhs2) == SSA_NAME)
247338fd1498Szrj     {
247438fd1498Szrj       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
247538fd1498Szrj       if (is_gimple_assign (rhs2_stmt))
247638fd1498Szrj 	rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
247738fd1498Szrj     }
247838fd1498Szrj 
247938fd1498Szrj   /* Allow for one conversion statement between the multiply
248038fd1498Szrj      and addition/subtraction statement.  If there are more than
248138fd1498Szrj      one conversions then we assume they would invalidate this
248238fd1498Szrj      transformation.  If that's not the case then they should have
248338fd1498Szrj      been folded before now.  */
248438fd1498Szrj   if (CONVERT_EXPR_CODE_P (rhs1_code))
248538fd1498Szrj     {
248638fd1498Szrj       conv1_stmt = rhs1_stmt;
248738fd1498Szrj       rhs1 = gimple_assign_rhs1 (rhs1_stmt);
248838fd1498Szrj       if (TREE_CODE (rhs1) == SSA_NAME)
248938fd1498Szrj 	{
249038fd1498Szrj 	  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
249138fd1498Szrj 	  if (is_gimple_assign (rhs1_stmt))
249238fd1498Szrj 	    rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
249338fd1498Szrj 	}
249438fd1498Szrj       else
249538fd1498Szrj 	return false;
249638fd1498Szrj     }
249738fd1498Szrj   if (CONVERT_EXPR_CODE_P (rhs2_code))
249838fd1498Szrj     {
249938fd1498Szrj       conv2_stmt = rhs2_stmt;
250038fd1498Szrj       rhs2 = gimple_assign_rhs1 (rhs2_stmt);
250138fd1498Szrj       if (TREE_CODE (rhs2) == SSA_NAME)
250238fd1498Szrj 	{
250338fd1498Szrj 	  rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
250438fd1498Szrj 	  if (is_gimple_assign (rhs2_stmt))
250538fd1498Szrj 	    rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
250638fd1498Szrj 	}
250738fd1498Szrj       else
250838fd1498Szrj 	return false;
250938fd1498Szrj     }
251038fd1498Szrj 
251138fd1498Szrj   /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
251238fd1498Szrj      is_widening_mult_p, but we still need the rhs returns.
251338fd1498Szrj 
251438fd1498Szrj      It might also appear that it would be sufficient to use the existing
251538fd1498Szrj      operands of the widening multiply, but that would limit the choice of
251638fd1498Szrj      multiply-and-accumulate instructions.
251738fd1498Szrj 
251838fd1498Szrj      If the widened-multiplication result has more than one uses, it is
251938fd1498Szrj      probably wiser not to do the conversion.  */
252038fd1498Szrj   if (code == PLUS_EXPR
252138fd1498Szrj       && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
252238fd1498Szrj     {
252338fd1498Szrj       if (!has_single_use (rhs1)
252438fd1498Szrj 	  || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
252538fd1498Szrj 				  &type2, &mult_rhs2))
252638fd1498Szrj 	return false;
252738fd1498Szrj       add_rhs = rhs2;
252838fd1498Szrj       conv_stmt = conv1_stmt;
252938fd1498Szrj     }
253038fd1498Szrj   else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
253138fd1498Szrj     {
253238fd1498Szrj       if (!has_single_use (rhs2)
253338fd1498Szrj 	  || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
253438fd1498Szrj 				  &type2, &mult_rhs2))
253538fd1498Szrj 	return false;
253638fd1498Szrj       add_rhs = rhs1;
253738fd1498Szrj       conv_stmt = conv2_stmt;
253838fd1498Szrj     }
253938fd1498Szrj   else
254038fd1498Szrj     return false;
254138fd1498Szrj 
254238fd1498Szrj   to_mode = SCALAR_TYPE_MODE (type);
254338fd1498Szrj   from_mode = SCALAR_TYPE_MODE (type1);
254438fd1498Szrj   if (to_mode == from_mode)
254538fd1498Szrj     return false;
254638fd1498Szrj 
254738fd1498Szrj   from_unsigned1 = TYPE_UNSIGNED (type1);
254838fd1498Szrj   from_unsigned2 = TYPE_UNSIGNED (type2);
254938fd1498Szrj   optype = type1;
255038fd1498Szrj 
255138fd1498Szrj   /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
255238fd1498Szrj   if (from_unsigned1 != from_unsigned2)
255338fd1498Szrj     {
255438fd1498Szrj       if (!INTEGRAL_TYPE_P (type))
255538fd1498Szrj 	return false;
255638fd1498Szrj       /* We can use a signed multiply with unsigned types as long as
255738fd1498Szrj 	 there is a wider mode to use, or it is the smaller of the two
255838fd1498Szrj 	 types that is unsigned.  Note that type1 >= type2, always.  */
255938fd1498Szrj       if ((from_unsigned1
256038fd1498Szrj 	   && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
256138fd1498Szrj 	  || (from_unsigned2
256238fd1498Szrj 	      && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
256338fd1498Szrj 	{
256438fd1498Szrj 	  if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
256538fd1498Szrj 	      || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
256638fd1498Szrj 	    return false;
256738fd1498Szrj 	}
256838fd1498Szrj 
256938fd1498Szrj       from_unsigned1 = from_unsigned2 = false;
257038fd1498Szrj       optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
257138fd1498Szrj 					       false);
257238fd1498Szrj     }
257338fd1498Szrj 
257438fd1498Szrj   /* If there was a conversion between the multiply and addition
257538fd1498Szrj      then we need to make sure it fits a multiply-and-accumulate.
257638fd1498Szrj      The should be a single mode change which does not change the
257738fd1498Szrj      value.  */
257838fd1498Szrj   if (conv_stmt)
257938fd1498Szrj     {
258038fd1498Szrj       /* We use the original, unmodified data types for this.  */
258138fd1498Szrj       tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
258238fd1498Szrj       tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
258338fd1498Szrj       int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
258438fd1498Szrj       bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
258538fd1498Szrj 
258638fd1498Szrj       if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
258738fd1498Szrj 	{
258838fd1498Szrj 	  /* Conversion is a truncate.  */
258938fd1498Szrj 	  if (TYPE_PRECISION (to_type) < data_size)
259038fd1498Szrj 	    return false;
259138fd1498Szrj 	}
259238fd1498Szrj       else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
259338fd1498Szrj 	{
259438fd1498Szrj 	  /* Conversion is an extend.  Check it's the right sort.  */
259538fd1498Szrj 	  if (TYPE_UNSIGNED (from_type) != is_unsigned
259638fd1498Szrj 	      && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
259738fd1498Szrj 	    return false;
259838fd1498Szrj 	}
259938fd1498Szrj       /* else convert is a no-op for our purposes.  */
260038fd1498Szrj     }
260138fd1498Szrj 
260238fd1498Szrj   /* Verify that the machine can perform a widening multiply
260338fd1498Szrj      accumulate in this mode/signedness combination, otherwise
260438fd1498Szrj      this transformation is likely to pessimize code.  */
260538fd1498Szrj   this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
260638fd1498Szrj   handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
260738fd1498Szrj 						  from_mode, &actual_mode);
260838fd1498Szrj 
260938fd1498Szrj   if (handler == CODE_FOR_nothing)
261038fd1498Szrj     return false;
261138fd1498Szrj 
261238fd1498Szrj   /* Ensure that the inputs to the handler are in the correct precison
261338fd1498Szrj      for the opcode.  This will be the full mode size.  */
261438fd1498Szrj   actual_precision = GET_MODE_PRECISION (actual_mode);
261538fd1498Szrj   if (actual_precision != TYPE_PRECISION (type1)
261638fd1498Szrj       || from_unsigned1 != TYPE_UNSIGNED (type1))
261738fd1498Szrj     mult_rhs1 = build_and_insert_cast (gsi, loc,
261838fd1498Szrj 				       build_nonstandard_integer_type
261938fd1498Szrj 				         (actual_precision, from_unsigned1),
262038fd1498Szrj 				       mult_rhs1);
262138fd1498Szrj   if (actual_precision != TYPE_PRECISION (type2)
262238fd1498Szrj       || from_unsigned2 != TYPE_UNSIGNED (type2))
262338fd1498Szrj     mult_rhs2 = build_and_insert_cast (gsi, loc,
262438fd1498Szrj 				       build_nonstandard_integer_type
262538fd1498Szrj 					 (actual_precision, from_unsigned2),
262638fd1498Szrj 				       mult_rhs2);
262738fd1498Szrj 
262838fd1498Szrj   if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
262938fd1498Szrj     add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
263038fd1498Szrj 
263138fd1498Szrj   /* Handle constants.  */
263238fd1498Szrj   if (TREE_CODE (mult_rhs1) == INTEGER_CST)
263338fd1498Szrj     mult_rhs1 = fold_convert (type1, mult_rhs1);
263438fd1498Szrj   if (TREE_CODE (mult_rhs2) == INTEGER_CST)
263538fd1498Szrj     mult_rhs2 = fold_convert (type2, mult_rhs2);
263638fd1498Szrj 
263738fd1498Szrj   gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
263838fd1498Szrj 				  add_rhs);
263938fd1498Szrj   update_stmt (gsi_stmt (*gsi));
264038fd1498Szrj   widen_mul_stats.maccs_inserted++;
264138fd1498Szrj   return true;
264238fd1498Szrj }
264338fd1498Szrj 
264438fd1498Szrj /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
264538fd1498Szrj    OP2 and which we know is used in statements that can be, together with the
264638fd1498Szrj    multiplication, converted to FMAs, perform the transformation.  */
264738fd1498Szrj 
264838fd1498Szrj static void
convert_mult_to_fma_1(tree mul_result,tree op1,tree op2)264938fd1498Szrj convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
265038fd1498Szrj {
265138fd1498Szrj   tree type = TREE_TYPE (mul_result);
265238fd1498Szrj   gimple *use_stmt;
265338fd1498Szrj   imm_use_iterator imm_iter;
265438fd1498Szrj   gassign *fma_stmt;
265538fd1498Szrj 
265638fd1498Szrj   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
265738fd1498Szrj     {
265838fd1498Szrj       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
265938fd1498Szrj       enum tree_code use_code;
266038fd1498Szrj       tree addop, mulop1 = op1, result = mul_result;
266138fd1498Szrj       bool negate_p = false;
266238fd1498Szrj 
266338fd1498Szrj       if (is_gimple_debug (use_stmt))
266438fd1498Szrj 	continue;
266538fd1498Szrj 
266638fd1498Szrj       use_code = gimple_assign_rhs_code (use_stmt);
266738fd1498Szrj       if (use_code == NEGATE_EXPR)
266838fd1498Szrj 	{
266938fd1498Szrj 	  result = gimple_assign_lhs (use_stmt);
267038fd1498Szrj 	  use_operand_p use_p;
267138fd1498Szrj 	  gimple *neguse_stmt;
267238fd1498Szrj 	  single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
267338fd1498Szrj 	  gsi_remove (&gsi, true);
267438fd1498Szrj 	  release_defs (use_stmt);
267538fd1498Szrj 
267638fd1498Szrj 	  use_stmt = neguse_stmt;
267738fd1498Szrj 	  gsi = gsi_for_stmt (use_stmt);
267838fd1498Szrj 	  use_code = gimple_assign_rhs_code (use_stmt);
267938fd1498Szrj 	  negate_p = true;
268038fd1498Szrj 	}
268138fd1498Szrj 
268238fd1498Szrj       if (gimple_assign_rhs1 (use_stmt) == result)
268338fd1498Szrj 	{
268438fd1498Szrj 	  addop = gimple_assign_rhs2 (use_stmt);
268538fd1498Szrj 	  /* a * b - c -> a * b + (-c)  */
268638fd1498Szrj 	  if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
268738fd1498Szrj 	    addop = force_gimple_operand_gsi (&gsi,
268838fd1498Szrj 					      build1 (NEGATE_EXPR,
268938fd1498Szrj 						      type, addop),
269038fd1498Szrj 					      true, NULL_TREE, true,
269138fd1498Szrj 					      GSI_SAME_STMT);
269238fd1498Szrj 	}
269338fd1498Szrj       else
269438fd1498Szrj 	{
269538fd1498Szrj 	  addop = gimple_assign_rhs1 (use_stmt);
269638fd1498Szrj 	  /* a - b * c -> (-b) * c + a */
269738fd1498Szrj 	  if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
269838fd1498Szrj 	    negate_p = !negate_p;
269938fd1498Szrj 	}
270038fd1498Szrj 
270138fd1498Szrj       if (negate_p)
270238fd1498Szrj 	mulop1 = force_gimple_operand_gsi (&gsi,
270338fd1498Szrj 					   build1 (NEGATE_EXPR,
270438fd1498Szrj 						   type, mulop1),
270538fd1498Szrj 					   true, NULL_TREE, true,
270638fd1498Szrj 					   GSI_SAME_STMT);
270738fd1498Szrj 
270838fd1498Szrj       fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt),
270938fd1498Szrj 				      FMA_EXPR, mulop1, op2, addop);
271038fd1498Szrj 
271138fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
271238fd1498Szrj 	{
271338fd1498Szrj 	  fprintf (dump_file, "Generated FMA ");
271438fd1498Szrj 	  print_gimple_stmt (dump_file, fma_stmt, 0, 0);
271538fd1498Szrj 	  fprintf (dump_file, "\n");
271638fd1498Szrj 	}
271738fd1498Szrj 
271838fd1498Szrj       gsi_replace (&gsi, fma_stmt, true);
271938fd1498Szrj       widen_mul_stats.fmas_inserted++;
272038fd1498Szrj     }
272138fd1498Szrj }
272238fd1498Szrj 
272338fd1498Szrj /* Data necessary to perform the actual transformation from a multiplication
272438fd1498Szrj    and an addition to an FMA after decision is taken it should be done and to
272538fd1498Szrj    then delete the multiplication statement from the function IL.  */
272638fd1498Szrj 
272738fd1498Szrj struct fma_transformation_info
272838fd1498Szrj {
272938fd1498Szrj   gimple *mul_stmt;
273038fd1498Szrj   tree mul_result;
273138fd1498Szrj   tree op1;
273238fd1498Szrj   tree op2;
273338fd1498Szrj };
273438fd1498Szrj 
273538fd1498Szrj /* Structure containing the current state of FMA deferring, i.e. whether we are
273638fd1498Szrj    deferring, whether to continue deferring, and all data necessary to come
273738fd1498Szrj    back and perform all deferred transformations.  */
273838fd1498Szrj 
273938fd1498Szrj class fma_deferring_state
274038fd1498Szrj {
274138fd1498Szrj public:
274238fd1498Szrj   /* Class constructor.  Pass true as PERFORM_DEFERRING in order to actually
274338fd1498Szrj      do any deferring.  */
274438fd1498Szrj 
fma_deferring_state(bool perform_deferring)274538fd1498Szrj   fma_deferring_state (bool perform_deferring)
274638fd1498Szrj     : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
274738fd1498Szrj       m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
274838fd1498Szrj 
274938fd1498Szrj   /* List of FMA candidates for which we the transformation has been determined
275038fd1498Szrj      possible but we at this point in BB analysis we do not consider them
275138fd1498Szrj      beneficial.  */
275238fd1498Szrj   auto_vec<fma_transformation_info, 8> m_candidates;
275338fd1498Szrj 
275438fd1498Szrj   /* Set of results of multiplication that are part of an already deferred FMA
275538fd1498Szrj      candidates.  */
275638fd1498Szrj   hash_set<tree> m_mul_result_set;
275738fd1498Szrj 
275838fd1498Szrj   /* The PHI that supposedly feeds back result of a FMA to another over loop
275938fd1498Szrj      boundary.  */
276038fd1498Szrj   gphi *m_initial_phi;
276138fd1498Szrj 
276238fd1498Szrj   /* Result of the last produced FMA candidate or NULL if there has not been
276338fd1498Szrj      one.  */
276438fd1498Szrj   tree m_last_result;
276538fd1498Szrj 
276638fd1498Szrj   /* If true, deferring might still be profitable.  If false, transform all
276738fd1498Szrj      candidates and no longer defer.  */
276838fd1498Szrj   bool m_deferring_p;
276938fd1498Szrj };
277038fd1498Szrj 
277138fd1498Szrj /* Transform all deferred FMA candidates and mark STATE as no longer
277238fd1498Szrj    deferring.  */
277338fd1498Szrj 
277438fd1498Szrj static void
cancel_fma_deferring(fma_deferring_state * state)277538fd1498Szrj cancel_fma_deferring (fma_deferring_state *state)
277638fd1498Szrj {
277738fd1498Szrj   if (!state->m_deferring_p)
277838fd1498Szrj     return;
277938fd1498Szrj 
278038fd1498Szrj   for (unsigned i = 0; i < state->m_candidates.length (); i++)
278138fd1498Szrj     {
278238fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
278338fd1498Szrj 	fprintf (dump_file, "Generating deferred FMA\n");
278438fd1498Szrj 
278538fd1498Szrj       const fma_transformation_info &fti = state->m_candidates[i];
278638fd1498Szrj       convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
278738fd1498Szrj 
278838fd1498Szrj       gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
278938fd1498Szrj       gsi_remove (&gsi, true);
279038fd1498Szrj       release_defs (fti.mul_stmt);
279138fd1498Szrj     }
279238fd1498Szrj   state->m_deferring_p = false;
279338fd1498Szrj }
279438fd1498Szrj 
279538fd1498Szrj /* If OP is an SSA name defined by a PHI node, return the PHI statement.
279638fd1498Szrj    Otherwise return NULL.  */
279738fd1498Szrj 
279838fd1498Szrj static gphi *
result_of_phi(tree op)279938fd1498Szrj result_of_phi (tree op)
280038fd1498Szrj {
280138fd1498Szrj   if (TREE_CODE (op) != SSA_NAME)
280238fd1498Szrj     return NULL;
280338fd1498Szrj 
280438fd1498Szrj   return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
280538fd1498Szrj }
280638fd1498Szrj 
280738fd1498Szrj /* After processing statements of a BB and recording STATE, return true if the
280838fd1498Szrj    initial phi is fed by the last FMA candidate result ore one such result from
280938fd1498Szrj    previously processed BBs marked in LAST_RESULT_SET.  */
281038fd1498Szrj 
281138fd1498Szrj static bool
last_fma_candidate_feeds_initial_phi(fma_deferring_state * state,hash_set<tree> * last_result_set)281238fd1498Szrj last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
281338fd1498Szrj 				      hash_set<tree> *last_result_set)
281438fd1498Szrj {
281538fd1498Szrj   ssa_op_iter iter;
281638fd1498Szrj   use_operand_p use;
281738fd1498Szrj   FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
281838fd1498Szrj     {
281938fd1498Szrj       tree t = USE_FROM_PTR (use);
282038fd1498Szrj       if (t == state->m_last_result
282138fd1498Szrj 	  || last_result_set->contains (t))
282238fd1498Szrj 	return true;
282338fd1498Szrj     }
282438fd1498Szrj 
282538fd1498Szrj   return false;
282638fd1498Szrj }
282738fd1498Szrj 
282838fd1498Szrj /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
282938fd1498Szrj    with uses in additions and subtractions to form fused multiply-add
283038fd1498Szrj    operations.  Returns true if successful and MUL_STMT should be removed.
283138fd1498Szrj 
283238fd1498Szrj    If STATE indicates that we are deferring FMA transformation, that means
283338fd1498Szrj    that we do not produce FMAs for basic blocks which look like:
283438fd1498Szrj 
283538fd1498Szrj     <bb 6>
283638fd1498Szrj     # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
283738fd1498Szrj     _65 = _14 * _16;
283838fd1498Szrj     accumulator_66 = _65 + accumulator_111;
283938fd1498Szrj 
284038fd1498Szrj   or its unrolled version, i.e. with several FMA candidates that feed result
284138fd1498Szrj   of one into the addend of another.  Instead, we add them to a list in STATE
284238fd1498Szrj   and if we later discover an FMA candidate that is not part of such a chain,
284338fd1498Szrj   we go back and perform all deferred past candidates.  */
284438fd1498Szrj 
284538fd1498Szrj static bool
convert_mult_to_fma(gimple * mul_stmt,tree op1,tree op2,fma_deferring_state * state)284638fd1498Szrj convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
284738fd1498Szrj 		     fma_deferring_state *state)
284838fd1498Szrj {
284938fd1498Szrj   tree mul_result = gimple_get_lhs (mul_stmt);
285038fd1498Szrj   tree type = TREE_TYPE (mul_result);
285138fd1498Szrj   gimple *use_stmt, *neguse_stmt;
285238fd1498Szrj   use_operand_p use_p;
285338fd1498Szrj   imm_use_iterator imm_iter;
285438fd1498Szrj 
285538fd1498Szrj   if (FLOAT_TYPE_P (type)
285638fd1498Szrj       && flag_fp_contract_mode == FP_CONTRACT_OFF)
285738fd1498Szrj     return false;
285838fd1498Szrj 
285938fd1498Szrj   /* We don't want to do bitfield reduction ops.  */
286038fd1498Szrj   if (INTEGRAL_TYPE_P (type)
286138fd1498Szrj       && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
286238fd1498Szrj     return false;
286338fd1498Szrj 
286438fd1498Szrj   /* If the target doesn't support it, don't generate it.  We assume that
286538fd1498Szrj      if fma isn't available then fms, fnma or fnms are not either.  */
286638fd1498Szrj   if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
286738fd1498Szrj     return false;
286838fd1498Szrj 
286938fd1498Szrj   /* If the multiplication has zero uses, it is kept around probably because
287038fd1498Szrj      of -fnon-call-exceptions.  Don't optimize it away in that case,
287138fd1498Szrj      it is DCE job.  */
287238fd1498Szrj   if (has_zero_uses (mul_result))
287338fd1498Szrj     return false;
287438fd1498Szrj 
287538fd1498Szrj   bool check_defer
287638fd1498Szrj     = (state->m_deferring_p
287738fd1498Szrj        && (tree_to_shwi (TYPE_SIZE (type))
287838fd1498Szrj 	   <= PARAM_VALUE (PARAM_AVOID_FMA_MAX_BITS)));
287938fd1498Szrj   bool defer = check_defer;
288038fd1498Szrj   /* Make sure that the multiplication statement becomes dead after
288138fd1498Szrj      the transformation, thus that all uses are transformed to FMAs.
288238fd1498Szrj      This means we assume that an FMA operation has the same cost
288338fd1498Szrj      as an addition.  */
288438fd1498Szrj   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
288538fd1498Szrj     {
288638fd1498Szrj       enum tree_code use_code;
288738fd1498Szrj       tree result = mul_result;
288838fd1498Szrj       bool negate_p = false;
288938fd1498Szrj 
289038fd1498Szrj       use_stmt = USE_STMT (use_p);
289138fd1498Szrj 
289238fd1498Szrj       if (is_gimple_debug (use_stmt))
289338fd1498Szrj 	continue;
289438fd1498Szrj 
289538fd1498Szrj       /* For now restrict this operations to single basic blocks.  In theory
289638fd1498Szrj 	 we would want to support sinking the multiplication in
289738fd1498Szrj 	 m = a*b;
289838fd1498Szrj 	 if ()
289938fd1498Szrj 	   ma = m + c;
290038fd1498Szrj 	 else
290138fd1498Szrj 	   d = m;
290238fd1498Szrj 	 to form a fma in the then block and sink the multiplication to the
290338fd1498Szrj 	 else block.  */
290438fd1498Szrj       if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
290538fd1498Szrj 	return false;
290638fd1498Szrj 
290738fd1498Szrj       if (!is_gimple_assign (use_stmt))
290838fd1498Szrj 	return false;
290938fd1498Szrj 
291038fd1498Szrj       use_code = gimple_assign_rhs_code (use_stmt);
291138fd1498Szrj 
291238fd1498Szrj       /* A negate on the multiplication leads to FNMA.  */
291338fd1498Szrj       if (use_code == NEGATE_EXPR)
291438fd1498Szrj 	{
291538fd1498Szrj 	  ssa_op_iter iter;
291638fd1498Szrj 	  use_operand_p usep;
291738fd1498Szrj 
291838fd1498Szrj 	  result = gimple_assign_lhs (use_stmt);
291938fd1498Szrj 
292038fd1498Szrj 	  /* Make sure the negate statement becomes dead with this
292138fd1498Szrj 	     single transformation.  */
292238fd1498Szrj 	  if (!single_imm_use (gimple_assign_lhs (use_stmt),
292338fd1498Szrj 			       &use_p, &neguse_stmt))
292438fd1498Szrj 	    return false;
292538fd1498Szrj 
292638fd1498Szrj 	  /* Make sure the multiplication isn't also used on that stmt.  */
292738fd1498Szrj 	  FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
292838fd1498Szrj 	    if (USE_FROM_PTR (usep) == mul_result)
292938fd1498Szrj 	      return false;
293038fd1498Szrj 
293138fd1498Szrj 	  /* Re-validate.  */
293238fd1498Szrj 	  use_stmt = neguse_stmt;
293338fd1498Szrj 	  if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
293438fd1498Szrj 	    return false;
293538fd1498Szrj 	  if (!is_gimple_assign (use_stmt))
293638fd1498Szrj 	    return false;
293738fd1498Szrj 
293838fd1498Szrj 	  use_code = gimple_assign_rhs_code (use_stmt);
293938fd1498Szrj 	  negate_p = true;
294038fd1498Szrj 	}
294138fd1498Szrj 
294238fd1498Szrj       switch (use_code)
294338fd1498Szrj 	{
294438fd1498Szrj 	case MINUS_EXPR:
294538fd1498Szrj 	  if (gimple_assign_rhs2 (use_stmt) == result)
294638fd1498Szrj 	    negate_p = !negate_p;
294738fd1498Szrj 	  break;
294838fd1498Szrj 	case PLUS_EXPR:
294938fd1498Szrj 	  break;
295038fd1498Szrj 	default:
295138fd1498Szrj 	  /* FMA can only be formed from PLUS and MINUS.  */
295238fd1498Szrj 	  return false;
295338fd1498Szrj 	}
295438fd1498Szrj 
295538fd1498Szrj       /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
295638fd1498Szrj 	 by a MULT_EXPR that we'll visit later, we might be able to
295738fd1498Szrj 	 get a more profitable match with fnma.
295838fd1498Szrj 	 OTOH, if we don't, a negate / fma pair has likely lower latency
295938fd1498Szrj 	 that a mult / subtract pair.  */
296038fd1498Szrj       if (use_code == MINUS_EXPR && !negate_p
296138fd1498Szrj 	  && gimple_assign_rhs1 (use_stmt) == result
296238fd1498Szrj 	  && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
296338fd1498Szrj 	  && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
296438fd1498Szrj 	{
296538fd1498Szrj 	  tree rhs2 = gimple_assign_rhs2 (use_stmt);
296638fd1498Szrj 
296738fd1498Szrj 	  if (TREE_CODE (rhs2) == SSA_NAME)
296838fd1498Szrj 	    {
296938fd1498Szrj 	      gimple *stmt2 = SSA_NAME_DEF_STMT (rhs2);
297038fd1498Szrj 	      if (has_single_use (rhs2)
297138fd1498Szrj 		  && is_gimple_assign (stmt2)
297238fd1498Szrj 		  && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
297338fd1498Szrj 	      return false;
297438fd1498Szrj 	    }
297538fd1498Szrj 	}
297638fd1498Szrj 
297738fd1498Szrj       tree use_rhs1 = gimple_assign_rhs1 (use_stmt);
297838fd1498Szrj       tree use_rhs2 = gimple_assign_rhs2 (use_stmt);
297938fd1498Szrj       /* We can't handle a * b + a * b.  */
298038fd1498Szrj       if (use_rhs1 == use_rhs2)
298138fd1498Szrj 	return false;
298238fd1498Szrj       /* If deferring, make sure we are not looking at an instruction that
298338fd1498Szrj 	 wouldn't have existed if we were not.  */
298438fd1498Szrj       if (state->m_deferring_p
298538fd1498Szrj 	  && (state->m_mul_result_set.contains (use_rhs1)
298638fd1498Szrj 	      || state->m_mul_result_set.contains (use_rhs2)))
298738fd1498Szrj 	return false;
298838fd1498Szrj 
298938fd1498Szrj       if (check_defer)
299038fd1498Szrj 	{
299138fd1498Szrj 	  tree use_lhs = gimple_assign_lhs (use_stmt);
299238fd1498Szrj 	  if (state->m_last_result)
299338fd1498Szrj 	    {
299438fd1498Szrj 	      if (use_rhs2 == state->m_last_result
299538fd1498Szrj 		  || use_rhs1 == state->m_last_result)
299638fd1498Szrj 		defer = true;
299738fd1498Szrj 	      else
299838fd1498Szrj 		defer = false;
299938fd1498Szrj 	    }
300038fd1498Szrj 	  else
300138fd1498Szrj 	    {
300238fd1498Szrj 	      gcc_checking_assert (!state->m_initial_phi);
300338fd1498Szrj 	      gphi *phi;
300438fd1498Szrj 	      if (use_rhs1 == result)
300538fd1498Szrj 		phi = result_of_phi (use_rhs2);
300638fd1498Szrj 	      else
300738fd1498Szrj 		{
300838fd1498Szrj 		  gcc_assert (use_rhs2 == result);
300938fd1498Szrj 		  phi = result_of_phi (use_rhs1);
301038fd1498Szrj 		}
301138fd1498Szrj 
301238fd1498Szrj 	      if (phi)
301338fd1498Szrj 		{
301438fd1498Szrj 		  state->m_initial_phi = phi;
301538fd1498Szrj 		  defer = true;
301638fd1498Szrj 		}
301738fd1498Szrj 	      else
301838fd1498Szrj 		defer = false;
301938fd1498Szrj 	    }
302038fd1498Szrj 
302138fd1498Szrj 	  state->m_last_result = use_lhs;
302238fd1498Szrj 	  check_defer = false;
302338fd1498Szrj 	}
302438fd1498Szrj       else
302538fd1498Szrj 	defer = false;
302638fd1498Szrj 
302738fd1498Szrj       /* While it is possible to validate whether or not the exact form that
302838fd1498Szrj 	 we've recognized is available in the backend, the assumption is that
302938fd1498Szrj 	 if the deferring logic above did not trigger, the transformation is
303038fd1498Szrj 	 never a loss.  For instance, suppose the target only has the plain FMA
303138fd1498Szrj 	 pattern available.  Consider a*b-c -> fma(a,b,-c): we've exchanged
303238fd1498Szrj 	 MUL+SUB for FMA+NEG, which is still two operations.  Consider
303338fd1498Szrj          -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
303438fd1498Szrj 	 form the two NEGs are independent and could be run in parallel.  */
303538fd1498Szrj     }
303638fd1498Szrj 
303738fd1498Szrj   if (defer)
303838fd1498Szrj     {
303938fd1498Szrj       fma_transformation_info fti;
304038fd1498Szrj       fti.mul_stmt = mul_stmt;
304138fd1498Szrj       fti.mul_result = mul_result;
304238fd1498Szrj       fti.op1 = op1;
304338fd1498Szrj       fti.op2 = op2;
304438fd1498Szrj       state->m_candidates.safe_push (fti);
304538fd1498Szrj       state->m_mul_result_set.add (mul_result);
304638fd1498Szrj 
304738fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
304838fd1498Szrj 	{
304938fd1498Szrj 	  fprintf (dump_file, "Deferred generating FMA for multiplication ");
305038fd1498Szrj 	  print_gimple_stmt (dump_file, mul_stmt, 0, 0);
305138fd1498Szrj 	  fprintf (dump_file, "\n");
305238fd1498Szrj 	}
305338fd1498Szrj 
305438fd1498Szrj       return false;
305538fd1498Szrj     }
305638fd1498Szrj   else
305738fd1498Szrj     {
305838fd1498Szrj       if (state->m_deferring_p)
305938fd1498Szrj 	cancel_fma_deferring (state);
306038fd1498Szrj       convert_mult_to_fma_1 (mul_result, op1, op2);
306138fd1498Szrj       return true;
306238fd1498Szrj     }
306338fd1498Szrj }
306438fd1498Szrj 
306538fd1498Szrj 
306638fd1498Szrj /* Helper function of match_uaddsub_overflow.  Return 1
306738fd1498Szrj    if USE_STMT is unsigned overflow check ovf != 0 for
306838fd1498Szrj    STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
306938fd1498Szrj    and 0 otherwise.  */
307038fd1498Szrj 
307138fd1498Szrj static int
uaddsub_overflow_check_p(gimple * stmt,gimple * use_stmt)307238fd1498Szrj uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt)
307338fd1498Szrj {
307438fd1498Szrj   enum tree_code ccode = ERROR_MARK;
307538fd1498Szrj   tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
307638fd1498Szrj   if (gimple_code (use_stmt) == GIMPLE_COND)
307738fd1498Szrj     {
307838fd1498Szrj       ccode = gimple_cond_code (use_stmt);
307938fd1498Szrj       crhs1 = gimple_cond_lhs (use_stmt);
308038fd1498Szrj       crhs2 = gimple_cond_rhs (use_stmt);
308138fd1498Szrj     }
308238fd1498Szrj   else if (is_gimple_assign (use_stmt))
308338fd1498Szrj     {
308438fd1498Szrj       if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
308538fd1498Szrj 	{
308638fd1498Szrj 	  ccode = gimple_assign_rhs_code (use_stmt);
308738fd1498Szrj 	  crhs1 = gimple_assign_rhs1 (use_stmt);
308838fd1498Szrj 	  crhs2 = gimple_assign_rhs2 (use_stmt);
308938fd1498Szrj 	}
309038fd1498Szrj       else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR)
309138fd1498Szrj 	{
309238fd1498Szrj 	  tree cond = gimple_assign_rhs1 (use_stmt);
309338fd1498Szrj 	  if (COMPARISON_CLASS_P (cond))
309438fd1498Szrj 	    {
309538fd1498Szrj 	      ccode = TREE_CODE (cond);
309638fd1498Szrj 	      crhs1 = TREE_OPERAND (cond, 0);
309738fd1498Szrj 	      crhs2 = TREE_OPERAND (cond, 1);
309838fd1498Szrj 	    }
309938fd1498Szrj 	  else
310038fd1498Szrj 	    return 0;
310138fd1498Szrj 	}
310238fd1498Szrj       else
310338fd1498Szrj 	return 0;
310438fd1498Szrj     }
310538fd1498Szrj   else
310638fd1498Szrj     return 0;
310738fd1498Szrj 
310838fd1498Szrj   if (TREE_CODE_CLASS (ccode) != tcc_comparison)
310938fd1498Szrj     return 0;
311038fd1498Szrj 
311138fd1498Szrj   enum tree_code code = gimple_assign_rhs_code (stmt);
311238fd1498Szrj   tree lhs = gimple_assign_lhs (stmt);
311338fd1498Szrj   tree rhs1 = gimple_assign_rhs1 (stmt);
311438fd1498Szrj   tree rhs2 = gimple_assign_rhs2 (stmt);
311538fd1498Szrj 
311638fd1498Szrj   switch (ccode)
311738fd1498Szrj     {
311838fd1498Szrj     case GT_EXPR:
311938fd1498Szrj     case LE_EXPR:
312038fd1498Szrj       /* r = a - b; r > a or r <= a
312138fd1498Szrj 	 r = a + b; a > r or a <= r or b > r or b <= r.  */
312238fd1498Szrj       if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
312338fd1498Szrj 	  || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
312438fd1498Szrj 	      && crhs2 == lhs))
312538fd1498Szrj 	return ccode == GT_EXPR ? 1 : -1;
312638fd1498Szrj       break;
312738fd1498Szrj     case LT_EXPR:
312838fd1498Szrj     case GE_EXPR:
312938fd1498Szrj       /* r = a - b; a < r or a >= r
313038fd1498Szrj 	 r = a + b; r < a or r >= a or r < b or r >= b.  */
313138fd1498Szrj       if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
313238fd1498Szrj 	  || (code == PLUS_EXPR && crhs1 == lhs
313338fd1498Szrj 	      && (crhs2 == rhs1 || crhs2 == rhs2)))
313438fd1498Szrj 	return ccode == LT_EXPR ? 1 : -1;
313538fd1498Szrj       break;
313638fd1498Szrj     default:
313738fd1498Szrj       break;
313838fd1498Szrj     }
313938fd1498Szrj   return 0;
314038fd1498Szrj }
314138fd1498Szrj 
314238fd1498Szrj /* Recognize for unsigned x
314338fd1498Szrj    x = y - z;
314438fd1498Szrj    if (x > y)
314538fd1498Szrj    where there are other uses of x and replace it with
314638fd1498Szrj    _7 = SUB_OVERFLOW (y, z);
314738fd1498Szrj    x = REALPART_EXPR <_7>;
314838fd1498Szrj    _8 = IMAGPART_EXPR <_7>;
314938fd1498Szrj    if (_8)
315038fd1498Szrj    and similarly for addition.  */
315138fd1498Szrj 
315238fd1498Szrj static bool
match_uaddsub_overflow(gimple_stmt_iterator * gsi,gimple * stmt,enum tree_code code)315338fd1498Szrj match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
315438fd1498Szrj 			enum tree_code code)
315538fd1498Szrj {
315638fd1498Szrj   tree lhs = gimple_assign_lhs (stmt);
315738fd1498Szrj   tree type = TREE_TYPE (lhs);
315838fd1498Szrj   use_operand_p use_p;
315938fd1498Szrj   imm_use_iterator iter;
316038fd1498Szrj   bool use_seen = false;
316138fd1498Szrj   bool ovf_use_seen = false;
316238fd1498Szrj   gimple *use_stmt;
316338fd1498Szrj 
316438fd1498Szrj   gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
316538fd1498Szrj   if (!INTEGRAL_TYPE_P (type)
316638fd1498Szrj       || !TYPE_UNSIGNED (type)
316738fd1498Szrj       || has_zero_uses (lhs)
316838fd1498Szrj       || has_single_use (lhs)
316938fd1498Szrj       || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab,
317038fd1498Szrj 			TYPE_MODE (type)) == CODE_FOR_nothing)
317138fd1498Szrj     return false;
317238fd1498Szrj 
317338fd1498Szrj   FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
317438fd1498Szrj     {
317538fd1498Szrj       use_stmt = USE_STMT (use_p);
317638fd1498Szrj       if (is_gimple_debug (use_stmt))
317738fd1498Szrj 	continue;
317838fd1498Szrj 
317938fd1498Szrj       if (uaddsub_overflow_check_p (stmt, use_stmt))
318038fd1498Szrj 	ovf_use_seen = true;
318138fd1498Szrj       else
318238fd1498Szrj 	use_seen = true;
318338fd1498Szrj       if (ovf_use_seen && use_seen)
318438fd1498Szrj 	break;
318538fd1498Szrj     }
318638fd1498Szrj 
318738fd1498Szrj   if (!ovf_use_seen || !use_seen)
318838fd1498Szrj     return false;
318938fd1498Szrj 
319038fd1498Szrj   tree ctype = build_complex_type (type);
319138fd1498Szrj   tree rhs1 = gimple_assign_rhs1 (stmt);
319238fd1498Szrj   tree rhs2 = gimple_assign_rhs2 (stmt);
319338fd1498Szrj   gcall *g = gimple_build_call_internal (code == PLUS_EXPR
319438fd1498Szrj 					 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
319538fd1498Szrj 					 2, rhs1, rhs2);
319638fd1498Szrj   tree ctmp = make_ssa_name (ctype);
319738fd1498Szrj   gimple_call_set_lhs (g, ctmp);
319838fd1498Szrj   gsi_insert_before (gsi, g, GSI_SAME_STMT);
319938fd1498Szrj   gassign *g2 = gimple_build_assign (lhs, REALPART_EXPR,
320038fd1498Szrj 				     build1 (REALPART_EXPR, type, ctmp));
320138fd1498Szrj   gsi_replace (gsi, g2, true);
320238fd1498Szrj   tree ovf = make_ssa_name (type);
320338fd1498Szrj   g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
320438fd1498Szrj 			    build1 (IMAGPART_EXPR, type, ctmp));
320538fd1498Szrj   gsi_insert_after (gsi, g2, GSI_NEW_STMT);
320638fd1498Szrj 
320738fd1498Szrj   FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
320838fd1498Szrj     {
320938fd1498Szrj       if (is_gimple_debug (use_stmt))
321038fd1498Szrj 	continue;
321138fd1498Szrj 
321238fd1498Szrj       int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt);
321338fd1498Szrj       if (ovf_use == 0)
321438fd1498Szrj 	continue;
321538fd1498Szrj       if (gimple_code (use_stmt) == GIMPLE_COND)
321638fd1498Szrj 	{
321738fd1498Szrj 	  gcond *cond_stmt = as_a <gcond *> (use_stmt);
321838fd1498Szrj 	  gimple_cond_set_lhs (cond_stmt, ovf);
321938fd1498Szrj 	  gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
322038fd1498Szrj 	  gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
322138fd1498Szrj 	}
322238fd1498Szrj       else
322338fd1498Szrj 	{
322438fd1498Szrj 	  gcc_checking_assert (is_gimple_assign (use_stmt));
322538fd1498Szrj 	  if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
322638fd1498Szrj 	    {
322738fd1498Szrj 	      gimple_assign_set_rhs1 (use_stmt, ovf);
322838fd1498Szrj 	      gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
322938fd1498Szrj 	      gimple_assign_set_rhs_code (use_stmt,
323038fd1498Szrj 					  ovf_use == 1 ? NE_EXPR : EQ_EXPR);
323138fd1498Szrj 	    }
323238fd1498Szrj 	  else
323338fd1498Szrj 	    {
323438fd1498Szrj 	      gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
323538fd1498Szrj 				   == COND_EXPR);
323638fd1498Szrj 	      tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
323738fd1498Szrj 				  boolean_type_node, ovf,
323838fd1498Szrj 				  build_int_cst (type, 0));
323938fd1498Szrj 	      gimple_assign_set_rhs1 (use_stmt, cond);
324038fd1498Szrj 	    }
324138fd1498Szrj 	}
324238fd1498Szrj       update_stmt (use_stmt);
324338fd1498Szrj     }
324438fd1498Szrj   return true;
324538fd1498Szrj }
324638fd1498Szrj 
324738fd1498Szrj /* Return true if target has support for divmod.  */
324838fd1498Szrj 
324938fd1498Szrj static bool
target_supports_divmod_p(optab divmod_optab,optab div_optab,machine_mode mode)325038fd1498Szrj target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
325138fd1498Szrj {
325238fd1498Szrj   /* If target supports hardware divmod insn, use it for divmod.  */
325338fd1498Szrj   if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
325438fd1498Szrj     return true;
325538fd1498Szrj 
325638fd1498Szrj   /* Check if libfunc for divmod is available.  */
325738fd1498Szrj   rtx libfunc = optab_libfunc (divmod_optab, mode);
325838fd1498Szrj   if (libfunc != NULL_RTX)
325938fd1498Szrj     {
326038fd1498Szrj       /* If optab_handler exists for div_optab, perhaps in a wider mode,
326138fd1498Szrj 	 we don't want to use the libfunc even if it exists for given mode.  */
326238fd1498Szrj       machine_mode div_mode;
326338fd1498Szrj       FOR_EACH_MODE_FROM (div_mode, mode)
326438fd1498Szrj 	if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
326538fd1498Szrj 	  return false;
326638fd1498Szrj 
326738fd1498Szrj       return targetm.expand_divmod_libfunc != NULL;
326838fd1498Szrj     }
326938fd1498Szrj 
327038fd1498Szrj   return false;
327138fd1498Szrj }
327238fd1498Szrj 
327338fd1498Szrj /* Check if stmt is candidate for divmod transform.  */
327438fd1498Szrj 
327538fd1498Szrj static bool
divmod_candidate_p(gassign * stmt)327638fd1498Szrj divmod_candidate_p (gassign *stmt)
327738fd1498Szrj {
327838fd1498Szrj   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
327938fd1498Szrj   machine_mode mode = TYPE_MODE (type);
328038fd1498Szrj   optab divmod_optab, div_optab;
328138fd1498Szrj 
328238fd1498Szrj   if (TYPE_UNSIGNED (type))
328338fd1498Szrj     {
328438fd1498Szrj       divmod_optab = udivmod_optab;
328538fd1498Szrj       div_optab = udiv_optab;
328638fd1498Szrj     }
328738fd1498Szrj   else
328838fd1498Szrj     {
328938fd1498Szrj       divmod_optab = sdivmod_optab;
329038fd1498Szrj       div_optab = sdiv_optab;
329138fd1498Szrj     }
329238fd1498Szrj 
329338fd1498Szrj   tree op1 = gimple_assign_rhs1 (stmt);
329438fd1498Szrj   tree op2 = gimple_assign_rhs2 (stmt);
329538fd1498Szrj 
329638fd1498Szrj   /* Disable the transform if either is a constant, since division-by-constant
329738fd1498Szrj      may have specialized expansion.  */
329838fd1498Szrj   if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2))
329938fd1498Szrj     return false;
330038fd1498Szrj 
330138fd1498Szrj   /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
330238fd1498Szrj      expand using the [su]divv optabs.  */
330338fd1498Szrj   if (TYPE_OVERFLOW_TRAPS (type))
330438fd1498Szrj     return false;
330538fd1498Szrj 
330638fd1498Szrj   if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
330738fd1498Szrj     return false;
330838fd1498Szrj 
330938fd1498Szrj   return true;
331038fd1498Szrj }
331138fd1498Szrj 
331238fd1498Szrj /* This function looks for:
331338fd1498Szrj    t1 = a TRUNC_DIV_EXPR b;
331438fd1498Szrj    t2 = a TRUNC_MOD_EXPR b;
331538fd1498Szrj    and transforms it to the following sequence:
331638fd1498Szrj    complex_tmp = DIVMOD (a, b);
331738fd1498Szrj    t1 = REALPART_EXPR(a);
331838fd1498Szrj    t2 = IMAGPART_EXPR(b);
331938fd1498Szrj    For conditions enabling the transform see divmod_candidate_p().
332038fd1498Szrj 
332138fd1498Szrj    The pass has three parts:
332238fd1498Szrj    1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
332338fd1498Szrj       other trunc_div_expr and trunc_mod_expr stmts.
332438fd1498Szrj    2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
332538fd1498Szrj       to stmts vector.
332638fd1498Szrj    3) Insert DIVMOD call just before top_stmt and update entries in
332738fd1498Szrj       stmts vector to use return value of DIMOVD (REALEXPR_PART for div,
332838fd1498Szrj       IMAGPART_EXPR for mod).  */
332938fd1498Szrj 
333038fd1498Szrj static bool
convert_to_divmod(gassign * stmt)333138fd1498Szrj convert_to_divmod (gassign *stmt)
333238fd1498Szrj {
333338fd1498Szrj   if (stmt_can_throw_internal (stmt)
333438fd1498Szrj       || !divmod_candidate_p (stmt))
333538fd1498Szrj     return false;
333638fd1498Szrj 
333738fd1498Szrj   tree op1 = gimple_assign_rhs1 (stmt);
333838fd1498Szrj   tree op2 = gimple_assign_rhs2 (stmt);
333938fd1498Szrj 
334038fd1498Szrj   imm_use_iterator use_iter;
334138fd1498Szrj   gimple *use_stmt;
334238fd1498Szrj   auto_vec<gimple *> stmts;
334338fd1498Szrj 
334438fd1498Szrj   gimple *top_stmt = stmt;
334538fd1498Szrj   basic_block top_bb = gimple_bb (stmt);
334638fd1498Szrj 
334738fd1498Szrj   /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
334838fd1498Szrj      at-least stmt and possibly other trunc_div/trunc_mod stmts
334938fd1498Szrj      having same operands as stmt.  */
335038fd1498Szrj 
335138fd1498Szrj   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
335238fd1498Szrj     {
335338fd1498Szrj       if (is_gimple_assign (use_stmt)
335438fd1498Szrj 	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
335538fd1498Szrj 	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
335638fd1498Szrj 	  && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
335738fd1498Szrj 	  && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
335838fd1498Szrj 	{
335938fd1498Szrj 	  if (stmt_can_throw_internal (use_stmt))
336038fd1498Szrj 	    continue;
336138fd1498Szrj 
336238fd1498Szrj 	  basic_block bb = gimple_bb (use_stmt);
336338fd1498Szrj 
336438fd1498Szrj 	  if (bb == top_bb)
336538fd1498Szrj 	    {
336638fd1498Szrj 	      if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
336738fd1498Szrj 		top_stmt = use_stmt;
336838fd1498Szrj 	    }
336938fd1498Szrj 	  else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
337038fd1498Szrj 	    {
337138fd1498Szrj 	      top_bb = bb;
337238fd1498Szrj 	      top_stmt = use_stmt;
337338fd1498Szrj 	    }
337438fd1498Szrj 	}
337538fd1498Szrj     }
337638fd1498Szrj 
337738fd1498Szrj   tree top_op1 = gimple_assign_rhs1 (top_stmt);
337838fd1498Szrj   tree top_op2 = gimple_assign_rhs2 (top_stmt);
337938fd1498Szrj 
338038fd1498Szrj   stmts.safe_push (top_stmt);
338138fd1498Szrj   bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
338238fd1498Szrj 
338338fd1498Szrj   /* Part 2: Add all trunc_div/trunc_mod statements domianted by top_bb
338438fd1498Szrj      to stmts vector. The 2nd loop will always add stmt to stmts vector, since
338538fd1498Szrj      gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
338638fd1498Szrj      2nd loop ends up adding at-least single trunc_mod_expr stmt.  */
338738fd1498Szrj 
338838fd1498Szrj   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
338938fd1498Szrj     {
339038fd1498Szrj       if (is_gimple_assign (use_stmt)
339138fd1498Szrj 	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
339238fd1498Szrj 	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
339338fd1498Szrj 	  && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
339438fd1498Szrj 	  && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
339538fd1498Szrj 	{
339638fd1498Szrj 	  if (use_stmt == top_stmt
339738fd1498Szrj 	      || stmt_can_throw_internal (use_stmt)
339838fd1498Szrj 	      || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
339938fd1498Szrj 	    continue;
340038fd1498Szrj 
340138fd1498Szrj 	  stmts.safe_push (use_stmt);
340238fd1498Szrj 	  if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
340338fd1498Szrj 	    div_seen = true;
340438fd1498Szrj 	}
340538fd1498Szrj     }
340638fd1498Szrj 
340738fd1498Szrj   if (!div_seen)
340838fd1498Szrj     return false;
340938fd1498Szrj 
341038fd1498Szrj   /* Part 3: Create libcall to internal fn DIVMOD:
341138fd1498Szrj      divmod_tmp = DIVMOD (op1, op2).  */
341238fd1498Szrj 
341338fd1498Szrj   gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
341438fd1498Szrj   tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
341538fd1498Szrj 				 call_stmt, "divmod_tmp");
341638fd1498Szrj   gimple_call_set_lhs (call_stmt, res);
341738fd1498Szrj   /* We rejected throwing statements above.  */
341838fd1498Szrj   gimple_call_set_nothrow (call_stmt, true);
341938fd1498Szrj 
342038fd1498Szrj   /* Insert the call before top_stmt.  */
342138fd1498Szrj   gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
342238fd1498Szrj   gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
342338fd1498Szrj 
342438fd1498Szrj   widen_mul_stats.divmod_calls_inserted++;
342538fd1498Szrj 
342638fd1498Szrj   /* Update all statements in stmts vector:
342738fd1498Szrj      lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
342838fd1498Szrj      lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>.  */
342938fd1498Szrj 
343038fd1498Szrj   for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
343138fd1498Szrj     {
343238fd1498Szrj       tree new_rhs;
343338fd1498Szrj 
343438fd1498Szrj       switch (gimple_assign_rhs_code (use_stmt))
343538fd1498Szrj 	{
343638fd1498Szrj 	  case TRUNC_DIV_EXPR:
343738fd1498Szrj 	    new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
343838fd1498Szrj 	    break;
343938fd1498Szrj 
344038fd1498Szrj 	  case TRUNC_MOD_EXPR:
344138fd1498Szrj 	    new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
344238fd1498Szrj 	    break;
344338fd1498Szrj 
344438fd1498Szrj 	  default:
344538fd1498Szrj 	    gcc_unreachable ();
344638fd1498Szrj 	}
344738fd1498Szrj 
344838fd1498Szrj       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
344938fd1498Szrj       gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
345038fd1498Szrj       update_stmt (use_stmt);
345138fd1498Szrj     }
345238fd1498Szrj 
345338fd1498Szrj   return true;
345438fd1498Szrj }
345538fd1498Szrj 
345638fd1498Szrj /* Find integer multiplications where the operands are extended from
345738fd1498Szrj    smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
345838fd1498Szrj    where appropriate.  */
345938fd1498Szrj 
346038fd1498Szrj namespace {
346138fd1498Szrj 
346238fd1498Szrj const pass_data pass_data_optimize_widening_mul =
346338fd1498Szrj {
346438fd1498Szrj   GIMPLE_PASS, /* type */
346538fd1498Szrj   "widening_mul", /* name */
346638fd1498Szrj   OPTGROUP_NONE, /* optinfo_flags */
346738fd1498Szrj   TV_TREE_WIDEN_MUL, /* tv_id */
346838fd1498Szrj   PROP_ssa, /* properties_required */
346938fd1498Szrj   0, /* properties_provided */
347038fd1498Szrj   0, /* properties_destroyed */
347138fd1498Szrj   0, /* todo_flags_start */
347238fd1498Szrj   TODO_update_ssa, /* todo_flags_finish */
347338fd1498Szrj };
347438fd1498Szrj 
347538fd1498Szrj class pass_optimize_widening_mul : public gimple_opt_pass
347638fd1498Szrj {
347738fd1498Szrj public:
pass_optimize_widening_mul(gcc::context * ctxt)347838fd1498Szrj   pass_optimize_widening_mul (gcc::context *ctxt)
347938fd1498Szrj     : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
348038fd1498Szrj   {}
348138fd1498Szrj 
348238fd1498Szrj   /* opt_pass methods: */
gate(function *)348338fd1498Szrj   virtual bool gate (function *)
348438fd1498Szrj     {
348538fd1498Szrj       return flag_expensive_optimizations && optimize;
348638fd1498Szrj     }
348738fd1498Szrj 
348838fd1498Szrj   virtual unsigned int execute (function *);
348938fd1498Szrj 
349038fd1498Szrj }; // class pass_optimize_widening_mul
349138fd1498Szrj 
349238fd1498Szrj /* Walker class to perform the transformation in reverse dominance order. */
349338fd1498Szrj 
349438fd1498Szrj class math_opts_dom_walker : public dom_walker
349538fd1498Szrj {
349638fd1498Szrj public:
349738fd1498Szrj   /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
349838fd1498Szrj      if walking modidifes the CFG.  */
349938fd1498Szrj 
math_opts_dom_walker(bool * cfg_changed_p)350038fd1498Szrj   math_opts_dom_walker (bool *cfg_changed_p)
350138fd1498Szrj     : dom_walker (CDI_DOMINATORS), m_last_result_set (),
350238fd1498Szrj       m_cfg_changed_p (cfg_changed_p) {}
350338fd1498Szrj 
350438fd1498Szrj   /* The actual actions performed in the walk.  */
350538fd1498Szrj 
350638fd1498Szrj   virtual void after_dom_children (basic_block);
350738fd1498Szrj 
350838fd1498Szrj   /* Set of results of chains of multiply and add statement combinations that
350938fd1498Szrj      were not transformed into FMAs because of active deferring.  */
351038fd1498Szrj   hash_set<tree> m_last_result_set;
351138fd1498Szrj 
351238fd1498Szrj   /* Pointer to a flag of the user that needs to be set if CFG has been
351338fd1498Szrj      modified.  */
351438fd1498Szrj   bool *m_cfg_changed_p;
351538fd1498Szrj };
351638fd1498Szrj 
351738fd1498Szrj void
after_dom_children(basic_block bb)351838fd1498Szrj math_opts_dom_walker::after_dom_children (basic_block bb)
351938fd1498Szrj {
352038fd1498Szrj   gimple_stmt_iterator gsi;
352138fd1498Szrj 
352238fd1498Szrj   fma_deferring_state fma_state (PARAM_VALUE (PARAM_AVOID_FMA_MAX_BITS) > 0);
352338fd1498Szrj 
352438fd1498Szrj   for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
352538fd1498Szrj     {
352638fd1498Szrj       gimple *stmt = gsi_stmt (gsi);
352738fd1498Szrj       enum tree_code code;
352838fd1498Szrj 
352938fd1498Szrj       if (is_gimple_assign (stmt))
353038fd1498Szrj 	{
353138fd1498Szrj 	  code = gimple_assign_rhs_code (stmt);
353238fd1498Szrj 	  switch (code)
353338fd1498Szrj 	    {
353438fd1498Szrj 	    case MULT_EXPR:
353538fd1498Szrj 	      if (!convert_mult_to_widen (stmt, &gsi)
353638fd1498Szrj 		  && !convert_expand_mult_copysign (stmt, &gsi)
353738fd1498Szrj 		  && convert_mult_to_fma (stmt,
353838fd1498Szrj 					  gimple_assign_rhs1 (stmt),
353938fd1498Szrj 					  gimple_assign_rhs2 (stmt),
354038fd1498Szrj 					  &fma_state))
354138fd1498Szrj 		{
354238fd1498Szrj 		  gsi_remove (&gsi, true);
354338fd1498Szrj 		  release_defs (stmt);
354438fd1498Szrj 		  continue;
354538fd1498Szrj 		}
354638fd1498Szrj 	      break;
354738fd1498Szrj 
354838fd1498Szrj 	    case PLUS_EXPR:
354938fd1498Szrj 	    case MINUS_EXPR:
355038fd1498Szrj 	      if (!convert_plusminus_to_widen (&gsi, stmt, code))
355138fd1498Szrj 		match_uaddsub_overflow (&gsi, stmt, code);
355238fd1498Szrj 	      break;
355338fd1498Szrj 
355438fd1498Szrj 	    case TRUNC_MOD_EXPR:
355538fd1498Szrj 	      convert_to_divmod (as_a<gassign *> (stmt));
355638fd1498Szrj 	      break;
355738fd1498Szrj 
355838fd1498Szrj 	    default:;
355938fd1498Szrj 	    }
356038fd1498Szrj 	}
356138fd1498Szrj       else if (is_gimple_call (stmt))
356238fd1498Szrj 	{
356338fd1498Szrj 	  tree fndecl = gimple_call_fndecl (stmt);
356438fd1498Szrj 	  if (fndecl && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
356538fd1498Szrj 	    {
356638fd1498Szrj 	      switch (DECL_FUNCTION_CODE (fndecl))
356738fd1498Szrj 		{
356838fd1498Szrj 		case BUILT_IN_POWF:
356938fd1498Szrj 		case BUILT_IN_POW:
357038fd1498Szrj 		case BUILT_IN_POWL:
357138fd1498Szrj 		  if (gimple_call_lhs (stmt)
357238fd1498Szrj 		      && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
357338fd1498Szrj 		      && real_equal
357438fd1498Szrj 		      (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
357538fd1498Szrj 		       &dconst2)
357638fd1498Szrj 		      && convert_mult_to_fma (stmt,
357738fd1498Szrj 					      gimple_call_arg (stmt, 0),
357838fd1498Szrj 					      gimple_call_arg (stmt, 0),
357938fd1498Szrj 					      &fma_state))
358038fd1498Szrj 		    {
358138fd1498Szrj 		      unlink_stmt_vdef (stmt);
358238fd1498Szrj 		      if (gsi_remove (&gsi, true)
358338fd1498Szrj 			  && gimple_purge_dead_eh_edges (bb))
358438fd1498Szrj 			*m_cfg_changed_p = true;
358538fd1498Szrj 		      release_defs (stmt);
358638fd1498Szrj 		      continue;
358738fd1498Szrj 		    }
358838fd1498Szrj 		  break;
358938fd1498Szrj 
359038fd1498Szrj 		default:;
359138fd1498Szrj 		}
359238fd1498Szrj 	    }
359338fd1498Szrj 	  else
359438fd1498Szrj 	    cancel_fma_deferring (&fma_state);
359538fd1498Szrj 	}
359638fd1498Szrj       gsi_next (&gsi);
359738fd1498Szrj     }
359838fd1498Szrj   if (fma_state.m_deferring_p
359938fd1498Szrj       && fma_state.m_initial_phi)
360038fd1498Szrj     {
360138fd1498Szrj       gcc_checking_assert (fma_state.m_last_result);
360238fd1498Szrj       if (!last_fma_candidate_feeds_initial_phi (&fma_state,
360338fd1498Szrj 						 &m_last_result_set))
360438fd1498Szrj 	cancel_fma_deferring (&fma_state);
360538fd1498Szrj       else
360638fd1498Szrj 	m_last_result_set.add (fma_state.m_last_result);
360738fd1498Szrj     }
360838fd1498Szrj }
360938fd1498Szrj 
361038fd1498Szrj 
361138fd1498Szrj unsigned int
execute(function * fun)361238fd1498Szrj pass_optimize_widening_mul::execute (function *fun)
361338fd1498Szrj {
361438fd1498Szrj   bool cfg_changed = false;
361538fd1498Szrj 
361638fd1498Szrj   memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
361738fd1498Szrj   calculate_dominance_info (CDI_DOMINATORS);
361838fd1498Szrj   renumber_gimple_stmt_uids ();
361938fd1498Szrj 
362038fd1498Szrj   math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
362138fd1498Szrj 
362238fd1498Szrj   statistics_counter_event (fun, "widening multiplications inserted",
362338fd1498Szrj 			    widen_mul_stats.widen_mults_inserted);
362438fd1498Szrj   statistics_counter_event (fun, "widening maccs inserted",
362538fd1498Szrj 			    widen_mul_stats.maccs_inserted);
362638fd1498Szrj   statistics_counter_event (fun, "fused multiply-adds inserted",
362738fd1498Szrj 			    widen_mul_stats.fmas_inserted);
362838fd1498Szrj   statistics_counter_event (fun, "divmod calls inserted",
362938fd1498Szrj 			    widen_mul_stats.divmod_calls_inserted);
363038fd1498Szrj 
363138fd1498Szrj   return cfg_changed ? TODO_cleanup_cfg : 0;
363238fd1498Szrj }
363338fd1498Szrj 
363438fd1498Szrj } // anon namespace
363538fd1498Szrj 
363638fd1498Szrj gimple_opt_pass *
make_pass_optimize_widening_mul(gcc::context * ctxt)363738fd1498Szrj make_pass_optimize_widening_mul (gcc::context *ctxt)
363838fd1498Szrj {
363938fd1498Szrj   return new pass_optimize_widening_mul (ctxt);
364038fd1498Szrj }
3641