1 /* Global, SSA-based optimizations using mathematical identities.
2    Copyright (C) 2005-2021 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* Currently, the only mini-pass in this file tries to CSE reciprocal
21    operations.  These are common in sequences such as this one:
22 
23 	modulus = sqrt(x*x + y*y + z*z);
24 	x = x / modulus;
25 	y = y / modulus;
26 	z = z / modulus;
27 
28    that can be optimized to
29 
30 	modulus = sqrt(x*x + y*y + z*z);
31         rmodulus = 1.0 / modulus;
32 	x = x * rmodulus;
33 	y = y * rmodulus;
34 	z = z * rmodulus;
35 
36    We do this for loop invariant divisors, and with this pass whenever
37    we notice that a division has the same divisor multiple times.
38 
39    Of course, like in PRE, we don't insert a division if a dominator
40    already has one.  However, this cannot be done as an extension of
41    PRE for several reasons.
42 
43    First of all, experiments have shown that the
44    transformation is not always useful if there are only two divisions
45    by the same divisor.  This is probably because modern processors
46    can pipeline the divisions; on older, in-order processors it should
47    still be effective to optimize two divisions by the same number.
48    We make this a param, and it shall be called N in the remainder of
49    this comment.
50 
51    Second, if trapping math is active, we have less freedom on where
52    to insert divisions: we can only do so in basic blocks that already
53    contain one.  (If divisions don't trap, we can instead insert them
54    elsewhere, in blocks that are common dominators of those that
55    contain a division.)
56 
57    We really don't want to compute the reciprocal unless a division will
58    be found.  To do this, we won't insert the division in a basic block
59    that has fewer than N divisions *post-dominating* it.
60 
61    The algorithm constructs a subset of the dominator tree, holding the
62    blocks containing the divisions and their common dominators,
63    and walks it twice.  The first walk is in post-order, and it annotates
64    each block with the number of divisions that post-dominate it: this
65    gives information on where divisions can be inserted profitably.
66    The second walk is in pre-order, and it inserts divisions as explained
67    above, and replaces divisions by multiplications.
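
   As an illustrative sketch, suppose basic block A contains a division
   by X and falls through unconditionally to block B, which also divides
   by X.  The subset then holds an occurrence for B as a child of the
   occurrence for A; the post-order walk finds that B post-dominates A
   and annotates A with both divisions, so with N = 2 the pre-order walk
   inserts 1.0 / X in A right before its division and replaces both
   divisions by multiplications.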
68 
69    In the best case, the cost of the pass is O(n_statements).  In the
70    worst-case, the cost is due to creating the dominator tree subset,
71    with a cost of O(n_basic_blocks ^ 2); however this can only happen
72    for n_statements / n_basic_blocks statements.  So, the amortized cost
73    of creating the dominator tree subset is O(n_basic_blocks) and the
74    worst-case cost of the pass is O(n_statements * n_basic_blocks).
75 
76    More practically, the cost will be small because there are few
77    divisions, and they tend to be in the same basic block, so insert_bb
78    is called very few times.
79 
80    If we did this using domwalk.c, an efficient implementation would have
81    to work on all the variables in a single pass, because we could not
82    work on just a subset of the dominator tree, as we do now, and the
83    cost would also be something like O(n_statements * n_basic_blocks).
84    The data structures would be more complex in order to work on all the
85    variables in a single pass.  */
86 
87 #include "config.h"
88 #include "system.h"
89 #include "coretypes.h"
90 #include "backend.h"
91 #include "target.h"
92 #include "rtl.h"
93 #include "tree.h"
94 #include "gimple.h"
95 #include "predict.h"
96 #include "alloc-pool.h"
97 #include "tree-pass.h"
98 #include "ssa.h"
99 #include "optabs-tree.h"
100 #include "gimple-pretty-print.h"
101 #include "alias.h"
102 #include "fold-const.h"
103 #include "gimple-fold.h"
104 #include "gimple-iterator.h"
105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "internal-fn.h"
113 #include "case-cfn-macros.h"
114 #include "optabs-libfuncs.h"
115 #include "tree-eh.h"
116 #include "targhooks.h"
117 #include "domwalk.h"
118 #include "tree-ssa-math-opts.h"
119 
120 /* This structure represents one basic block that either computes a
121    division, or is a common dominator for basic blocks that compute a
122    division.  */
123 struct occurrence {
124   /* The basic block represented by this structure.  */
125   basic_block bb = basic_block();
126 
127   /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
128      inserted in BB.  */
129   tree recip_def = tree();
130 
131   /* If non-NULL, the SSA_NAME holding the definition for a squared
132      reciprocal inserted in BB.  */
133   tree square_recip_def = tree();
134 
135   /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
136      was inserted in BB.  */
137   gimple *recip_def_stmt = nullptr;
138 
139   /* Pointer to a list of "struct occurrence"s for blocks dominated
140      by BB.  */
141   struct occurrence *children = nullptr;
142 
143   /* Pointer to the next "struct occurrence" in the list of blocks
144      sharing a common dominator.  */
145   struct occurrence *next = nullptr;
146 
147   /* The number of divisions that are in BB before compute_merit.  The
148      number of divisions that are in BB or post-dominate it after
149      compute_merit.  */
150   int num_divisions = 0;
151 
152   /* True if the basic block has a division, false if it is a common
153      dominator for basic blocks that do.  If it is false and trapping
154      math is active, BB is not a candidate for inserting a reciprocal.  */
155   bool bb_has_division = false;
156 
157   /* Construct a struct occurrence for basic block BB, whose
158      children list is headed by CHILDREN.  */
159   occurrence (basic_block bb, struct occurrence *children)
160   : bb (bb), children (children)
161   {
162     bb->aux = this;
163   }
164 
165   /* Destroy a struct occurrence and remove it from its basic block.  */
166   ~occurrence ()
167   {
168     bb->aux = nullptr;
169   }
170 
171   /* Allocate memory for a struct occurrence from OCC_POOL.  */
172   static void* operator new (size_t);
173 
174   /* Return memory for a struct occurrence to OCC_POOL.  */
175   static void operator delete (void*, size_t);
176 };
177 
178 static struct
179 {
180   /* Number of 1.0/X ops inserted.  */
181   int rdivs_inserted;
182 
183   /* Number of 1.0/FUNC ops inserted.  */
184   int rfuncs_inserted;
185 } reciprocal_stats;
186 
187 static struct
188 {
189   /* Number of cexpi calls inserted.  */
190   int inserted;
191 
192   /* Number of conversions removed.  */
193   int conv_removed;
194 
195 } sincos_stats;
196 
197 static struct
198 {
199   /* Number of widening multiplication ops inserted.  */
200   int widen_mults_inserted;
201 
202   /* Number of integer multiply-and-accumulate ops inserted.  */
203   int maccs_inserted;
204 
205   /* Number of fp fused multiply-add ops inserted.  */
206   int fmas_inserted;
207 
208   /* Number of divmod calls inserted.  */
209   int divmod_calls_inserted;
210 } widen_mul_stats;
211 
212 /* The instance of "struct occurrence" representing the highest
213    interesting block in the dominator tree.  */
214 static struct occurrence *occ_head;
215 
216 /* Allocation pool for getting instances of "struct occurrence".  */
217 static object_allocator<occurrence> *occ_pool;
218 
219 void* occurrence::operator new (size_t n)
220 {
221   gcc_assert (n == sizeof(occurrence));
222   return occ_pool->allocate_raw ();
223 }
224 
225 void occurrence::operator delete (void *occ, size_t n)
226 {
227   gcc_assert (n == sizeof(occurrence));
228   occ_pool->remove_raw (occ);
229 }
230 
231 /* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
232    list of "struct occurrence"s, one per basic block, having IDOM as
233    their common dominator.
234 
235    We try to insert NEW_OCC as deep as possible in the tree, and we also
236    insert any other block that is a common dominator for BB and one
237    block already in the tree.  */
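
/* For example, with a dominator tree ENTRY -> D -> { A, B }, where an
   occurrence for A is already linked under ENTRY and NEW_OCC is for B,
   the nearest common dominator of A and B is D, which differs from both
   blocks and from IDOM: a fresh occurrence is created for D, A and B
   become its children, and the occurrence for D is then linked into
   ENTRY's list in their place.  */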
238 
239 static void
240 insert_bb (struct occurrence *new_occ, basic_block idom,
241 	   struct occurrence **p_head)
242 {
243   struct occurrence *occ, **p_occ;
244 
245   for (p_occ = p_head; (occ = *p_occ) != NULL; )
246     {
247       basic_block bb = new_occ->bb, occ_bb = occ->bb;
248       basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
249       if (dom == bb)
250 	{
251 	  /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
252 	     from its list.  */
253 	  *p_occ = occ->next;
254 	  occ->next = new_occ->children;
255 	  new_occ->children = occ;
256 
257 	  /* Try the next block (it may also be dominated by BB).  */
258 	}
259 
260       else if (dom == occ_bb)
261 	{
262 	  /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
263 	  insert_bb (new_occ, dom, &occ->children);
264 	  return;
265 	}
266 
267       else if (dom != idom)
268 	{
269 	  gcc_assert (!dom->aux);
270 
271 	  /* There is a dominator between IDOM and BB, add it and make
272 	     two children out of NEW_OCC and OCC.  First, remove OCC from
273 	     its list.	*/
274 	  *p_occ = occ->next;
275 	  new_occ->next = occ;
276 	  occ->next = NULL;
277 
278 	  /* None of the previous blocks has DOM as a dominator: if we tail
279 	     recursed, we would reexamine them uselessly. Just switch BB with
280 	     DOM, and go on looking for blocks dominated by DOM.  */
281 	  new_occ = new occurrence (dom, new_occ);
282 	}
283 
284       else
285 	{
286 	  /* Nothing special, go on with the next element.  */
287 	  p_occ = &occ->next;
288 	}
289     }
290 
291   /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
292   new_occ->next = *p_head;
293   *p_head = new_occ;
294 }
295 
296 /* Register that we found a division in BB.
297    IMPORTANCE is a measure of how much weighting to give
298    that division.  Use IMPORTANCE = 2 to register a single
299    division.  If the division is going to be found multiple
300    times use 1 (as it is with squares).  */
301 
302 static inline void
303 register_division_in (basic_block bb, int importance)
304 {
305   struct occurrence *occ;
306 
307   occ = (struct occurrence *) bb->aux;
308   if (!occ)
309     {
310       occ = new occurrence (bb, NULL);
311       insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
312     }
313 
314   occ->bb_has_division = true;
315   occ->num_divisions += importance;
316 }
317 
318 
319 /* Compute the number of divisions that postdominate each block in OCC and
320    its children.  */
321 
322 static void
323 compute_merit (struct occurrence *occ)
324 {
325   struct occurrence *occ_child;
326   basic_block dom = occ->bb;
327 
328   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
329     {
330       basic_block bb;
331       if (occ_child->children)
332         compute_merit (occ_child);
333 
334       if (flag_exceptions)
335 	bb = single_noncomplex_succ (dom);
336       else
337 	bb = dom;
338 
339       if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
340         occ->num_divisions += occ_child->num_divisions;
341     }
342 }
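
/* For instance, if OCC->bb ends in a conditional and only one arm
   contains a division, that arm does not post-dominate OCC->bb, so its
   count is not propagated upwards and no reciprocal will be speculated
   on the path that skips the division.  */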
343 
344 
345 /* Return whether USE_STMT is a floating-point division by DEF.  */
346 static inline bool
347 is_division_by (gimple *use_stmt, tree def)
348 {
349   return is_gimple_assign (use_stmt)
350 	 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
351 	 && gimple_assign_rhs2 (use_stmt) == def
352 	 /* Do not recognize x / x as valid division, as we are getting
353 	    confused later by replacing all immediate uses of x in such
354 	    a stmt.  */
355 	 && gimple_assign_rhs1 (use_stmt) != def
356 	 && !stmt_can_throw_internal (cfun, use_stmt);
357 }
358 
359 /* Return TRUE if USE_STMT is a multiplication of DEF by A.  */
360 static inline bool
361 is_mult_by (gimple *use_stmt, tree def, tree a)
362 {
363   if (gimple_code (use_stmt) == GIMPLE_ASSIGN
364       && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
365     {
366       tree op0 = gimple_assign_rhs1 (use_stmt);
367       tree op1 = gimple_assign_rhs2 (use_stmt);
368 
369       return (op0 == def && op1 == a)
370 	      || (op0 == a && op1 == def);
371     }
372   return false;
373 }
374 
375 /* Return whether USE_STMT is DEF * DEF.  */
376 static inline bool
377 is_square_of (gimple *use_stmt, tree def)
378 {
379   return is_mult_by (use_stmt, def, def);
380 }
381 
382 /* Return whether USE_STMT is a floating-point division by
383    DEF * DEF.  */
384 static inline bool
385 is_division_by_square (gimple *use_stmt, tree def)
386 {
387   if (gimple_code (use_stmt) == GIMPLE_ASSIGN
388       && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
389       && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
390       && !stmt_can_throw_internal (cfun, use_stmt))
391     {
392       tree denominator = gimple_assign_rhs2 (use_stmt);
393       if (TREE_CODE (denominator) == SSA_NAME)
394 	return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
395     }
396   return false;
397 }
398 
399 /* Walk the subset of the dominator tree rooted at OCC, setting the
400    RECIP_DEF field to a definition of 1.0 / DEF that can be used in
401    the given basic block.  The field may be left NULL, of course,
402    if it is not possible or profitable to do the optimization.
403 
404    DEF_GSI is an iterator pointing at the statement defining DEF.
405    If RECIP_DEF is set, a dominator already has a computation that can
406    be used.
407 
408    If should_insert_square_recip is set, then this also inserts
409    the square of the reciprocal immediately after the definition
410    of the reciprocal.  */
411 
412 static void
413 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
414 		    tree def, tree recip_def, tree square_recip_def,
415 		    int should_insert_square_recip, int threshold)
416 {
417   tree type;
418   gassign *new_stmt, *new_square_stmt;
419   gimple_stmt_iterator gsi;
420   struct occurrence *occ_child;
421 
422   if (!recip_def
423       && (occ->bb_has_division || !flag_trapping_math)
424       /* Divide by two as all divisions are counted twice in
425 	 the costing loop.  */
426       && occ->num_divisions / 2 >= threshold)
427     {
428       /* Make a variable with the replacement and substitute it.  */
429       type = TREE_TYPE (def);
430       recip_def = create_tmp_reg (type, "reciptmp");
431       new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
432 				      build_one_cst (type), def);
433 
434       if (should_insert_square_recip)
435 	{
436 	  square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
437 	  new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
438 						 recip_def, recip_def);
439 	}
440 
441       if (occ->bb_has_division)
442 	{
443 	  /* Case 1: insert before an existing division.  */
444 	  gsi = gsi_after_labels (occ->bb);
445 	  while (!gsi_end_p (gsi)
446 		 && (!is_division_by (gsi_stmt (gsi), def))
447 		 && (!is_division_by_square (gsi_stmt (gsi), def)))
448 	    gsi_next (&gsi);
449 
450 	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
451 	  if (should_insert_square_recip)
452 	    gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
453 	}
454       else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
455 	{
456 	  /* Case 2: insert right after the definition.  Note that this will
457 	     never happen if the definition statement can throw, because in
458 	     that case the sole successor of the statement's basic block will
459 	     dominate all the uses as well.  */
460 	  gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
461 	  if (should_insert_square_recip)
462 	    gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
463 	}
464       else
465 	{
466 	  /* Case 3: insert in a basic block not containing defs/uses.  */
467 	  gsi = gsi_after_labels (occ->bb);
468 	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
469 	  if (should_insert_square_recip)
470 	    gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
471 	}
472 
473       reciprocal_stats.rdivs_inserted++;
474 
475       occ->recip_def_stmt = new_stmt;
476     }
477 
478   occ->recip_def = recip_def;
479   occ->square_recip_def = square_recip_def;
480   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
481     insert_reciprocals (def_gsi, occ_child, def, recip_def,
482 			square_recip_def, should_insert_square_recip,
483 			threshold);
484 }
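
/* As a worked example of the costing check above: two divisions by DEF
   each register an importance of 2, so after compute_merit the relevant
   OCC->num_divisions is 4; halving it gives 2, which meets a (here
   hypothetical) target threshold of 2 and allows the reciprocal, and its
   square when requested, to be inserted.  */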
485 
486 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
487    Take as argument the use of (x * x).  */
488 static inline void
489 replace_reciprocal_squares (use_operand_p use_p)
490 {
491   gimple *use_stmt = USE_STMT (use_p);
492   basic_block bb = gimple_bb (use_stmt);
493   struct occurrence *occ = (struct occurrence *) bb->aux;
494 
495   if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
496       && occ->recip_def)
497     {
498       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
499       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
500       gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
501       SET_USE (use_p, occ->square_recip_def);
502       fold_stmt_inplace (&gsi);
503       update_stmt (use_stmt);
504     }
505 }
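
/* For example, given t = x * x and a use r = a / t, once the reciprocal
   and its square have been inserted the division becomes
   r = a * powmult_reciptmp, where powmult_reciptmp holds (1/x) * (1/x).  */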
506 
507 
508 /* Replace the division at USE_P with a multiplication by the reciprocal, if
509    possible.  */
510 
511 static inline void
512 replace_reciprocal (use_operand_p use_p)
513 {
514   gimple *use_stmt = USE_STMT (use_p);
515   basic_block bb = gimple_bb (use_stmt);
516   struct occurrence *occ = (struct occurrence *) bb->aux;
517 
518   if (optimize_bb_for_speed_p (bb)
519       && occ->recip_def && use_stmt != occ->recip_def_stmt)
520     {
521       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
522       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
523       SET_USE (use_p, occ->recip_def);
524       fold_stmt_inplace (&gsi);
525       update_stmt (use_stmt);
526     }
527 }
528 
529 
530 /* Free OCC and return one more "struct occurrence" to be freed.  */
531 
532 static struct occurrence *
533 free_bb (struct occurrence *occ)
534 {
535   struct occurrence *child, *next;
536 
537   /* First get the two pointers hanging off OCC.  */
538   next = occ->next;
539   child = occ->children;
540   delete occ;
541 
542   /* Now ensure that we don't recurse unless it is necessary.  */
543   if (!child)
544     return next;
545   else
546     {
547       while (next)
548 	next = free_bb (next);
549 
550       return child;
551     }
552 }
553 
554 /* Transform sequences like
555    t = sqrt (a)
556    x = 1.0 / t;
557    r1 = x * x;
558    r2 = a * x;
559    into:
560    t = sqrt (a)
561    r1 = 1.0 / a;
562    r2 = t;
563    x = r1 * r2;
564    depending on the uses of x, r1, r2.  This removes one multiplication and
565    allows the sqrt and division operations to execute in parallel.
566    DEF_GSI is the gsi of the initial division by sqrt that defines
567    DEF (x in the example above).  */
568 
569 static void
570 optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
571 {
572   gimple *use_stmt;
573   imm_use_iterator use_iter;
574   gimple *stmt = gsi_stmt (*def_gsi);
575   tree x = def;
576   tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
577   tree div_rhs1 = gimple_assign_rhs1 (stmt);
578 
579   if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
580       || TREE_CODE (div_rhs1) != REAL_CST
581       || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
582     return;
583 
584   gcall *sqrt_stmt
585     = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
586 
587   if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
588     return;
589 
590   switch (gimple_call_combined_fn (sqrt_stmt))
591     {
592     CASE_CFN_SQRT:
593     CASE_CFN_SQRT_FN:
594       break;
595 
596     default:
597       return;
598     }
599   tree a = gimple_call_arg (sqrt_stmt, 0);
600 
601   /* We have 'a' and 'x'.  Now analyze the uses of 'x'.  */
602 
603   /* Statements that use x in x * x.  */
604   auto_vec<gimple *> sqr_stmts;
605   /* Statements that use x in a * x.  */
606   auto_vec<gimple *> mult_stmts;
607   bool has_other_use = false;
608   bool mult_on_main_path = false;
609 
610   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
611     {
612       if (is_gimple_debug (use_stmt))
613 	continue;
614       if (is_square_of (use_stmt, x))
615 	{
616 	  sqr_stmts.safe_push (use_stmt);
617 	  if (gimple_bb (use_stmt) == gimple_bb (stmt))
618 	    mult_on_main_path = true;
619 	}
620       else if (is_mult_by (use_stmt, x, a))
621 	{
622 	  mult_stmts.safe_push (use_stmt);
623 	  if (gimple_bb (use_stmt) == gimple_bb (stmt))
624 	    mult_on_main_path = true;
625 	}
626       else
627 	has_other_use = true;
628     }
629 
630   /* In the x * x and a * x cases we just rewire stmt operands or
631      remove multiplications.  In the has_other_use case we introduce
632      a multiplication so make sure we don't introduce a multiplication
633      on a path where there was none.  */
634   if (has_other_use && !mult_on_main_path)
635     return;
636 
637   if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
638     return;
639 
640   /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
641      to be able to compose it from the sqr and mult cases.  */
642   if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
643     return;
644 
645   if (dump_file)
646     {
647       fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
648       print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
649       print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
650       fprintf (dump_file, "\n");
651     }
652 
653   bool delete_div = !has_other_use;
654   tree sqr_ssa_name = NULL_TREE;
655   if (!sqr_stmts.is_empty ())
656     {
657       /* r1 = x * x.  Transform the original
658 	 x = 1.0 / t
659 	 into
660 	 tmp1 = 1.0 / a
661 	 r1 = tmp1.  */
662 
663       sqr_ssa_name
664 	= make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
665 
666       if (dump_file)
667 	{
668 	  fprintf (dump_file, "Replacing original division\n");
669 	  print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
670 	  fprintf (dump_file, "with new division\n");
671 	}
672       stmt
673 	= gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
674 			       gimple_assign_rhs1 (stmt), a);
675       gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
676       gsi_remove (def_gsi, true);
677       *def_gsi = gsi_for_stmt (stmt);
678       fold_stmt_inplace (def_gsi);
679       update_stmt (stmt);
680 
681       if (dump_file)
682 	print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
683 
684       delete_div = false;
685       gimple *sqr_stmt;
686       unsigned int i;
687       FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
688 	{
689 	  gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
690 	  gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
691 	  update_stmt (sqr_stmt);
692 	}
693     }
694   if (!mult_stmts.is_empty ())
695     {
696       /* r2 = a * x.  Transform this into:
697 	 r2 = t (The original sqrt (a)).  */
698       unsigned int i;
699       gimple *mult_stmt = NULL;
700       FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
701 	{
702 	  gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
703 
704 	  if (dump_file)
705 	    {
706 	      fprintf (dump_file, "Replacing squaring multiplication\n");
707 	      print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
708 	      fprintf (dump_file, "with assignment\n");
709 	    }
710 	  gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
711 	  fold_stmt_inplace (&gsi2);
712 	  update_stmt (mult_stmt);
713 	  if (dump_file)
714 	    print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
715       }
716     }
717 
718   if (has_other_use)
719     {
720       /* Using the two temporaries tmp1, tmp2 from above
721 	 the original x is now:
722 	 x = tmp1 * tmp2.  */
723       gcc_assert (orig_sqrt_ssa_name);
724       gcc_assert (sqr_ssa_name);
725 
726       gimple *new_stmt
727 	= gimple_build_assign (x, MULT_EXPR,
728 			       orig_sqrt_ssa_name, sqr_ssa_name);
729       gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
730       update_stmt (stmt);
731     }
732   else if (delete_div)
733     {
734       /* Remove the original division.  */
735       gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
736       gsi_remove (&gsi2, true);
737       release_defs (stmt);
738     }
739   else
740     release_ssa_name (x);
741 }
742 
743 /* Look for floating-point divisions among DEF's uses, and try to
744    replace them by multiplications with the reciprocal.  Add
745    as many statements computing the reciprocal as needed.
746 
747    DEF must be a GIMPLE register of a floating-point type.  */
748 
749 static void
750 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
751 {
752   use_operand_p use_p, square_use_p;
753   imm_use_iterator use_iter, square_use_iter;
754   tree square_def;
755   struct occurrence *occ;
756   int count = 0;
757   int threshold;
758   int square_recip_count = 0;
759   int sqrt_recip_count = 0;
760 
761   gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
762   threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
763 
764   /* If DEF is a square (x * x), count the number of divisions by x.
765      If there are more divisions by x than by (DEF * DEF), prefer to optimize
766      the reciprocal of x instead of DEF.  This improves cases like:
767        def = x * x
768        t0 = a / def
769        t1 = b / def
770        t2 = c / x
771      Reciprocal optimization of x results in 1 division rather than 2 or 3.  */
772   gimple *def_stmt = SSA_NAME_DEF_STMT (def);
773 
774   if (is_gimple_assign (def_stmt)
775       && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
776       && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
777       && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
778     {
779       tree op0 = gimple_assign_rhs1 (def_stmt);
780 
781       FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
782 	{
783 	  gimple *use_stmt = USE_STMT (use_p);
784 	  if (is_division_by (use_stmt, op0))
785 	    sqrt_recip_count++;
786 	}
787     }
788 
789   FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
790     {
791       gimple *use_stmt = USE_STMT (use_p);
792       if (is_division_by (use_stmt, def))
793 	{
794 	  register_division_in (gimple_bb (use_stmt), 2);
795 	  count++;
796 	}
797 
798       if (is_square_of (use_stmt, def))
799 	{
800 	  square_def = gimple_assign_lhs (use_stmt);
801 	  FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
802 	    {
803 	      gimple *square_use_stmt = USE_STMT (square_use_p);
804 	      if (is_division_by (square_use_stmt, square_def))
805 		{
806 		  /* This is executed twice for each division by a square.  */
807 		  register_division_in (gimple_bb (square_use_stmt), 1);
808 		  square_recip_count++;
809 		}
810 	    }
811 	}
812     }
813 
814   /* Square reciprocals were counted twice above.  */
815   square_recip_count /= 2;
816 
817   /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x).  */
818   if (sqrt_recip_count > square_recip_count)
819     goto out;
820 
821   /* Do the expensive part only if we can hope to optimize something.  */
822   if (count + square_recip_count >= threshold && count >= 1)
823     {
824       gimple *use_stmt;
825       for (occ = occ_head; occ; occ = occ->next)
826 	{
827 	  compute_merit (occ);
828 	  insert_reciprocals (def_gsi, occ, def, NULL, NULL,
829 			      square_recip_count, threshold);
830 	}
831 
832       FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
833 	{
834 	  if (is_division_by (use_stmt, def))
835 	    {
836 	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
837 		replace_reciprocal (use_p);
838 	    }
839 	  else if (square_recip_count > 0 && is_square_of (use_stmt, def))
840 	    {
841 	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
842 		{
843 		  /* Find all uses of the square that are divisions and
844 		     replace them by multiplications with the inverse.  */
845 		  imm_use_iterator square_iterator;
846 		  gimple *powmult_use_stmt = USE_STMT (use_p);
847 		  tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
848 
849 		  FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
850 					 square_iterator, powmult_def_name)
851 		    FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
852 		      {
853 			gimple *powmult_use_stmt = USE_STMT (square_use_p);
854 			if (is_division_by (powmult_use_stmt, powmult_def_name))
855 			  replace_reciprocal_squares (square_use_p);
856 		      }
857 		}
858 	    }
859 	}
860     }
861 
862 out:
863   for (occ = occ_head; occ; )
864     occ = free_bb (occ);
865 
866   occ_head = NULL;
867 }
868 
869 /* Return an internal function that implements the reciprocal of CALL,
870    or IFN_LAST if there is no such function that the target supports.  */
871 
872 internal_fn
873 internal_fn_reciprocal (gcall *call)
874 {
875   internal_fn ifn;
876 
877   switch (gimple_call_combined_fn (call))
878     {
879     CASE_CFN_SQRT:
880     CASE_CFN_SQRT_FN:
881       ifn = IFN_RSQRT;
882       break;
883 
884     default:
885       return IFN_LAST;
886     }
887 
888   tree_pair types = direct_internal_fn_types (ifn, call);
889   if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
890     return IFN_LAST;
891 
892   return ifn;
893 }
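
/* For example, when the target supports IFN_RSQRT for the type involved,
   a division such as a / sqrt (b) can later be rewritten by the pass into
   a * t with t = IFN_RSQRT (b), reusing the SSA name of the old sqrt
   result (provided all uses of that result are such divisions).  */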
894 
895 /* Go through all the floating-point SSA_NAMEs, and call
896    execute_cse_reciprocals_1 on each of them.  */
897 namespace {
898 
899 const pass_data pass_data_cse_reciprocals =
900 {
901   GIMPLE_PASS, /* type */
902   "recip", /* name */
903   OPTGROUP_NONE, /* optinfo_flags */
904   TV_TREE_RECIP, /* tv_id */
905   PROP_ssa, /* properties_required */
906   0, /* properties_provided */
907   0, /* properties_destroyed */
908   0, /* todo_flags_start */
909   TODO_update_ssa, /* todo_flags_finish */
910 };
911 
912 class pass_cse_reciprocals : public gimple_opt_pass
913 {
914 public:
915   pass_cse_reciprocals (gcc::context *ctxt)
916     : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
917   {}
918 
919   /* opt_pass methods: */
920   virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
921   virtual unsigned int execute (function *);
922 
923 }; // class pass_cse_reciprocals
924 
925 unsigned int
926 pass_cse_reciprocals::execute (function *fun)
927 {
928   basic_block bb;
929   tree arg;
930 
931   occ_pool = new object_allocator<occurrence> ("dominators for recip");
932 
933   memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
934   calculate_dominance_info (CDI_DOMINATORS);
935   calculate_dominance_info (CDI_POST_DOMINATORS);
936 
937   if (flag_checking)
938     FOR_EACH_BB_FN (bb, fun)
939       gcc_assert (!bb->aux);
940 
941   for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
942     if (FLOAT_TYPE_P (TREE_TYPE (arg))
943 	&& is_gimple_reg (arg))
944       {
945 	tree name = ssa_default_def (fun, arg);
946 	if (name)
947 	  execute_cse_reciprocals_1 (NULL, name);
948       }
949 
950   FOR_EACH_BB_FN (bb, fun)
951     {
952       tree def;
953 
954       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
955 	   gsi_next (&gsi))
956 	{
957 	  gphi *phi = gsi.phi ();
958 	  def = PHI_RESULT (phi);
959 	  if (! virtual_operand_p (def)
960 	      && FLOAT_TYPE_P (TREE_TYPE (def)))
961 	    execute_cse_reciprocals_1 (NULL, def);
962 	}
963 
964       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
965 	   gsi_next (&gsi))
966         {
967 	  gimple *stmt = gsi_stmt (gsi);
968 
969 	  if (gimple_has_lhs (stmt)
970 	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
971 	      && FLOAT_TYPE_P (TREE_TYPE (def))
972 	      && TREE_CODE (def) == SSA_NAME)
973 	    {
974 	      execute_cse_reciprocals_1 (&gsi, def);
975 	      stmt = gsi_stmt (gsi);
976 	      if (flag_unsafe_math_optimizations
977 		  && is_gimple_assign (stmt)
978 		  && gimple_assign_lhs (stmt) == def
979 		  && !stmt_can_throw_internal (cfun, stmt)
980 		  && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
981 		optimize_recip_sqrt (&gsi, def);
982 	    }
983 	}
984 
985       if (optimize_bb_for_size_p (bb))
986         continue;
987 
988       /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
989       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
990 	   gsi_next (&gsi))
991         {
992 	  gimple *stmt = gsi_stmt (gsi);
993 
994 	  if (is_gimple_assign (stmt)
995 	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
996 	    {
997 	      tree arg1 = gimple_assign_rhs2 (stmt);
998 	      gimple *stmt1;
999 
1000 	      if (TREE_CODE (arg1) != SSA_NAME)
1001 		continue;
1002 
1003 	      stmt1 = SSA_NAME_DEF_STMT (arg1);
1004 
1005 	      if (is_gimple_call (stmt1)
1006 		  && gimple_call_lhs (stmt1))
1007 		{
1008 		  bool fail;
1009 		  imm_use_iterator ui;
1010 		  use_operand_p use_p;
1011 		  tree fndecl = NULL_TREE;
1012 
1013 		  gcall *call = as_a <gcall *> (stmt1);
1014 		  internal_fn ifn = internal_fn_reciprocal (call);
1015 		  if (ifn == IFN_LAST)
1016 		    {
1017 		      fndecl = gimple_call_fndecl (call);
1018 		      if (!fndecl
1019 			  || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
1020 			continue;
1021 		      fndecl = targetm.builtin_reciprocal (fndecl);
1022 		      if (!fndecl)
1023 			continue;
1024 		    }
1025 
1026 		  /* Check that all uses of the SSA name are divisions,
1027 		     otherwise replacing the defining statement will do
1028 		     the wrong thing.  */
1029 		  fail = false;
1030 		  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1031 		    {
1032 		      gimple *stmt2 = USE_STMT (use_p);
1033 		      if (is_gimple_debug (stmt2))
1034 			continue;
1035 		      if (!is_gimple_assign (stmt2)
1036 			  || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
1037 			  || gimple_assign_rhs1 (stmt2) == arg1
1038 			  || gimple_assign_rhs2 (stmt2) != arg1)
1039 			{
1040 			  fail = true;
1041 			  break;
1042 			}
1043 		    }
1044 		  if (fail)
1045 		    continue;
1046 
1047 		  gimple_replace_ssa_lhs (call, arg1);
1048 		  if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
1049 		    {
1050 		      auto_vec<tree, 4> args;
1051 		      for (unsigned int i = 0;
1052 			   i < gimple_call_num_args (call); i++)
1053 			args.safe_push (gimple_call_arg (call, i));
1054 		      gcall *stmt2;
1055 		      if (ifn == IFN_LAST)
1056 			stmt2 = gimple_build_call_vec (fndecl, args);
1057 		      else
1058 			stmt2 = gimple_build_call_internal_vec (ifn, args);
1059 		      gimple_call_set_lhs (stmt2, arg1);
1060 		      gimple_move_vops (stmt2, call);
1061 		      gimple_call_set_nothrow (stmt2,
1062 					       gimple_call_nothrow_p (call));
1063 		      gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
1064 		      gsi_replace (&gsi2, stmt2, true);
1065 		    }
1066 		  else
1067 		    {
1068 		      if (ifn == IFN_LAST)
1069 			gimple_call_set_fndecl (call, fndecl);
1070 		      else
1071 			gimple_call_set_internal_fn (call, ifn);
1072 		      update_stmt (call);
1073 		    }
1074 		  reciprocal_stats.rfuncs_inserted++;
1075 
1076 		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1077 		    {
1078 		      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1079 		      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
1080 		      fold_stmt_inplace (&gsi);
1081 		      update_stmt (stmt);
1082 		    }
1083 		}
1084 	    }
1085 	}
1086     }
1087 
1088   statistics_counter_event (fun, "reciprocal divs inserted",
1089 			    reciprocal_stats.rdivs_inserted);
1090   statistics_counter_event (fun, "reciprocal functions inserted",
1091 			    reciprocal_stats.rfuncs_inserted);
1092 
1093   free_dominance_info (CDI_DOMINATORS);
1094   free_dominance_info (CDI_POST_DOMINATORS);
1095   delete occ_pool;
1096   return 0;
1097 }
1098 
1099 } // anon namespace
1100 
1101 gimple_opt_pass *
1102 make_pass_cse_reciprocals (gcc::context *ctxt)
1103 {
1104   return new pass_cse_reciprocals (ctxt);
1105 }
1106 
1107 /* If NAME is the result of a type conversion, look for other
1108    equivalent dominating or dominated conversions, and replace all
1109    uses with the earliest dominating name, removing the redundant
1110    conversions.  Return the prevailing name.  */
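
/* For example, given
     a_1 = (float) s_2;
   and, in a dominated block,
     b_3 = (float) s_2;
   the second conversion is redundant: uses of b_3 are replaced by a_1
   and the dominated cast is removed.  */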
1111 
1112 static tree
1113 execute_cse_conv_1 (tree name)
1114 {
1115   if (SSA_NAME_IS_DEFAULT_DEF (name)
1116       || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1117     return name;
1118 
1119   gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1120 
1121   if (!gimple_assign_cast_p (def_stmt))
1122     return name;
1123 
1124   tree src = gimple_assign_rhs1 (def_stmt);
1125 
1126   if (TREE_CODE (src) != SSA_NAME)
1127     return name;
1128 
1129   imm_use_iterator use_iter;
1130   gimple *use_stmt;
1131 
1132   /* Find the earliest dominating def.    */
1133   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1134     {
1135       if (use_stmt == def_stmt
1136 	  || !gimple_assign_cast_p (use_stmt))
1137 	continue;
1138 
1139       tree lhs = gimple_assign_lhs (use_stmt);
1140 
1141       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1142 	  || (gimple_assign_rhs1 (use_stmt)
1143 	      != gimple_assign_rhs1 (def_stmt))
1144 	  || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1145 	continue;
1146 
1147       bool use_dominates;
1148       if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
1149 	{
1150 	  gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1151 	  while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
1152 	    gsi_next (&gsi);
1153 	  use_dominates = !gsi_end_p (gsi);
1154 	}
1155       else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1156 			       gimple_bb (def_stmt)))
1157 	use_dominates = false;
1158       else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
1159 			       gimple_bb (use_stmt)))
1160 	use_dominates = true;
1161       else
1162 	continue;
1163 
1164       if (use_dominates)
1165 	{
1166 	  std::swap (name, lhs);
1167 	  std::swap (def_stmt, use_stmt);
1168 	}
1169     }
1170 
1171   /* Now go through all uses of SRC again, replacing the equivalent
1172      dominated conversions.  We may replace defs that were not
1173      dominated by the then-prevailing defs when we first visited
1174      them.  */
1175   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1176     {
1177       if (use_stmt == def_stmt
1178 	  || !gimple_assign_cast_p (use_stmt))
1179 	continue;
1180 
1181       tree lhs = gimple_assign_lhs (use_stmt);
1182 
1183       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1184 	  || (gimple_assign_rhs1 (use_stmt)
1185 	      != gimple_assign_rhs1 (def_stmt))
1186 	  || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1187 	continue;
1188 
1189       if (gimple_bb (def_stmt) == gimple_bb (use_stmt)
1190 	  || dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1191 			     gimple_bb (def_stmt)))
1192 	{
1193 	  sincos_stats.conv_removed++;
1194 
1195 	  gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1196 	  replace_uses_by (lhs, name);
1197 	  gsi_remove (&gsi, true);
1198 	}
1199     }
1200 
1201   return name;
1202 }
1203 
1204 /* Records an occurrence at statement USE_STMT in the vector of statements
1205    STMTS if it is dominated by *TOP_BB, dominates it, or *TOP_BB is
1206    not yet initialized.  Returns true if the occurrence was pushed on
1207    the vector.  Adjusts *TOP_BB to be the basic block dominating all
1208    statements in the vector.  */
1209 
1210 static bool
1211 maybe_record_sincos (vec<gimple *> *stmts,
1212 		     basic_block *top_bb, gimple *use_stmt)
1213 {
1214   basic_block use_bb = gimple_bb (use_stmt);
1215   if (*top_bb
1216       && (*top_bb == use_bb
1217 	  || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1218     stmts->safe_push (use_stmt);
1219   else if (!*top_bb
1220 	   || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1221     {
1222       stmts->safe_push (use_stmt);
1223       *top_bb = use_bb;
1224     }
1225   else
1226     return false;
1227 
1228   return true;
1229 }
1230 
1231 /* Look for sin, cos and cexpi calls with the same argument NAME and
1232    create a single call to cexpi CSEing the result in this case.
1233    We first walk over all immediate uses of the argument, collecting
1234    statements that we can CSE into a vector, and in a second pass replace
1235    each statement's rhs with a REALPART or IMAGPART expression of the
1236    result of the cexpi call, which we insert before the use statement that
1237    dominates all other candidates.  */
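
/* For example, assuming a usable cexpi exists for the type,
     c_1 = cos (x_2);
     s_3 = sin (x_2);
   becomes
     sincostmp_4 = cexpi (x_2);
     c_1 = REALPART_EXPR <sincostmp_4>;
     s_3 = IMAGPART_EXPR <sincostmp_4>;  */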
1238 
1239 static bool
1240 execute_cse_sincos_1 (tree name)
1241 {
1242   gimple_stmt_iterator gsi;
1243   imm_use_iterator use_iter;
1244   tree fndecl, res, type = NULL_TREE;
1245   gimple *def_stmt, *use_stmt, *stmt;
1246   int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1247   auto_vec<gimple *> stmts;
1248   basic_block top_bb = NULL;
1249   int i;
1250   bool cfg_changed = false;
1251 
1252   name = execute_cse_conv_1 (name);
1253 
1254   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1255     {
1256       if (gimple_code (use_stmt) != GIMPLE_CALL
1257 	  || !gimple_call_lhs (use_stmt))
1258 	continue;
1259 
1260       switch (gimple_call_combined_fn (use_stmt))
1261 	{
1262 	CASE_CFN_COS:
1263 	  seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1264 	  break;
1265 
1266 	CASE_CFN_SIN:
1267 	  seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1268 	  break;
1269 
1270 	CASE_CFN_CEXPI:
1271 	  seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1272 	  break;
1273 
1274 	default:;
1275 	  continue;
1276 	}
1277 
1278       tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt));
1279       if (!type)
1280 	{
1281 	  type = t;
1282 	  t = TREE_TYPE (name);
1283 	}
1284       /* This checks that NAME has the right type in the first round,
1285 	 and, in subsequent rounds, that the built_in type is the same
1286 	 type, or a compatible type.  */
1287       if (type != t && !types_compatible_p (type, t))
1288 	return false;
1289     }
1290   if (seen_cos + seen_sin + seen_cexpi <= 1)
1291     return false;
1292 
1293   /* Simply insert cexpi at the beginning of top_bb but not earlier than
1294      the name def statement.  */
1295   fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
1296   if (!fndecl)
1297     return false;
1298   stmt = gimple_build_call (fndecl, 1, name);
1299   res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
1300   gimple_call_set_lhs (stmt, res);
1301 
1302   def_stmt = SSA_NAME_DEF_STMT (name);
1303   if (!SSA_NAME_IS_DEFAULT_DEF (name)
1304       && gimple_code (def_stmt) != GIMPLE_PHI
1305       && gimple_bb (def_stmt) == top_bb)
1306     {
1307       gsi = gsi_for_stmt (def_stmt);
1308       gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1309     }
1310   else
1311     {
1312       gsi = gsi_after_labels (top_bb);
1313       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1314     }
1315   sincos_stats.inserted++;
1316 
1317   /* And adjust the recorded old call sites.  */
1318   for (i = 0; stmts.iterate (i, &use_stmt); ++i)
1319     {
1320       tree rhs = NULL;
1321 
1322       switch (gimple_call_combined_fn (use_stmt))
1323 	{
1324 	CASE_CFN_COS:
1325 	  rhs = fold_build1 (REALPART_EXPR, type, res);
1326 	  break;
1327 
1328 	CASE_CFN_SIN:
1329 	  rhs = fold_build1 (IMAGPART_EXPR, type, res);
1330 	  break;
1331 
1332 	CASE_CFN_CEXPI:
1333 	  rhs = res;
1334 	  break;
1335 
1336 	default:;
1337 	  gcc_unreachable ();
1338 	}
1339 
1340 	/* Replace call with a copy.  */
1341 	stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1342 
1343 	gsi = gsi_for_stmt (use_stmt);
1344 	gsi_replace (&gsi, stmt, true);
1345 	if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1346 	  cfg_changed = true;
1347     }
1348 
1349   return cfg_changed;
1350 }
1351 
1352 /* To evaluate powi(x,n), the floating point value x raised to the
1353    constant integer exponent n, we use a hybrid algorithm that
1354    combines the "window method" with look-up tables.  For an
1355    introduction to exponentiation algorithms and "addition chains",
1356    see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1357    "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1358    3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1359    Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */
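
/* For instance, powi (x, 13) can be computed with five multiplications
   instead of twelve by following the addition chain
     x2 = x * x, x3 = x2 * x, x5 = x3 * x2, x10 = x5 * x5, x13 = x10 * x3,
   which is the decomposition the table below encodes.  */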
1360 
1361 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1362    multiplications to inline before calling the system library's pow
1363    function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1364    so this default never requires calling pow, powf or powl.  */
1365 
1366 #ifndef POWI_MAX_MULTS
1367 #define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
1368 #endif
1369 
1370 /* The size of the "optimal power tree" lookup table.  All
1371    exponents less than this value are simply looked up in the
1372    powi_table below.  This threshold is also used to size the
1373    cache of pseudo registers that hold intermediate results.  */
1374 #define POWI_TABLE_SIZE 256
1375 
1376 /* The size, in bits of the window, used in the "window method"
1377    exponentiation algorithm.  This is equivalent to a radix of
1378    (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
1379 #define POWI_WINDOW_SIZE 3
1380 
1381 /* The following table is an efficient representation of an
1382    "optimal power tree".  For each value, i, the corresponding
1383    value, j, in the table states that an optimal evaluation
1384    sequence for calculating pow(x,i) can be found by evaluating
1385    pow(x,j)*pow(x,i-j).  An optimal power tree for the first
1386    100 integers is given in Knuth's "Seminumerical algorithms".  */
1387 
1388 static const unsigned char powi_table[POWI_TABLE_SIZE] =
1389   {
1390       0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
1391       4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
1392       8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
1393      12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
1394      16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
1395      20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
1396      24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
1397      28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
1398      32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
1399      36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
1400      40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
1401      44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
1402      48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
1403      52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
1404      56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
1405      60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
1406      64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
1407      68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
1408      72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
1409      76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
1410      80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
1411      84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
1412      88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
1413      92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
1414      96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
1415     100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
1416     104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
1417     108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
1418     112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
1419     116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
1420     120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
1421     124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
1422   };
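
/* Reading the table: powi_table[13] is 10, so pow (x, 13) is evaluated
   as pow (x, 10) * pow (x, 3); recursing on 10, 5, 3 and 2 reproduces the
   five-multiplication chain x*x, x2*x, x3*x2, x5*x5, x10*x3.  */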
1423 
1424 
1425 /* Return the number of multiplications required to calculate
1426    powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
1427    subroutine of powi_cost.  CACHE is an array indicating
1428    which exponents have already been calculated.  */
1429 
1430 static int
1431 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1432 {
1433   /* If we've already calculated this exponent, then this evaluation
1434      doesn't require any additional multiplications.  */
1435   if (cache[n])
1436     return 0;
1437 
1438   cache[n] = true;
1439   return powi_lookup_cost (n - powi_table[n], cache)
1440 	 + powi_lookup_cost (powi_table[n], cache) + 1;
1441 }
1442 
1443 /* Return the number of multiplications required to calculate
1444    powi(x,n) for an arbitrary x, given the exponent N.  This
1445    function needs to be kept in sync with powi_as_mults below.  */
1446 
1447 static int
1448 powi_cost (HOST_WIDE_INT n)
1449 {
1450   bool cache[POWI_TABLE_SIZE];
1451   unsigned HOST_WIDE_INT digit;
1452   unsigned HOST_WIDE_INT val;
1453   int result;
1454 
1455   if (n == 0)
1456     return 0;
1457 
1458   /* Ignore the reciprocal when calculating the cost.  */
1459   val = (n < 0) ? -n : n;
1460 
1461   /* Initialize the exponent cache.  */
1462   memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1463   cache[1] = true;
1464 
1465   result = 0;
1466 
1467   while (val >= POWI_TABLE_SIZE)
1468     {
1469       if (val & 1)
1470 	{
1471 	  digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1472 	  result += powi_lookup_cost (digit, cache)
1473 		    + POWI_WINDOW_SIZE + 1;
1474 	  val >>= POWI_WINDOW_SIZE;
1475 	}
1476       else
1477 	{
1478 	  val >>= 1;
1479 	  result++;
1480 	}
1481     }
1482 
1483   return result + powi_lookup_cost (val, cache);
1484 }
1485 
1486 /* Recursive subroutine of powi_as_mults.  This function takes the
1487    array, CACHE, of already calculated exponents and an exponent N and
1488    returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */
1489 
1490 static tree
1491 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1492 		 HOST_WIDE_INT n, tree *cache)
1493 {
1494   tree op0, op1, ssa_target;
1495   unsigned HOST_WIDE_INT digit;
1496   gassign *mult_stmt;
1497 
1498   if (n < POWI_TABLE_SIZE && cache[n])
1499     return cache[n];
1500 
1501   ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1502 
1503   if (n < POWI_TABLE_SIZE)
1504     {
1505       cache[n] = ssa_target;
1506       op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1507       op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1508     }
1509   else if (n & 1)
1510     {
1511       digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1512       op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1513       op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1514     }
1515   else
1516     {
1517       op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1518       op1 = op0;
1519     }
1520 
1521   mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1522   gimple_set_location (mult_stmt, loc);
1523   gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1524 
1525   return ssa_target;
1526 }
1527 
1528 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1529    This function needs to be kept in sync with powi_cost above.  */
1530 
1531 tree
1532 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1533 	       tree arg0, HOST_WIDE_INT n)
1534 {
1535   tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1536   gassign *div_stmt;
1537   tree target;
1538 
1539   if (n == 0)
1540     return build_one_cst (type);
1541 
1542   memset (cache, 0, sizeof (cache));
1543   cache[1] = arg0;
1544 
1545   result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
1546   if (n >= 0)
1547     return result;
1548 
1549   /* If the original exponent was negative, reciprocate the result.  */
1550   target = make_temp_ssa_name (type, NULL, "powmult");
1551   div_stmt = gimple_build_assign (target, RDIV_EXPR,
1552 				  build_real (type, dconst1), result);
1553   gimple_set_location (div_stmt, loc);
1554   gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1555 
1556   return target;
1557 }
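
/* As a rough sketch of the output for a negative exponent,
   powi_as_mults (gsi, loc, x, -5) emits something like
     powmult_1 = x * x;
     powmult_2 = x * powmult_1;
     powmult_3 = powmult_1 * powmult_2;
     powmult_4 = 1.0 / powmult_3;
   and returns powmult_4, i.e. x**-5 computed as 1 / (x**2 * x**3).  */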
1558 
1559 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1560    location info LOC.  If the arguments are appropriate, create an
1561    equivalent sequence of statements prior to GSI using an optimal
1562    number of multiplications, and return an expression holding the
1563    result.  */
1564 
1565 static tree
1566 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1567 			    tree arg0, HOST_WIDE_INT n)
1568 {
1569   /* Avoid largest negative number.  */
1570   if (n != -n
1571       && ((n >= -1 && n <= 2)
1572 	  || (optimize_function_for_speed_p (cfun)
1573 	      && powi_cost (n) <= POWI_MAX_MULTS)))
1574     return powi_as_mults (gsi, loc, arg0, n);
1575 
1576   return NULL_TREE;
1577 }
1578 
1579 /* Build a gimple call statement that calls FN with argument ARG.
1580    Set the lhs of the call statement to a fresh SSA name.  Insert the
1581    statement prior to GSI's current position, and return the fresh
1582    SSA name.  */
1583 
1584 static tree
1585 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1586 		       tree fn, tree arg)
1587 {
1588   gcall *call_stmt;
1589   tree ssa_target;
1590 
1591   call_stmt = gimple_build_call (fn, 1, arg);
1592   ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1593   gimple_set_lhs (call_stmt, ssa_target);
1594   gimple_set_location (call_stmt, loc);
1595   gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1596 
1597   return ssa_target;
1598 }
1599 
1600 /* Build a gimple binary operation with the given CODE and arguments
1601    ARG0, ARG1, assigning the result to a new SSA name built from
1602    NAME.  Insert the statement prior to GSI's current position, and
1603    return the fresh SSA name.  */
1604 
1605 static tree
1606 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1607 			const char *name, enum tree_code code,
1608 			tree arg0, tree arg1)
1609 {
1610   tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1611   gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1612   gimple_set_location (stmt, loc);
1613   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1614   return result;
1615 }
1616 
1617 /* Build a gimple reference operation with the given CODE and argument
1618    ARG, assigning the result to a new SSA name of TYPE with NAME.
1619    Insert the statement prior to GSI's current position, and return
1620    the fresh SSA name.  */
1621 
1622 static inline tree
1623 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1624 		      const char *name, enum tree_code code, tree arg0)
1625 {
1626   tree result = make_temp_ssa_name (type, NULL, name);
1627   gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1628   gimple_set_location (stmt, loc);
1629   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1630   return result;
1631 }
1632 
1633 /* Build a gimple assignment to cast VAL to TYPE.  Insert the statement
1634    prior to GSI's current position, and return the fresh SSA name.  */
1635 
1636 static tree
1637 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1638 		       tree type, tree val)
1639 {
1640   tree result = make_ssa_name (type);
1641   gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1642   gimple_set_location (stmt, loc);
1643   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1644   return result;
1645 }
1646 
1647 struct pow_synth_sqrt_info
1648 {
1649   bool *factors;
1650   unsigned int deepest;
1651   unsigned int num_mults;
1652 };
1653 
1654 /* Return true iff the real value C can be represented as a
1655    sum of powers of 0.5 up to N.  That is:
1656    C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1657    Record in INFO the various parameters of the synthesis algorithm such
1658    as the factors a[i], the maximum 0.5 power and the number of
1659    multiplications that will be required.  */
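/* Worked example (illustrative only): C = 0.625 = 0.5 + 0.125, so for
   N >= 3 the factors are a[1] = 1, a[2] = 0, a[3] = 1, INFO->deepest = 3
   and INFO->num_mults = 1 (one multiply to combine sqrt (x) with
   sqrt (sqrt (sqrt (x)))).  A constant such as 0.3 never reaches zero
   within a small N and is therefore rejected.  */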
1660 
1661 bool
1662 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1663 				 struct pow_synth_sqrt_info *info)
1664 {
1665   REAL_VALUE_TYPE factor = dconsthalf;
1666   REAL_VALUE_TYPE remainder = c;
1667 
1668   info->deepest = 0;
1669   info->num_mults = 0;
1670   memset (info->factors, 0, n * sizeof (bool));
1671 
1672   for (unsigned i = 0; i < n; i++)
1673     {
1674       REAL_VALUE_TYPE res;
1675 
1676       /* If something inexact happened, bail out now.  */
1677       if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1678 	return false;
1679 
1680       /* We have hit zero.  The number is representable as a sum
1681          of powers of 0.5.  */
1682       if (real_equal (&res, &dconst0))
1683 	{
1684 	  info->factors[i] = true;
1685 	  info->deepest = i + 1;
1686 	  return true;
1687 	}
1688       else if (!REAL_VALUE_NEGATIVE (res))
1689 	{
1690 	  remainder = res;
1691 	  info->factors[i] = true;
1692 	  info->num_mults++;
1693 	}
1694       else
1695 	info->factors[i] = false;
1696 
1697       real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1698     }
1699   return false;
1700 }
1701 
1702 /* Return the tree corresponding to FN being applied
1703    to ARG N times at GSI and LOC.
1704    Look up previous results from CACHE if need be.
1705    cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times.  */
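/* E.g. (illustrative): with FN = sqrt and N = 2 this emits
   t1 = sqrt (ARG) and t2 = sqrt (t1), caching both, so that a later
   request for the depth-2 chain reuses t2 instead of emitting new
   calls.  */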
1706 
1707 static tree
1708 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1709 	      tree fn, location_t loc, tree *cache)
1710 {
1711   tree res = cache[n];
1712   if (!res)
1713     {
1714       tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1715       res = build_and_insert_call (gsi, loc, fn, prev);
1716       cache[n] = res;
1717     }
1718 
1719   return res;
1720 }
1721 
1722 /* Print to STREAM the repeated application of function FNAME to ARG
1723    N times.  So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1724    "foo (foo (x))".  */
1725 
1726 static void
1727 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1728 		 unsigned int n)
1729 {
1730   if (n == 0)
1731     fprintf (stream, "%s", arg);
1732   else
1733     {
1734       fprintf (stream, "%s (", fname);
1735       print_nested_fn (stream, fname, arg, n - 1);
1736       fprintf (stream, ")");
1737     }
1738 }
1739 
1740 /* Print to STREAM the fractional sequence of sqrt chains
1741    applied to ARG, described by INFO.  Used for the dump file.  */
1742 
1743 static void
1744 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1745 			        struct pow_synth_sqrt_info *info)
1746 {
1747   for (unsigned int i = 0; i < info->deepest; i++)
1748     {
1749       bool is_set = info->factors[i];
1750       if (is_set)
1751 	{
1752 	  print_nested_fn (stream, "sqrt", arg, i + 1);
1753 	  if (i != info->deepest - 1)
1754 	    fprintf (stream, " * ");
1755 	}
1756     }
1757 }
1758 
1759 /* Print to STREAM a representation of raising ARG to an integer
1760    power N.  Used for the dump file.  */
1761 
1762 static void
1763 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1764 {
1765   if (n > 1)
1766     fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1767   else if (n == 1)
1768     fprintf (stream, "%s", arg);
1769 }
1770 
1771 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1772    square roots.  Place at GSI and LOC.  Limit the maximum depth
1773    of the sqrt chains to MAX_DEPTH.  Return the tree holding the
1774    result of the expanded sequence or NULL_TREE if the expansion failed.
1775 
1776    This routine assumes that ARG1 is a real number with a fractional part
1777    (the integer exponent case will have been handled earlier in
1778    gimple_expand_builtin_pow).
1779 
1780    For ARG1 > 0.0:
1781    * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1782      FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1783                     FRAC_PART == ARG1 - WHOLE_PART:
1784      Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1785      POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1786      if it can be expressed as such, that is if FRAC_PART satisfies:
1787      FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1788      where integer a[i] is either 0 or 1.
1789 
1790      Example:
1791      POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1792        --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1793 
1794    For ARG1 < 0.0 there are two approaches:
1795    * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1796          is calculated as above.
1797 
1798      Example:
1799      POW (x, -5.625) == 1.0 / POW (x, 5.625)
1800        -->  1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1801 
1802    * (B) : WHOLE_PART := - ceil (abs (ARG1))
1803            FRAC_PART  := ARG1 - WHOLE_PART
1804      and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1805      Example:
1806      POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1807        --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1808 
1809    For ARG1 < 0.0 we choose between (A) and (B) depending on
1810    how many multiplications we'd have to do.
1811    So, for the example in (B): POW (x, -5.875), if we were to
1812    follow algorithm (A) we would produce:
1813    1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1814    which contains more multiplications than approach (B).
1815 
1816    Hopefully, this approach will eliminate potentially expensive POW library
1817    calls when unsafe floating point math is enabled and allow the compiler to
1818    further optimise the multiplies, square roots and divides produced by this
1819    function.  */
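/* Rough operation counts for the POW (x, -5.875) example above (a sketch,
   not from the original sources): approach (A) needs the sqrt chains for
   0.875 = 0.5 + 0.25 + 0.125 (two multiplies to join three chains),
   POWI (x, 5), one multiply to join the two parts and a final division;
   approach (B) needs only the single chain for 0.125, POWI (x, 6) and one
   division, which is why (B) is chosen there.  */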
1820 
1821 static tree
1822 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1823 		     tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1824 {
1825   tree type = TREE_TYPE (arg0);
1826   machine_mode mode = TYPE_MODE (type);
1827   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1828   bool one_over = true;
1829 
1830   if (!sqrtfn)
1831     return NULL_TREE;
1832 
1833   if (TREE_CODE (arg1) != REAL_CST)
1834     return NULL_TREE;
1835 
1836   REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1837 
1838   gcc_assert (max_depth > 0);
1839   tree *cache = XALLOCAVEC (tree, max_depth + 1);
1840 
1841   struct pow_synth_sqrt_info synth_info;
1842   synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1843   synth_info.deepest = 0;
1844   synth_info.num_mults = 0;
1845 
1846   bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1847   REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1848 
1849   /* The whole and fractional parts of exp.  */
1850   REAL_VALUE_TYPE whole_part;
1851   REAL_VALUE_TYPE frac_part;
1852 
1853   real_floor (&whole_part, mode, &exp);
1854   real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1855 
1856 
1857   REAL_VALUE_TYPE ceil_whole = dconst0;
1858   REAL_VALUE_TYPE ceil_fract = dconst0;
1859 
1860   if (neg_exp)
1861     {
1862       real_ceil (&ceil_whole, mode, &exp);
1863       real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1864     }
1865 
1866   if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1867     return NULL_TREE;
1868 
1869   /* Check whether it's more profitable to not use 1.0 / ...  */
1870   if (neg_exp)
1871     {
1872       struct pow_synth_sqrt_info alt_synth_info;
1873       alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1874       alt_synth_info.deepest = 0;
1875       alt_synth_info.num_mults = 0;
1876 
1877       if (representable_as_half_series_p (ceil_fract, max_depth,
1878 					   &alt_synth_info)
1879 	  && alt_synth_info.deepest <= synth_info.deepest
1880 	  && alt_synth_info.num_mults < synth_info.num_mults)
1881 	{
1882 	  whole_part = ceil_whole;
1883 	  frac_part = ceil_fract;
1884 	  synth_info.deepest = alt_synth_info.deepest;
1885 	  synth_info.num_mults = alt_synth_info.num_mults;
1886 	  memcpy (synth_info.factors, alt_synth_info.factors,
1887 		  (max_depth + 1) * sizeof (bool));
1888 	  one_over = false;
1889 	}
1890     }
1891 
1892   HOST_WIDE_INT n = real_to_integer (&whole_part);
1893   REAL_VALUE_TYPE cint;
1894   real_from_integer (&cint, VOIDmode, n, SIGNED);
1895 
1896   if (!real_identical (&whole_part, &cint))
1897     return NULL_TREE;
1898 
1899   if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1900     return NULL_TREE;
1901 
1902   memset (cache, 0, (max_depth + 1) * sizeof (tree));
1903 
1904   tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1905 
1906   /* Calculate the integer part of the exponent.  */
1907   if (n > 1)
1908     {
1909       integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1910       if (!integer_res)
1911 	return NULL_TREE;
1912     }
1913 
1914   if (dump_file)
1915     {
1916       char string[64];
1917 
1918       real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1919       fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1920 
1921       if (neg_exp)
1922 	{
1923 	  if (one_over)
1924 	    {
1925 	      fprintf (dump_file, "1.0 / (");
1926 	      dump_integer_part (dump_file, "x", n);
1927 	      if (n > 0)
1928 	        fprintf (dump_file, " * ");
1929 	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1930 	      fprintf (dump_file, ")");
1931 	    }
1932 	  else
1933 	    {
1934 	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1935 	      fprintf (dump_file, " / (");
1936 	      dump_integer_part (dump_file, "x", n);
1937 	      fprintf (dump_file, ")");
1938 	    }
1939 	}
1940       else
1941 	{
1942 	  dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1943 	  if (n > 0)
1944 	    fprintf (dump_file, " * ");
1945 	  dump_integer_part (dump_file, "x", n);
1946 	}
1947 
1948       fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1949     }
1950 
1951 
1952   tree fract_res = NULL_TREE;
1953   cache[0] = arg0;
1954 
1955   /* Calculate the fractional part of the exponent.  */
1956   for (unsigned i = 0; i < synth_info.deepest; i++)
1957     {
1958       if (synth_info.factors[i])
1959 	{
1960 	  tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1961 
1962 	  if (!fract_res)
1963 	      fract_res = sqrt_chain;
1964 
1965 	  else
1966 	    fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1967 					   fract_res, sqrt_chain);
1968 	}
1969     }
1970 
1971   tree res = NULL_TREE;
1972 
1973   if (neg_exp)
1974     {
1975       if (one_over)
1976 	{
1977 	  if (n > 0)
1978 	    res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1979 					   fract_res, integer_res);
1980 	  else
1981 	    res = fract_res;
1982 
1983 	  res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1984 					  build_real (type, dconst1), res);
1985 	}
1986       else
1987 	{
1988 	  res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1989 					 fract_res, integer_res);
1990 	}
1991     }
1992   else
1993     res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1994 				   fract_res, integer_res);
1995   return res;
1996 }
1997 
1998 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1999    with location info LOC.  If possible, create an equivalent and
2000    less expensive sequence of statements prior to GSI, and return an
2001    expression holding the result.  */
2002 
2003 static tree
2004 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
2005 			   tree arg0, tree arg1)
2006 {
2007   REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
2008   REAL_VALUE_TYPE c2, dconst3;
2009   HOST_WIDE_INT n;
2010   tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
2011   machine_mode mode;
2012   bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
2013   bool hw_sqrt_exists, c_is_int, c2_is_int;
2014 
2015   dconst1_4 = dconst1;
2016   SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
2017 
2018   /* If the exponent isn't a constant, there's nothing of interest
2019      to be done.  */
2020   if (TREE_CODE (arg1) != REAL_CST)
2021     return NULL_TREE;
2022 
2023   /* Don't perform the operation if flag_signaling_nans is on
2024      and the operand is a signaling NaN.  */
2025   if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
2026       && ((TREE_CODE (arg0) == REAL_CST
2027 	   && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
2028 	  || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
2029     return NULL_TREE;
2030 
2031   /* If the exponent is equivalent to an integer, expand to an optimal
2032      multiplication sequence when profitable.  */
2033   c = TREE_REAL_CST (arg1);
2034   n = real_to_integer (&c);
2035   real_from_integer (&cint, VOIDmode, n, SIGNED);
2036   c_is_int = real_identical (&c, &cint);
2037 
2038   if (c_is_int
2039       && ((n >= -1 && n <= 2)
2040 	  || (flag_unsafe_math_optimizations
2041 	      && speed_p
2042 	      && powi_cost (n) <= POWI_MAX_MULTS)))
2043     return gimple_expand_builtin_powi (gsi, loc, arg0, n);
2044 
2045   /* Attempt various optimizations using sqrt and cbrt.  */
2046   type = TREE_TYPE (arg0);
2047   mode = TYPE_MODE (type);
2048   sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2049 
2050   /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
2051      unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
2052      sqrt(-0) = -0.  */
2053   if (sqrtfn
2054       && real_equal (&c, &dconsthalf)
2055       && !HONOR_SIGNED_ZEROS (mode))
2056     return build_and_insert_call (gsi, loc, sqrtfn, arg0);
2057 
2058   hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
2059 
2060   /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
2061      optimizations since 1./3. is not exactly representable.  If x
2062      is negative and finite, the correct value of pow(x,1./3.) is
2063      a NaN with the "invalid" exception raised, because the value
2064      of 1./3. actually has an even denominator.  The correct value
2065      of cbrt(x) is a negative real value.  */
2066   cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
2067   dconst1_3 = real_value_truncate (mode, dconst_third ());
2068 
2069   if (flag_unsafe_math_optimizations
2070       && cbrtfn
2071       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2072       && real_equal (&c, &dconst1_3))
2073     return build_and_insert_call (gsi, loc, cbrtfn, arg0);
2074 
2075   /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
2076      if we don't have a hardware sqrt insn.  */
2077   dconst1_6 = dconst1_3;
2078   SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
2079 
2080   if (flag_unsafe_math_optimizations
2081       && sqrtfn
2082       && cbrtfn
2083       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2084       && speed_p
2085       && hw_sqrt_exists
2086       && real_equal (&c, &dconst1_6))
2087     {
2088       /* sqrt(x)  */
2089       sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
2090 
2091       /* cbrt(sqrt(x))  */
2092       return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
2093     }
2094 
2095 
2096   /* Attempt to expand the POW as a product of square root chains.
2097      Expand the 0.25 case even when optimising for size.  */
2098   if (flag_unsafe_math_optimizations
2099       && sqrtfn
2100       && hw_sqrt_exists
2101       && (speed_p || real_equal (&c, &dconst1_4))
2102       && !HONOR_SIGNED_ZEROS (mode))
2103     {
2104       unsigned int max_depth = speed_p
2105 				? param_max_pow_sqrt_depth
2106 				: 2;
2107 
2108       tree expand_with_sqrts
2109 	= expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
2110 
2111       if (expand_with_sqrts)
2112 	return expand_with_sqrts;
2113     }
2114 
2115   real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
2116   n = real_to_integer (&c2);
2117   real_from_integer (&cint, VOIDmode, n, SIGNED);
2118   c2_is_int = real_identical (&c2, &cint);
2119 
2120   /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2121 
2122      powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
2123      1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.
2124 
2125      Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
2126      different from pow(x, 1./3.) due to rounding and behavior with
2127      negative x, we need to constrain this transformation to unsafe
2128      math and positive x or finite math.  */
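  /* Worked example (illustrative only): for pow (x, 4./3.) we get n = 4,
     so the expansion below is powi (x, 4/3) * powi (cbrt (x), 4%3)
     = x * cbrt (x); for a negative exponent the same product is built
     and then reciprocated.  */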
2129   real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2130   real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2131   real_round (&c2, mode, &c2);
2132   n = real_to_integer (&c2);
2133   real_from_integer (&cint, VOIDmode, n, SIGNED);
2134   real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2135   real_convert (&c2, mode, &c2);
2136 
2137   if (flag_unsafe_math_optimizations
2138       && cbrtfn
2139       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2140       && real_identical (&c2, &c)
2141       && !c2_is_int
2142       && optimize_function_for_speed_p (cfun)
2143       && powi_cost (n / 3) <= POWI_MAX_MULTS)
2144     {
2145       tree powi_x_ndiv3 = NULL_TREE;
2146 
2147       /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
2148          possible or profitable, give up.  Skip the degenerate case when
2149          abs(n) < 3, where the result is always 1.  */
2150       if (absu_hwi (n) >= 3)
2151 	{
2152 	  powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2153 						     abs_hwi (n / 3));
2154 	  if (!powi_x_ndiv3)
2155 	    return NULL_TREE;
2156 	}
2157 
2158       /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
2159          as that creates an unnecessary variable.  Instead, just produce
2160          either cbrt(x) or cbrt(x) * cbrt(x).  */
2161       cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2162 
2163       if (absu_hwi (n) % 3 == 1)
2164 	powi_cbrt_x = cbrt_x;
2165       else
2166 	powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2167 					      cbrt_x, cbrt_x);
2168 
2169       /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
2170       if (absu_hwi (n) < 3)
2171 	result = powi_cbrt_x;
2172       else
2173 	result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2174 					 powi_x_ndiv3, powi_cbrt_x);
2175 
2176       /* If n is negative, reciprocate the result.  */
2177       if (n < 0)
2178 	result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2179 					 build_real (type, dconst1), result);
2180 
2181       return result;
2182     }
2183 
2184   /* No optimizations succeeded.  */
2185   return NULL_TREE;
2186 }
2187 
2188 /* ARG is the argument to a cabs builtin call in GSI with location info
2189    LOC.  Create a sequence of statements prior to GSI that calculates
2190    sqrt(R*R + I*I), where R and I are the real and imaginary components
2191    of ARG, respectively.  Return an expression holding the result.  */
2192 
2193 static tree
2194 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
2195 {
2196   tree real_part, imag_part, addend1, addend2, sum, result;
2197   tree type = TREE_TYPE (TREE_TYPE (arg));
2198   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2199   machine_mode mode = TYPE_MODE (type);
2200 
2201   if (!flag_unsafe_math_optimizations
2202       || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
2203       || !sqrtfn
2204       || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
2205     return NULL_TREE;
2206 
2207   real_part = build_and_insert_ref (gsi, loc, type, "cabs",
2208 				    REALPART_EXPR, arg);
2209   addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2210 				    real_part, real_part);
2211   imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
2212 				    IMAGPART_EXPR, arg);
2213   addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2214 				    imag_part, imag_part);
2215   sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
2216   result = build_and_insert_call (gsi, loc, sqrtfn, sum);
2217 
2218   return result;
2219 }
2220 
2221 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2222    on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
2223    an optimal number of multiplies, when n is a constant.  */
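/* Sketch of the sincos CSE itself (simplified, illustrative): when both
   sin (x) and cos (x) of the same SSA name appear, they are rewritten to
   share a single cexpi (or sincos) call, roughly

	t = __builtin_cexpi (x);
	s = IMAGPART_EXPR <t>;
	c = REALPART_EXPR <t>;

   so the expensive argument reduction is performed only once.  */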
2224 
2225 namespace {
2226 
2227 const pass_data pass_data_cse_sincos =
2228 {
2229   GIMPLE_PASS, /* type */
2230   "sincos", /* name */
2231   OPTGROUP_NONE, /* optinfo_flags */
2232   TV_TREE_SINCOS, /* tv_id */
2233   PROP_ssa, /* properties_required */
2234   PROP_gimple_opt_math, /* properties_provided */
2235   0, /* properties_destroyed */
2236   0, /* todo_flags_start */
2237   TODO_update_ssa, /* todo_flags_finish */
2238 };
2239 
2240 class pass_cse_sincos : public gimple_opt_pass
2241 {
2242 public:
2243   pass_cse_sincos (gcc::context *ctxt)
2244     : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2245   {}
2246 
2247   /* opt_pass methods: */
2248   virtual bool gate (function *)
2249     {
2250       /* We no longer require either sincos or cexp, since powi expansion
2251 	 piggybacks on this pass.  */
2252       return optimize;
2253     }
2254 
2255   virtual unsigned int execute (function *);
2256 
2257 }; // class pass_cse_sincos
2258 
2259 unsigned int
2260 pass_cse_sincos::execute (function *fun)
2261 {
2262   basic_block bb;
2263   bool cfg_changed = false;
2264 
2265   calculate_dominance_info (CDI_DOMINATORS);
2266   memset (&sincos_stats, 0, sizeof (sincos_stats));
2267 
2268   FOR_EACH_BB_FN (bb, fun)
2269     {
2270       gimple_stmt_iterator gsi;
2271       bool cleanup_eh = false;
2272 
2273       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2274         {
2275 	  gimple *stmt = gsi_stmt (gsi);
2276 
2277 	  /* Only the last stmt in a bb could throw, no need to call
2278 	     gimple_purge_dead_eh_edges if we change something in the middle
2279 	     of a basic block.  */
2280 	  cleanup_eh = false;
2281 
2282 	  if (is_gimple_call (stmt)
2283 	      && gimple_call_lhs (stmt))
2284 	    {
2285 	      tree arg, arg0, arg1, result;
2286 	      HOST_WIDE_INT n;
2287 	      location_t loc;
2288 
2289 	      switch (gimple_call_combined_fn (stmt))
2290 		{
2291 		CASE_CFN_COS:
2292 		CASE_CFN_SIN:
2293 		CASE_CFN_CEXPI:
2294 		  arg = gimple_call_arg (stmt, 0);
2295 		  /* Make sure we have either sincos or cexp.  */
2296 		  if (!targetm.libc_has_function (function_c99_math_complex,
2297 						  TREE_TYPE (arg))
2298 		      && !targetm.libc_has_function (function_sincos,
2299 						     TREE_TYPE (arg)))
2300 		    break;
2301 
2302 		  if (TREE_CODE (arg) == SSA_NAME)
2303 		    cfg_changed |= execute_cse_sincos_1 (arg);
2304 		  break;
2305 
2306 		CASE_CFN_POW:
2307 		  arg0 = gimple_call_arg (stmt, 0);
2308 		  arg1 = gimple_call_arg (stmt, 1);
2309 
2310 		  loc = gimple_location (stmt);
2311 		  result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2312 
2313 		  if (result)
2314 		    {
2315 		      tree lhs = gimple_get_lhs (stmt);
2316 		      gassign *new_stmt = gimple_build_assign (lhs, result);
2317 		      gimple_set_location (new_stmt, loc);
2318 		      unlink_stmt_vdef (stmt);
2319 		      gsi_replace (&gsi, new_stmt, true);
2320 		      cleanup_eh = true;
2321 		      if (gimple_vdef (stmt))
2322 			release_ssa_name (gimple_vdef (stmt));
2323 		    }
2324 		  break;
2325 
2326 		CASE_CFN_POWI:
2327 		  arg0 = gimple_call_arg (stmt, 0);
2328 		  arg1 = gimple_call_arg (stmt, 1);
2329 		  loc = gimple_location (stmt);
2330 
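		  /* powi (-1, n) is 1.0 for even N and -1.0 for odd N, so
		     it can be computed without any multiplication by
		     testing the low bit of N; the branch below builds
		     exactly that with a BIT_AND_EXPR feeding a
		     COND_EXPR.  */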
2331 		  if (real_minus_onep (arg0))
2332 		    {
2333                       tree t0, t1, cond, one, minus_one;
2334 		      gassign *stmt;
2335 
2336 		      t0 = TREE_TYPE (arg0);
2337 		      t1 = TREE_TYPE (arg1);
2338 		      one = build_real (t0, dconst1);
2339 		      minus_one = build_real (t0, dconstm1);
2340 
2341 		      cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2342 		      stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2343 						  arg1, build_int_cst (t1, 1));
2344 		      gimple_set_location (stmt, loc);
2345 		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2346 
2347 		      result = make_temp_ssa_name (t0, NULL, "powi");
2348 		      stmt = gimple_build_assign (result, COND_EXPR, cond,
2349 						  minus_one, one);
2350 		      gimple_set_location (stmt, loc);
2351 		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2352 		    }
2353 		  else
2354 		    {
2355 		      if (!tree_fits_shwi_p (arg1))
2356 			break;
2357 
2358 		      n = tree_to_shwi (arg1);
2359 		      result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2360 		    }
2361 
2362 		  if (result)
2363 		    {
2364 		      tree lhs = gimple_get_lhs (stmt);
2365 		      gassign *new_stmt = gimple_build_assign (lhs, result);
2366 		      gimple_set_location (new_stmt, loc);
2367 		      unlink_stmt_vdef (stmt);
2368 		      gsi_replace (&gsi, new_stmt, true);
2369 		      cleanup_eh = true;
2370 		      if (gimple_vdef (stmt))
2371 			release_ssa_name (gimple_vdef (stmt));
2372 		    }
2373 		  break;
2374 
2375 		CASE_CFN_CABS:
2376 		  arg0 = gimple_call_arg (stmt, 0);
2377 		  loc = gimple_location (stmt);
2378 		  result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2379 
2380 		  if (result)
2381 		    {
2382 		      tree lhs = gimple_get_lhs (stmt);
2383 		      gassign *new_stmt = gimple_build_assign (lhs, result);
2384 		      gimple_set_location (new_stmt, loc);
2385 		      unlink_stmt_vdef (stmt);
2386 		      gsi_replace (&gsi, new_stmt, true);
2387 		      cleanup_eh = true;
2388 		      if (gimple_vdef (stmt))
2389 			release_ssa_name (gimple_vdef (stmt));
2390 		    }
2391 		  break;
2392 
2393 		default:;
2394 		}
2395 	    }
2396 	}
2397       if (cleanup_eh)
2398 	cfg_changed |= gimple_purge_dead_eh_edges (bb);
2399     }
2400 
2401   statistics_counter_event (fun, "sincos statements inserted",
2402 			    sincos_stats.inserted);
2403   statistics_counter_event (fun, "conv statements removed",
2404 			    sincos_stats.conv_removed);
2405 
2406   return cfg_changed ? TODO_cleanup_cfg : 0;
2407 }
2408 
2409 } // anon namespace
2410 
2411 gimple_opt_pass *
2412 make_pass_cse_sincos (gcc::context *ctxt)
2413 {
2414   return new pass_cse_sincos (ctxt);
2415 }
2416 
2417 /* Return true if stmt is a type conversion operation that can be stripped
2418    when used in a widening multiply operation.  */
2419 static bool
2420 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2421 {
2422   enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2423 
2424   if (TREE_CODE (result_type) == INTEGER_TYPE)
2425     {
2426       tree op_type;
2427       tree inner_op_type;
2428 
2429       if (!CONVERT_EXPR_CODE_P (rhs_code))
2430 	return false;
2431 
2432       op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2433 
2434       /* If the type of OP has the same precision as the result, then
2435 	 we can strip this conversion.  The multiply operation will be
2436 	 selected to create the correct extension as a by-product.  */
2437       if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2438 	return true;
2439 
2440       /* We can also strip a conversion if it preserves the signed-ness of
2441 	 the operation and doesn't narrow the range.  */
2442       inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2443 
2444       /* If the inner-most type is unsigned, then we can strip any
2445 	 intermediate widening operation.  If it's signed, then the
2446 	 intermediate widening operation must also be signed.  */
2447       if ((TYPE_UNSIGNED (inner_op_type)
2448 	   || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2449 	  && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2450 	return true;
2451 
2452       return false;
2453     }
2454 
2455   return rhs_code == FIXED_CONVERT_EXPR;
2456 }
2457 
2458 /* Return true if RHS is a suitable operand for a widening multiplication,
2459    assuming a target type of TYPE.
2460    There are two cases:
2461 
2462      - RHS makes some value at least twice as wide.  Store that value
2463        in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2464 
2465      - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
2466        but leave *TYPE_OUT untouched.  */
2467 
2468 static bool
2469 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2470 			tree *new_rhs_out)
2471 {
2472   gimple *stmt;
2473   tree type1, rhs1;
2474 
2475   if (TREE_CODE (rhs) == SSA_NAME)
2476     {
2477       stmt = SSA_NAME_DEF_STMT (rhs);
2478       if (is_gimple_assign (stmt))
2479 	{
2480 	  if (! widening_mult_conversion_strippable_p (type, stmt))
2481 	    rhs1 = rhs;
2482 	  else
2483 	    {
2484 	      rhs1 = gimple_assign_rhs1 (stmt);
2485 
2486 	      if (TREE_CODE (rhs1) == INTEGER_CST)
2487 		{
2488 		  *new_rhs_out = rhs1;
2489 		  *type_out = NULL;
2490 		  return true;
2491 		}
2492 	    }
2493 	}
2494       else
2495 	rhs1 = rhs;
2496 
2497       type1 = TREE_TYPE (rhs1);
2498 
2499       if (TREE_CODE (type1) != TREE_CODE (type)
2500 	  || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2501 	return false;
2502 
2503       *new_rhs_out = rhs1;
2504       *type_out = type1;
2505       return true;
2506     }
2507 
2508   if (TREE_CODE (rhs) == INTEGER_CST)
2509     {
2510       *new_rhs_out = rhs;
2511       *type_out = NULL;
2512       return true;
2513     }
2514 
2515   return false;
2516 }
2517 
2518 /* Return true if STMT performs a widening multiplication, assuming the
2519    output type is TYPE.  If so, store the unwidened types of the operands
2520    in *TYPE1_OUT and *TYPE2_OUT respectively.  Also fill *RHS1_OUT and
2521    *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2522    and *TYPE2_OUT would give the operands of the multiplication.  */
2523 
2524 static bool
2525 is_widening_mult_p (gimple *stmt,
2526 		    tree *type1_out, tree *rhs1_out,
2527 		    tree *type2_out, tree *rhs2_out)
2528 {
2529   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2530 
2531   if (TREE_CODE (type) == INTEGER_TYPE)
2532     {
2533       if (TYPE_OVERFLOW_TRAPS (type))
2534 	return false;
2535     }
2536   else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2537     return false;
2538 
2539   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2540 			       rhs1_out))
2541     return false;
2542 
2543   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2544 			       rhs2_out))
2545     return false;
2546 
2547   if (*type1_out == NULL)
2548     {
2549       if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2550 	return false;
2551       *type1_out = *type2_out;
2552     }
2553 
2554   if (*type2_out == NULL)
2555     {
2556       if (!int_fits_type_p (*rhs2_out, *type1_out))
2557 	return false;
2558       *type2_out = *type1_out;
2559     }
2560 
2561   /* Ensure that the larger of the two operands comes first. */
2562   if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2563     {
2564       std::swap (*type1_out, *type2_out);
2565       std::swap (*rhs1_out, *rhs2_out);
2566     }
2567 
2568   return true;
2569 }
2570 
2571 /* Check to see if the CALL statement is an invocation of copysign
2572    with 1. being the first argument.  */
2573 static bool
2574 is_copysign_call_with_1 (gimple *call)
2575 {
2576   gcall *c = dyn_cast <gcall *> (call);
2577   if (! c)
2578     return false;
2579 
2580   enum combined_fn code = gimple_call_combined_fn (c);
2581 
2582   if (code == CFN_LAST)
2583     return false;
2584 
2585   if (builtin_fn_p (code))
2586     {
2587       switch (as_builtin_fn (code))
2588 	{
2589 	CASE_FLT_FN (BUILT_IN_COPYSIGN):
2590 	CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2591 	  return real_onep (gimple_call_arg (c, 0));
2592 	default:
2593 	  return false;
2594 	}
2595     }
2596 
2597   if (internal_fn_p (code))
2598     {
2599       switch (as_internal_fn (code))
2600 	{
2601 	case IFN_COPYSIGN:
2602 	  return real_onep (gimple_call_arg (c, 0));
2603 	default:
2604 	  return false;
2605 	}
2606     }
2607 
2608    return false;
2609 }
2610 
2611 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2612    This only happens when the xorsign optab is defined.  If the
2613    pattern is not an xorsign pattern or if expansion fails, FALSE is
2614    returned, otherwise TRUE is returned.  */
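/* Illustrative example (not from the original sources): for

	t = __builtin_copysign (1.0, y);
	r = x * t;

   the pair is replaced by a single internal call r = .XORSIGN (x, y)
   when the target provides the xorsign optab.  */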
2615 static bool
2616 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2617 {
2618   tree treeop0, treeop1, lhs, type;
2619   location_t loc = gimple_location (stmt);
2620   lhs = gimple_assign_lhs (stmt);
2621   treeop0 = gimple_assign_rhs1 (stmt);
2622   treeop1 = gimple_assign_rhs2 (stmt);
2623   type = TREE_TYPE (lhs);
2624   machine_mode mode = TYPE_MODE (type);
2625 
2626   if (HONOR_SNANS (type))
2627     return false;
2628 
2629   if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2630     {
2631       gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2632       if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2633 	{
2634 	  call0 = SSA_NAME_DEF_STMT (treeop1);
2635 	  if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2636 	    return false;
2637 
2638 	  treeop1 = treeop0;
2639 	}
2640 	if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2641 	  return false;
2642 
2643 	gcall *c = as_a<gcall*> (call0);
2644 	treeop0 = gimple_call_arg (c, 1);
2645 
2646 	gcall *call_stmt
2647 	  = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2648 	gimple_set_lhs (call_stmt, lhs);
2649 	gimple_set_location (call_stmt, loc);
2650 	gsi_replace (gsi, call_stmt, true);
2651 	return true;
2652     }
2653 
2654   return false;
2655 }
2656 
2657 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2658    its rhs, and try to convert it into a WIDEN_MULT_EXPR.  The return
2659    value is true iff we converted the statement.  */
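/* Illustrative example (a sketch): for

	short a, b;
	int c = (int) a * (int) b;

   the conversions are stripped and the multiply becomes
   c = a w* b (a WIDEN_MULT_EXPR), provided the target implements a
   suitable [us]mul_widen optab for the mode pair.  */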
2660 
2661 static bool
2662 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2663 {
2664   tree lhs, rhs1, rhs2, type, type1, type2;
2665   enum insn_code handler;
2666   scalar_int_mode to_mode, from_mode, actual_mode;
2667   optab op;
2668   int actual_precision;
2669   location_t loc = gimple_location (stmt);
2670   bool from_unsigned1, from_unsigned2;
2671 
2672   lhs = gimple_assign_lhs (stmt);
2673   type = TREE_TYPE (lhs);
2674   if (TREE_CODE (type) != INTEGER_TYPE)
2675     return false;
2676 
2677   if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2678     return false;
2679 
2680   to_mode = SCALAR_INT_TYPE_MODE (type);
2681   from_mode = SCALAR_INT_TYPE_MODE (type1);
2682   if (to_mode == from_mode)
2683     return false;
2684 
2685   from_unsigned1 = TYPE_UNSIGNED (type1);
2686   from_unsigned2 = TYPE_UNSIGNED (type2);
2687 
2688   if (from_unsigned1 && from_unsigned2)
2689     op = umul_widen_optab;
2690   else if (!from_unsigned1 && !from_unsigned2)
2691     op = smul_widen_optab;
2692   else
2693     op = usmul_widen_optab;
2694 
2695   handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2696 						  &actual_mode);
2697 
2698   if (handler == CODE_FOR_nothing)
2699     {
2700       if (op != smul_widen_optab)
2701 	{
2702 	  /* We can use a signed multiply with unsigned types as long as
2703 	     there is a wider mode to use, or it is the smaller of the two
2704 	     types that is unsigned.  Note that type1 >= type2, always.  */
2705 	  if ((TYPE_UNSIGNED (type1)
2706 	       && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2707 	      || (TYPE_UNSIGNED (type2)
2708 		  && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2709 	    {
2710 	      if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2711 		  || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2712 		return false;
2713 	    }
2714 
2715 	  op = smul_widen_optab;
2716 	  handler = find_widening_optab_handler_and_mode (op, to_mode,
2717 							  from_mode,
2718 							  &actual_mode);
2719 
2720 	  if (handler == CODE_FOR_nothing)
2721 	    return false;
2722 
2723 	  from_unsigned1 = from_unsigned2 = false;
2724 	}
2725       else
2726 	{
2727 	  /* Expand can synthesize smul_widen_optab if the target
2728 	     supports umul_widen_optab.  */
2729 	  op = umul_widen_optab;
2730 	  handler = find_widening_optab_handler_and_mode (op, to_mode,
2731 							  from_mode,
2732 							  &actual_mode);
2733 	  if (handler == CODE_FOR_nothing)
2734 	    return false;
2735 	}
2736     }
2737 
2738   /* Ensure that the inputs to the handler are in the correct precision
2739      for the opcode.  This will be the full mode size.  */
2740   actual_precision = GET_MODE_PRECISION (actual_mode);
2741   if (2 * actual_precision > TYPE_PRECISION (type))
2742     return false;
2743   if (actual_precision != TYPE_PRECISION (type1)
2744       || from_unsigned1 != TYPE_UNSIGNED (type1))
2745     rhs1 = build_and_insert_cast (gsi, loc,
2746 				  build_nonstandard_integer_type
2747 				    (actual_precision, from_unsigned1), rhs1);
2748   if (actual_precision != TYPE_PRECISION (type2)
2749       || from_unsigned2 != TYPE_UNSIGNED (type2))
2750     rhs2 = build_and_insert_cast (gsi, loc,
2751 				  build_nonstandard_integer_type
2752 				    (actual_precision, from_unsigned2), rhs2);
2753 
2754   /* Handle constants.  */
2755   if (TREE_CODE (rhs1) == INTEGER_CST)
2756     rhs1 = fold_convert (type1, rhs1);
2757   if (TREE_CODE (rhs2) == INTEGER_CST)
2758     rhs2 = fold_convert (type2, rhs2);
2759 
2760   gimple_assign_set_rhs1 (stmt, rhs1);
2761   gimple_assign_set_rhs2 (stmt, rhs2);
2762   gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2763   update_stmt (stmt);
2764   widen_mul_stats.widen_mults_inserted++;
2765   return true;
2766 }
2767 
2768 /* Process a single gimple statement STMT, which is found at the
2769    iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
2770    rhs (given by CODE), and try to convert it into a
2771    WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
2772    is true iff we converted the statement.  */
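/* Illustrative example (a sketch): with short A and B, a statement such as

	acc = (int) a * (int) b + acc;

   can become acc = WIDEN_MULT_PLUS_EXPR <a, b, acc> when the target has a
   widening multiply-accumulate pattern for that mode/signedness
   combination.  */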
2773 
2774 static bool
2775 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2776 			    enum tree_code code)
2777 {
2778   gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2779   gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2780   tree type, type1, type2, optype;
2781   tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2782   enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2783   optab this_optab;
2784   enum tree_code wmult_code;
2785   enum insn_code handler;
2786   scalar_mode to_mode, from_mode, actual_mode;
2787   location_t loc = gimple_location (stmt);
2788   int actual_precision;
2789   bool from_unsigned1, from_unsigned2;
2790 
2791   lhs = gimple_assign_lhs (stmt);
2792   type = TREE_TYPE (lhs);
2793   if (TREE_CODE (type) != INTEGER_TYPE
2794       && TREE_CODE (type) != FIXED_POINT_TYPE)
2795     return false;
2796 
2797   if (code == MINUS_EXPR)
2798     wmult_code = WIDEN_MULT_MINUS_EXPR;
2799   else
2800     wmult_code = WIDEN_MULT_PLUS_EXPR;
2801 
2802   rhs1 = gimple_assign_rhs1 (stmt);
2803   rhs2 = gimple_assign_rhs2 (stmt);
2804 
2805   if (TREE_CODE (rhs1) == SSA_NAME)
2806     {
2807       rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2808       if (is_gimple_assign (rhs1_stmt))
2809 	rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2810     }
2811 
2812   if (TREE_CODE (rhs2) == SSA_NAME)
2813     {
2814       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2815       if (is_gimple_assign (rhs2_stmt))
2816 	rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2817     }
2818 
2819   /* Allow for one conversion statement between the multiply
2820      and addition/subtraction statement.  If there is more than one
2821      conversion then we assume it would invalidate this
2822      transformation.  If that's not the case then they should have
2823      been folded before now.  */
2824   if (CONVERT_EXPR_CODE_P (rhs1_code))
2825     {
2826       conv1_stmt = rhs1_stmt;
2827       rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2828       if (TREE_CODE (rhs1) == SSA_NAME)
2829 	{
2830 	  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2831 	  if (is_gimple_assign (rhs1_stmt))
2832 	    rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2833 	}
2834       else
2835 	return false;
2836     }
2837   if (CONVERT_EXPR_CODE_P (rhs2_code))
2838     {
2839       conv2_stmt = rhs2_stmt;
2840       rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2841       if (TREE_CODE (rhs2) == SSA_NAME)
2842 	{
2843 	  rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2844 	  if (is_gimple_assign (rhs2_stmt))
2845 	    rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2846 	}
2847       else
2848 	return false;
2849     }
2850 
2851   /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2852      is_widening_mult_p, but we still need the returned rhs values.
2853 
2854      It might also appear that it would be sufficient to use the existing
2855      operands of the widening multiply, but that would limit the choice of
2856      multiply-and-accumulate instructions.
2857 
2858      If the widened-multiplication result has more than one use, it is
2859      probably wiser not to do the conversion.  Also restrict this operation
2860      to a single basic block to avoid moving the multiply to a different block
2861      with a higher execution frequency.  */
2862   if (code == PLUS_EXPR
2863       && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2864     {
2865       if (!has_single_use (rhs1)
2866 	  || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
2867 	  || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2868 				  &type2, &mult_rhs2))
2869 	return false;
2870       add_rhs = rhs2;
2871       conv_stmt = conv1_stmt;
2872     }
2873   else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2874     {
2875       if (!has_single_use (rhs2)
2876 	  || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
2877 	  || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2878 				  &type2, &mult_rhs2))
2879 	return false;
2880       add_rhs = rhs1;
2881       conv_stmt = conv2_stmt;
2882     }
2883   else
2884     return false;
2885 
2886   to_mode = SCALAR_TYPE_MODE (type);
2887   from_mode = SCALAR_TYPE_MODE (type1);
2888   if (to_mode == from_mode)
2889     return false;
2890 
2891   from_unsigned1 = TYPE_UNSIGNED (type1);
2892   from_unsigned2 = TYPE_UNSIGNED (type2);
2893   optype = type1;
2894 
2895   /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
2896   if (from_unsigned1 != from_unsigned2)
2897     {
2898       if (!INTEGRAL_TYPE_P (type))
2899 	return false;
2900       /* We can use a signed multiply with unsigned types as long as
2901 	 there is a wider mode to use, or it is the smaller of the two
2902 	 types that is unsigned.  Note that type1 >= type2, always.  */
2903       if ((from_unsigned1
2904 	   && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2905 	  || (from_unsigned2
2906 	      && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2907 	{
2908 	  if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2909 	      || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2910 	    return false;
2911 	}
2912 
2913       from_unsigned1 = from_unsigned2 = false;
2914       optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2915 					       false);
2916     }
2917 
2918   /* If there was a conversion between the multiply and addition
2919      then we need to make sure it fits a multiply-and-accumulate.
2920      There should be a single mode change which does not change the
2921      value.  */
2922   if (conv_stmt)
2923     {
2924       /* We use the original, unmodified data types for this.  */
2925       tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2926       tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2927       int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2928       bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2929 
2930       if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2931 	{
2932 	  /* Conversion is a truncate.  */
2933 	  if (TYPE_PRECISION (to_type) < data_size)
2934 	    return false;
2935 	}
2936       else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2937 	{
2938 	  /* Conversion is an extend.  Check it's the right sort.  */
2939 	  if (TYPE_UNSIGNED (from_type) != is_unsigned
2940 	      && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2941 	    return false;
2942 	}
2943       /* else convert is a no-op for our purposes.  */
2944     }
2945 
2946   /* Verify that the machine can perform a widening multiply
2947      accumulate in this mode/signedness combination, otherwise
2948      this transformation is likely to pessimize code.  */
2949   this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2950   handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2951 						  from_mode, &actual_mode);
2952 
2953   if (handler == CODE_FOR_nothing)
2954     return false;
2955 
2956   /* Ensure that the inputs to the handler are in the correct precision
2957      for the opcode.  This will be the full mode size.  */
2958   actual_precision = GET_MODE_PRECISION (actual_mode);
2959   if (actual_precision != TYPE_PRECISION (type1)
2960       || from_unsigned1 != TYPE_UNSIGNED (type1))
2961     mult_rhs1 = build_and_insert_cast (gsi, loc,
2962 				       build_nonstandard_integer_type
2963 				         (actual_precision, from_unsigned1),
2964 				       mult_rhs1);
2965   if (actual_precision != TYPE_PRECISION (type2)
2966       || from_unsigned2 != TYPE_UNSIGNED (type2))
2967     mult_rhs2 = build_and_insert_cast (gsi, loc,
2968 				       build_nonstandard_integer_type
2969 					 (actual_precision, from_unsigned2),
2970 				       mult_rhs2);
2971 
2972   if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2973     add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
2974 
2975   /* Handle constants.  */
2976   if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2977     mult_rhs1 = fold_convert (type1, mult_rhs1);
2978   if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2979     mult_rhs2 = fold_convert (type2, mult_rhs2);
2980 
2981   gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
2982 				  add_rhs);
2983   update_stmt (gsi_stmt (*gsi));
2984   widen_mul_stats.maccs_inserted++;
2985   return true;
2986 }
2987 
2988 /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
2989    OP2 and which we know is used in statements that can be, together with the
2990    multiplication, converted to FMAs, perform the transformation.  */
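/* Illustrative example (a sketch): with MUL_RESULT = _65 defined by
   _65 = a * b, a use such as c = _65 + d is replaced by the internal call
   c = .FMA (a, b, d); subtraction and negation uses are handled by
   negating the addend or the first multiplicand, yielding the FMS, FNMA
   and FNMS forms after the folding below.  */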
2991 
2992 static void
2993 convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
2994 {
2995   tree type = TREE_TYPE (mul_result);
2996   gimple *use_stmt;
2997   imm_use_iterator imm_iter;
2998   gcall *fma_stmt;
2999 
3000   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3001     {
3002       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3003       tree addop, mulop1 = op1, result = mul_result;
3004       bool negate_p = false;
3005       gimple_seq seq = NULL;
3006 
3007       if (is_gimple_debug (use_stmt))
3008 	continue;
3009 
3010       if (is_gimple_assign (use_stmt)
3011 	  && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3012 	{
3013 	  result = gimple_assign_lhs (use_stmt);
3014 	  use_operand_p use_p;
3015 	  gimple *neguse_stmt;
3016 	  single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3017 	  gsi_remove (&gsi, true);
3018 	  release_defs (use_stmt);
3019 
3020 	  use_stmt = neguse_stmt;
3021 	  gsi = gsi_for_stmt (use_stmt);
3022 	  negate_p = true;
3023 	}
3024 
3025       tree cond, else_value, ops[3];
3026       tree_code code;
3027       if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
3028 					      ops, &else_value))
3029 	gcc_unreachable ();
3030       addop = ops[0] == result ? ops[1] : ops[0];
3031 
3032       if (code == MINUS_EXPR)
3033 	{
3034 	  if (ops[0] == result)
3035 	    /* a * b - c -> a * b + (-c)  */
3036 	    addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
3037 	  else
3038 	    /* a - b * c -> (-b) * c + a */
3039 	    negate_p = !negate_p;
3040 	}
3041 
3042       if (negate_p)
3043 	mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);
3044 
3045       if (seq)
3046 	gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3047 
3048       if (cond)
3049 	fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
3050 					       op2, addop, else_value);
3051       else
3052 	fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
3053       gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
3054       gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
3055 								   use_stmt));
3056       gsi_replace (&gsi, fma_stmt, true);
3057       /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3058 	 regardless of where the negation occurs.  */
3059       gimple *orig_stmt = gsi_stmt (gsi);
3060       if (fold_stmt (&gsi, follow_all_ssa_edges))
3061 	{
3062 	  if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
3063 	    gcc_unreachable ();
3064 	  update_stmt (gsi_stmt (gsi));
3065 	}
3066 
3067       if (dump_file && (dump_flags & TDF_DETAILS))
3068 	{
3069 	  fprintf (dump_file, "Generated FMA ");
3070 	  print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3071 	  fprintf (dump_file, "\n");
3072 	}
3073 
3074       /* If the FMA result is negated in a single use, fold the negation
3075 	 too.  */
3076       orig_stmt = gsi_stmt (gsi);
3077       use_operand_p use_p;
3078       gimple *neg_stmt;
3079       if (is_gimple_call (orig_stmt)
3080 	  && gimple_call_internal_p (orig_stmt)
3081 	  && gimple_call_lhs (orig_stmt)
3082 	  && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
3083 	  && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
3084 	  && is_gimple_assign (neg_stmt)
3085 	  && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
3086 	  && !stmt_could_throw_p (cfun, neg_stmt))
3087 	{
3088 	  gsi = gsi_for_stmt (neg_stmt);
3089 	  if (fold_stmt (&gsi, follow_all_ssa_edges))
3090 	    {
3091 	      if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
3092 		gcc_unreachable ();
3093 	      update_stmt (gsi_stmt (gsi));
3094 	      if (dump_file && (dump_flags & TDF_DETAILS))
3095 		{
3096 		  fprintf (dump_file, "Folded FMA negation ");
3097 		  print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3098 		  fprintf (dump_file, "\n");
3099 		}
3100 	    }
3101 	}
3102 
3103       widen_mul_stats.fmas_inserted++;
3104     }
3105 }
3106 
3107 /* Data necessary to perform the actual transformation from a multiplication
3108    and an addition to an FMA after decision is taken it should be done and to
3109    then delete the multiplication statement from the function IL.  */
3110 
3111 struct fma_transformation_info
3112 {
3113   gimple *mul_stmt;
3114   tree mul_result;
3115   tree op1;
3116   tree op2;
3117 };
3118 
3119 /* Structure containing the current state of FMA deferring, i.e. whether we are
3120    deferring, whether to continue deferring, and all data necessary to come
3121    back and perform all deferred transformations.  */
3122 
3123 class fma_deferring_state
3124 {
3125 public:
3126   /* Class constructor.  Pass true as PERFORM_DEFERRING in order to actually
3127      do any deferring.  */
3128 
3129   fma_deferring_state (bool perform_deferring)
3130     : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
3131       m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
3132 
3133   /* List of FMA candidates for which the transformation has been determined
3134      possible but which, at this point in the BB analysis, we do not yet
3135      consider beneficial.  */
3136   auto_vec<fma_transformation_info, 8> m_candidates;
3137 
3138   /* Set of results of multiplications that are part of already deferred FMA
3139      candidates.  */
3140   hash_set<tree> m_mul_result_set;
3141 
3142   /* The PHI that supposedly feeds back the result of one FMA to another over
3143      a loop boundary.  */
3144   gphi *m_initial_phi;
3145 
3146   /* Result of the last produced FMA candidate or NULL if there has not been
3147      one.  */
3148   tree m_last_result;
3149 
3150   /* If true, deferring might still be profitable.  If false, transform all
3151      candidates and no longer defer.  */
3152   bool m_deferring_p;
3153 };
3154 
3155 /* Transform all deferred FMA candidates and mark STATE as no longer
3156    deferring.  */
3157 
3158 static void
3159 cancel_fma_deferring (fma_deferring_state *state)
3160 {
3161   if (!state->m_deferring_p)
3162     return;
3163 
3164   for (unsigned i = 0; i < state->m_candidates.length (); i++)
3165     {
3166       if (dump_file && (dump_flags & TDF_DETAILS))
3167 	fprintf (dump_file, "Generating deferred FMA\n");
3168 
3169       const fma_transformation_info &fti = state->m_candidates[i];
3170       convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3171 
3172       gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3173       gsi_remove (&gsi, true);
3174       release_defs (fti.mul_stmt);
3175     }
3176   state->m_deferring_p = false;
3177 }
3178 
3179 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3180    Otherwise return NULL.  */
3181 
3182 static gphi *
3183 result_of_phi (tree op)
3184 {
3185   if (TREE_CODE (op) != SSA_NAME)
3186     return NULL;
3187 
3188   return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3189 }
3190 
3191 /* After processing statements of a BB and recording STATE, return true if the
3192    initial phi is fed by the last FMA candidate result or one such result from
3193    previously processed BBs marked in LAST_RESULT_SET.  */
3194 
3195 static bool
3196 last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3197 				      hash_set<tree> *last_result_set)
3198 {
3199   ssa_op_iter iter;
3200   use_operand_p use;
3201   FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3202     {
3203       tree t = USE_FROM_PTR (use);
3204       if (t == state->m_last_result
3205 	  || last_result_set->contains (t))
3206 	return true;
3207     }
3208 
3209   return false;
3210 }
3211 
3212 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3213    with uses in additions and subtractions to form fused multiply-add
3214    operations.  Returns true if successful and MUL_STMT should be removed.
3215    If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3216    on MUL_COND, otherwise it is unconditional.
3217 
3218    If STATE indicates that we are deferring FMA transformation, that means
3219    that we do not produce FMAs for basic blocks which look like:
3220 
3221     <bb 6>
3222     # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3223     _65 = _14 * _16;
3224     accumulator_66 = _65 + accumulator_111;
3225 
3226   or its unrolled version, i.e. with several FMA candidates that feed the
3227   result of one into the addend of another.  Instead, we add them to a list in
3228   STATE and if we later discover an FMA candidate that is not part of such a
3229   chain, we go back and perform all the previously deferred transformations.  */
3230 
3231 static bool
3232 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3233 		     fma_deferring_state *state, tree mul_cond = NULL_TREE)
3234 {
3235   tree mul_result = gimple_get_lhs (mul_stmt);
3236   /* If there isn't a LHS then this can't be an FMA.  There can be no LHS
3237      if the statement was left just for the side-effects.  */
3238   if (!mul_result)
3239     return false;
3240   tree type = TREE_TYPE (mul_result);
3241   gimple *use_stmt, *neguse_stmt;
3242   use_operand_p use_p;
3243   imm_use_iterator imm_iter;
3244 
3245   if (FLOAT_TYPE_P (type)
3246       && flag_fp_contract_mode == FP_CONTRACT_OFF)
3247     return false;
3248 
3249   /* We don't want to do bitfield reduction ops.  */
3250   if (INTEGRAL_TYPE_P (type)
3251       && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3252     return false;
3253 
3254   /* If the target doesn't support it, don't generate it.  We assume that
3255      if fma isn't available then fms, fnma or fnms are not either.  */
3256   optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3257   if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3258     return false;
3259 
3260   /* If the multiplication has zero uses, it is kept around probably because
3261      of -fnon-call-exceptions.  Don't optimize it away in that case,
3262      it is DCE's job.  */
3263   if (has_zero_uses (mul_result))
3264     return false;
3265 
3266   bool check_defer
3267     = (state->m_deferring_p
3268        && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type)),
3269 		    param_avoid_fma_max_bits));
3270   bool defer = check_defer;
3271   bool seen_negate_p = false;
3272   /* Make sure that the multiplication statement becomes dead after
3273      the transformation, i.e. that all uses are transformed to FMAs.
3274      This means we assume that an FMA operation has the same cost
3275      as an addition.  */
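  /* For instance (illustrative), in  m = a * b;  s = m + c;  use (m);
     forming  s = .FMA (a, b, c)  would leave the multiplication live for the
     other use, so we would pay for both the FMA and the multiplication;
     in that situation the function gives up instead.  */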
3276   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3277     {
3278       tree result = mul_result;
3279       bool negate_p = false;
3280 
3281       use_stmt = USE_STMT (use_p);
3282 
3283       if (is_gimple_debug (use_stmt))
3284 	continue;
3285 
3286       /* For now restrict this operation to single basic blocks.  In theory
3287 	 we would want to support sinking the multiplication in
3288 	 m = a*b;
3289 	 if ()
3290 	   ma = m + c;
3291 	 else
3292 	   d = m;
3293 	 to form a fma in the then block and sink the multiplication to the
3294 	 else block.  */
3295       if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3296 	return false;
3297 
3298       /* A negate on the multiplication leads to FNMA.  */
3299       if (is_gimple_assign (use_stmt)
3300 	  && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3301 	{
3302 	  ssa_op_iter iter;
3303 	  use_operand_p usep;
3304 
3305 	  /* If (due to earlier missed optimizations) we have two
3306 	     negates of the same value, treat them as equivalent
3307 	     to a single negate with multiple uses.  */
3308 	  if (seen_negate_p)
3309 	    return false;
3310 
3311 	  result = gimple_assign_lhs (use_stmt);
3312 
3313 	  /* Make sure the negate statement becomes dead with this
3314 	     single transformation.  */
3315 	  if (!single_imm_use (gimple_assign_lhs (use_stmt),
3316 			       &use_p, &neguse_stmt))
3317 	    return false;
3318 
3319 	  /* Make sure the multiplication isn't also used on that stmt.  */
3320 	  FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3321 	    if (USE_FROM_PTR (usep) == mul_result)
3322 	      return false;
3323 
3324 	  /* Re-validate.  */
3325 	  use_stmt = neguse_stmt;
3326 	  if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3327 	    return false;
3328 
3329 	  negate_p = seen_negate_p = true;
3330 	}
3331 
3332       tree cond, else_value, ops[3];
3333       tree_code code;
3334       if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3335 					      &else_value))
3336 	return false;
3337 
3338       switch (code)
3339 	{
3340 	case MINUS_EXPR:
3341 	  if (ops[1] == result)
3342 	    negate_p = !negate_p;
3343 	  break;
3344 	case PLUS_EXPR:
3345 	  break;
3346 	default:
3347 	  /* FMA can only be formed from PLUS and MINUS.  */
3348 	  return false;
3349 	}
3350 
3351       if (mul_cond && cond != mul_cond)
3352 	return false;
3353 
3354       if (cond)
3355 	{
3356 	  if (cond == result || else_value == result)
3357 	    return false;
3358 	  if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, opt_type))
3359 	    return false;
3360 	}
3361 
3362       /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3363 	 we'll visit later, we might be able to get a more profitable
3364 	 match with fnma.
3365 	 OTOH, if we don't, a negate / fma pair has likely lower latency
3366 	 than a mult / subtract pair.  */
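      /* Illustrative example (added): given
	   _3 = a * b;  _4 = c * d;  x = _3 - _4;
	 on a target with FNMA but without FMS, it is better to leave a * b
	 as a plain multiplication here and later form  x = .FNMA (c, d, _3)
	 when the second multiplication is visited.  */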
3367       if (code == MINUS_EXPR
3368 	  && !negate_p
3369 	  && ops[0] == result
3370 	  && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3371 	  && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3372 	  && TREE_CODE (ops[1]) == SSA_NAME
3373 	  && has_single_use (ops[1]))
3374 	{
3375 	  gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3376 	  if (is_gimple_assign (stmt2)
3377 	      && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3378 	    return false;
3379 	}
3380 
3381       /* We can't handle a * b + a * b.  */
3382       if (ops[0] == ops[1])
3383 	return false;
3384       /* If deferring, make sure we are not looking at an instruction that
3385 	 wouldn't have existed if we were not.  */
3386       if (state->m_deferring_p
3387 	  && (state->m_mul_result_set.contains (ops[0])
3388 	      || state->m_mul_result_set.contains (ops[1])))
3389 	return false;
3390 
3391       if (check_defer)
3392 	{
3393 	  tree use_lhs = gimple_get_lhs (use_stmt);
3394 	  if (state->m_last_result)
3395 	    {
3396 	      if (ops[1] == state->m_last_result
3397 		  || ops[0] == state->m_last_result)
3398 		defer = true;
3399 	      else
3400 		defer = false;
3401 	    }
3402 	  else
3403 	    {
3404 	      gcc_checking_assert (!state->m_initial_phi);
3405 	      gphi *phi;
3406 	      if (ops[0] == result)
3407 		phi = result_of_phi (ops[1]);
3408 	      else
3409 		{
3410 		  gcc_assert (ops[1] == result);
3411 		  phi = result_of_phi (ops[0]);
3412 		}
3413 
3414 	      if (phi)
3415 		{
3416 		  state->m_initial_phi = phi;
3417 		  defer = true;
3418 		}
3419 	      else
3420 		defer = false;
3421 	    }
3422 
3423 	  state->m_last_result = use_lhs;
3424 	  check_defer = false;
3425 	}
3426       else
3427 	defer = false;
3428 
3429       /* While it is possible to validate whether or not the exact form that
3430 	 we've recognized is available in the backend, the assumption is that
3431 	 if the deferring logic above did not trigger, the transformation is
3432 	 never a loss.  For instance, suppose the target only has the plain FMA
3433 	 pattern available.  Consider a*b-c -> fma(a,b,-c): we've exchanged
3434 	 MUL+SUB for FMA+NEG, which is still two operations.  Consider
3435          -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3436 	 form the two NEGs are independent and could be run in parallel.  */
3437     }
3438 
3439   if (defer)
3440     {
3441       fma_transformation_info fti;
3442       fti.mul_stmt = mul_stmt;
3443       fti.mul_result = mul_result;
3444       fti.op1 = op1;
3445       fti.op2 = op2;
3446       state->m_candidates.safe_push (fti);
3447       state->m_mul_result_set.add (mul_result);
3448 
3449       if (dump_file && (dump_flags & TDF_DETAILS))
3450 	{
3451 	  fprintf (dump_file, "Deferred generating FMA for multiplication ");
3452 	  print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3453 	  fprintf (dump_file, "\n");
3454 	}
3455 
3456       return false;
3457     }
3458   else
3459     {
3460       if (state->m_deferring_p)
3461 	cancel_fma_deferring (state);
3462       convert_mult_to_fma_1 (mul_result, op1, op2);
3463       return true;
3464     }
3465 }
3466 
3467 
3468 /* Helper function of match_arith_overflow.  For MUL_OVERFLOW, if we have
3469    a check for non-zero like:
3470    _1 = x_4(D) * y_5(D);
3471    *res_7(D) = _1;
3472    if (x_4(D) != 0)
3473      goto <bb 3>; [50.00%]
3474    else
3475      goto <bb 4>; [50.00%]
3476 
3477    <bb 3> [local count: 536870913]:
3478    _2 = _1 / x_4(D);
3479    _9 = _2 != y_5(D);
3480    _10 = (int) _9;
3481 
3482    <bb 4> [local count: 1073741824]:
3483    # iftmp.0_3 = PHI <_10(3), 0(2)>
3484    then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3485    optimize the x_4(D) != 0 condition to 1.  */
3486 
3487 static void
3488 maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
3489 			       gimple *div_stmt, bool *cfg_changed)
3490 {
3491   basic_block bb = gimple_bb (cond_stmt);
3492   if (gimple_bb (div_stmt) != bb || !single_pred_p (bb))
3493     return;
3494   edge pred_edge = single_pred_edge (bb);
3495   basic_block pred_bb = pred_edge->src;
3496   if (EDGE_COUNT (pred_bb->succs) != 2)
3497     return;
3498   edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
3499   edge other_succ_edge = NULL;
3500   if (gimple_code (cond_stmt) == GIMPLE_COND)
3501     {
3502       if (EDGE_COUNT (bb->succs) != 2)
3503 	return;
3504       other_succ_edge = EDGE_SUCC (bb, 0);
3505       if (gimple_cond_code (cond_stmt) == NE_EXPR)
3506 	{
3507 	  if (other_succ_edge->flags & EDGE_TRUE_VALUE)
3508 	    other_succ_edge = EDGE_SUCC (bb, 1);
3509 	}
3510       else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
3511 	other_succ_edge = EDGE_SUCC (bb, 0);
3512       if (other_edge->dest != other_succ_edge->dest)
3513 	return;
3514     }
3515   else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
3516     return;
3517   gimple *zero_cond = last_stmt (pred_bb);
3518   if (zero_cond == NULL
3519       || gimple_code (zero_cond) != GIMPLE_COND
3520       || (gimple_cond_code (zero_cond)
3521 	  != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
3522       || !integer_zerop (gimple_cond_rhs (zero_cond)))
3523     return;
3524   tree zero_cond_lhs = gimple_cond_lhs (zero_cond);
3525   if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
3526     return;
3527   if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs)
3528     {
3529       /* Allow the divisor to be the result of a same-precision cast
3530 	 from zero_cond_lhs.  */
3531       tree rhs2 = gimple_assign_rhs2 (div_stmt);
3532       if (TREE_CODE (rhs2) != SSA_NAME)
3533 	return;
3534       gimple *g = SSA_NAME_DEF_STMT (rhs2);
3535       if (!gimple_assign_cast_p (g)
3536 	  || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond)
3537 	  || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
3538 	  || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
3539 	      != TYPE_PRECISION (TREE_TYPE (rhs2))))
3540 	return;
3541     }
3542   gimple_stmt_iterator gsi = gsi_after_labels (bb);
3543   mul_stmts.quick_push (div_stmt);
3544   if (is_gimple_debug (gsi_stmt (gsi)))
3545     gsi_next_nondebug (&gsi);
3546   unsigned cast_count = 0;
3547   while (gsi_stmt (gsi) != cond_stmt)
3548     {
3549       /* If the original mul_stmt has a single use, allow it in the same bb;
3550 	 we are then looking just at __builtin_mul_overflow_p.
3551 	 Though, in that case the original mul_stmt will be replaced
3552 	 by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts.  */
3553       gimple *mul_stmt;
3554       unsigned int i;
3555       bool ok = false;
3556       FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
3557 	{
3558 	  if (gsi_stmt (gsi) == mul_stmt)
3559 	    {
3560 	      ok = true;
3561 	      break;
3562 	    }
3563 	}
3564       if (!ok && gimple_assign_cast_p (gsi_stmt (gsi)) && ++cast_count < 4)
3565 	ok = true;
3566       if (!ok)
3567 	return;
3568       gsi_next_nondebug (&gsi);
3569     }
3570   if (gimple_code (cond_stmt) == GIMPLE_COND)
3571     {
3572       basic_block succ_bb = other_edge->dest;
3573       for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi);
3574 	   gsi_next (&gpi))
3575 	{
3576 	  gphi *phi = gpi.phi ();
3577 	  tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx);
3578 	  tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx);
3579 	  if (!operand_equal_p (v1, v2, 0))
3580 	    return;
3581 	}
3582     }
3583   else
3584     {
3585       tree lhs = gimple_assign_lhs (cond_stmt);
3586       if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
3587 	return;
3588       gsi_next_nondebug (&gsi);
3589       if (!gsi_end_p (gsi))
3590 	{
3591 	  if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3592 	    return;
3593 	  gimple *cast_stmt = gsi_stmt (gsi);
3594 	  if (!gimple_assign_cast_p (cast_stmt))
3595 	    return;
3596 	  tree new_lhs = gimple_assign_lhs (cast_stmt);
3597 	  gsi_next_nondebug (&gsi);
3598 	  if (!gsi_end_p (gsi)
3599 	      || !new_lhs
3600 	      || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
3601 	      || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
3602 	    return;
3603 	  lhs = new_lhs;
3604 	}
3605       edge succ_edge = single_succ_edge (bb);
3606       basic_block succ_bb = succ_edge->dest;
3607       gsi = gsi_start_phis (succ_bb);
3608       if (gsi_end_p (gsi))
3609 	return;
3610       gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
3611       gsi_next (&gsi);
3612       if (!gsi_end_p (gsi))
3613 	return;
3614       if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs)
3615 	return;
3616       tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx);
3617       if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3618 	{
3619 	  tree cond = gimple_assign_rhs1 (cond_stmt);
3620 	  if (TREE_CODE (cond) == NE_EXPR)
3621 	    {
3622 	      if (!operand_equal_p (other_val,
3623 				    gimple_assign_rhs3 (cond_stmt), 0))
3624 		return;
3625 	    }
3626 	  else if (!operand_equal_p (other_val,
3627 				     gimple_assign_rhs2 (cond_stmt), 0))
3628 	    return;
3629 	}
3630       else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR)
3631 	{
3632 	  if (!integer_zerop (other_val))
3633 	    return;
3634 	}
3635       else if (!integer_onep (other_val))
3636 	return;
3637     }
3638   gcond *zero_gcond = as_a <gcond *> (zero_cond);
3639   if (pred_edge->flags & EDGE_TRUE_VALUE)
3640     gimple_cond_make_true (zero_gcond);
3641   else
3642     gimple_cond_make_false (zero_gcond);
3643   update_stmt (zero_cond);
3644   *cfg_changed = true;
3645 }
3646 
3647 /* Helper function for arith_overflow_check_p.  Return true
3648    if VAL1 is equal to VAL2 cast to corresponding integral type
3649    with other signedness or vice versa.  */
3650 
3651 static bool
3652 arith_cast_equal_p (tree val1, tree val2)
3653 {
3654   if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
3655     return wi::eq_p (wi::to_wide (val1), wi::to_wide (val2));
3656   else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
3657     return false;
3658   if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
3659       && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
3660     return true;
3661   if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
3662       && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
3663     return true;
3664   return false;
3665 }
3666 
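/* For example (illustrative): given unsigned int u and int s defined by
   s = (int) u, both arith_cast_equal_p (s, u) and arith_cast_equal_p (u, s)
   return true, because the two values differ only by a cast between the
   corresponding signed and unsigned types.  */
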
3667 /* Helper function of match_arith_overflow.  Return 1
3668    if USE_STMT is an unsigned overflow check of the form ovf != 0 for
3669    STMT, -1 if USE_STMT is an unsigned overflow check of the form ovf == 0,
3670    and 0 otherwise.  */
3671 
3672 static int
3673 arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
3674 			tree maxval, tree *other)
3675 {
3676   enum tree_code ccode = ERROR_MARK;
3677   tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3678   enum tree_code code = gimple_assign_rhs_code (stmt);
3679   tree lhs = gimple_assign_lhs (cast_stmt ? cast_stmt : stmt);
3680   tree rhs1 = gimple_assign_rhs1 (stmt);
3681   tree rhs2 = gimple_assign_rhs2 (stmt);
3682   tree multop = NULL_TREE, divlhs = NULL_TREE;
3683   gimple *cur_use_stmt = use_stmt;
3684 
3685   if (code == MULT_EXPR)
3686     {
3687       if (!is_gimple_assign (use_stmt))
3688 	return 0;
3689       if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR)
3690 	return 0;
3691       if (gimple_assign_rhs1 (use_stmt) != lhs)
3692 	return 0;
3693       if (cast_stmt)
3694 	{
3695 	  if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs1))
3696 	    multop = rhs2;
3697 	  else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs2))
3698 	    multop = rhs1;
3699 	  else
3700 	    return 0;
3701 	}
3702       else if (gimple_assign_rhs2 (use_stmt) == rhs1)
3703 	multop = rhs2;
3704       else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0))
3705 	multop = rhs1;
3706       else
3707 	return 0;
3708       if (stmt_ends_bb_p (use_stmt))
3709 	return 0;
3710       divlhs = gimple_assign_lhs (use_stmt);
3711       if (!divlhs)
3712 	return 0;
3713       use_operand_p use;
3714       if (!single_imm_use (divlhs, &use, &cur_use_stmt))
3715 	return 0;
3716     }
3717   if (gimple_code (cur_use_stmt) == GIMPLE_COND)
3718     {
3719       ccode = gimple_cond_code (cur_use_stmt);
3720       crhs1 = gimple_cond_lhs (cur_use_stmt);
3721       crhs2 = gimple_cond_rhs (cur_use_stmt);
3722     }
3723   else if (is_gimple_assign (cur_use_stmt))
3724     {
3725       if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
3726 	{
3727 	  ccode = gimple_assign_rhs_code (cur_use_stmt);
3728 	  crhs1 = gimple_assign_rhs1 (cur_use_stmt);
3729 	  crhs2 = gimple_assign_rhs2 (cur_use_stmt);
3730 	}
3731       else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
3732 	{
3733 	  tree cond = gimple_assign_rhs1 (cur_use_stmt);
3734 	  if (COMPARISON_CLASS_P (cond))
3735 	    {
3736 	      ccode = TREE_CODE (cond);
3737 	      crhs1 = TREE_OPERAND (cond, 0);
3738 	      crhs2 = TREE_OPERAND (cond, 1);
3739 	    }
3740 	  else
3741 	    return 0;
3742 	}
3743       else
3744 	return 0;
3745     }
3746   else
3747     return 0;
3748 
3749   if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3750     return 0;
3751 
3752   switch (ccode)
3753     {
3754     case GT_EXPR:
3755     case LE_EXPR:
3756       if (maxval)
3757 	{
3758 	  /* r = a + b; r > maxval or r <= maxval  */
3759 	  if (crhs1 == lhs
3760 	      && TREE_CODE (crhs2) == INTEGER_CST
3761 	      && tree_int_cst_equal (crhs2, maxval))
3762 	    return ccode == GT_EXPR ? 1 : -1;
3763 	  break;
3764 	}
3765       /* r = a - b; r > a or r <= a
3766 	 r = a + b; a > r or a <= r or b > r or b <= r.  */
3767       if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3768 	  || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3769 	      && crhs2 == lhs))
3770 	return ccode == GT_EXPR ? 1 : -1;
3771       /* r = ~a; b > r or b <= r.  */
3772       if (code == BIT_NOT_EXPR && crhs2 == lhs)
3773 	{
3774 	  if (other)
3775 	    *other = crhs1;
3776 	  return ccode == GT_EXPR ? 1 : -1;
3777 	}
3778       break;
3779     case LT_EXPR:
3780     case GE_EXPR:
3781       if (maxval)
3782 	break;
3783       /* r = a - b; a < r or a >= r
3784 	 r = a + b; r < a or r >= a or r < b or r >= b.  */
3785       if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3786 	  || (code == PLUS_EXPR && crhs1 == lhs
3787 	      && (crhs2 == rhs1 || crhs2 == rhs2)))
3788 	return ccode == LT_EXPR ? 1 : -1;
3789       /* r = ~a; r < b or r >= b.  */
3790       if (code == BIT_NOT_EXPR && crhs1 == lhs)
3791 	{
3792 	  if (other)
3793 	    *other = crhs2;
3794 	  return ccode == LT_EXPR ? 1 : -1;
3795 	}
3796       break;
3797     case EQ_EXPR:
3798     case NE_EXPR:
3799       /* r = a * b; _1 = r / a; _1 == b
3800 	 r = a * b; _1 = r / b; _1 == a
3801 	 r = a * b; _1 = r / a; _1 != b
3802 	 r = a * b; _1 = r / b; _1 != a.  */
3803       if (code == MULT_EXPR)
3804 	{
3805 	  if (cast_stmt)
3806 	    {
3807 	      if ((crhs1 == divlhs && arith_cast_equal_p (crhs2, multop))
3808 		  || (crhs2 == divlhs && arith_cast_equal_p (crhs1, multop)))
3809 		{
3810 		  use_stmt = cur_use_stmt;
3811 		  return ccode == NE_EXPR ? 1 : -1;
3812 		}
3813 	    }
3814 	  else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0))
3815 		   || (crhs2 == divlhs && crhs1 == multop))
3816 	    {
3817 	      use_stmt = cur_use_stmt;
3818 	      return ccode == NE_EXPR ? 1 : -1;
3819 	    }
3820 	}
3821       break;
3822     default:
3823       break;
3824     }
3825   return 0;
3826 }
3827 
3828 /* Recognize for unsigned x
3829    x = y - z;
3830    if (x > y)
3831    where there are other uses of x and replace it with
3832    _7 = .SUB_OVERFLOW (y, z);
3833    x = REALPART_EXPR <_7>;
3834    _8 = IMAGPART_EXPR <_7>;
3835    if (_8)
3836    and similarly for addition.
3837 
3838    Also recognize:
3839    yc = (type) y;
3840    zc = (type) z;
3841    x = yc + zc;
3842    if (x > max)
3843    where y and z have unsigned types with maximum max
3844    and there are other uses of x and all of those cast x
3845    back to that unsigned type and again replace it with
3846    _7 = .ADD_OVERFLOW (y, z);
3847    _9 = REALPART_EXPR <_7>;
3848    _8 = IMAGPART_EXPR <_7>;
3849    if (_8)
3850    and replace (utype) x with _9.
3851 
3852    Also recognize:
3853    x = ~z;
3854    if (y > x)
3855    and replace it with
3856    _7 = .ADD_OVERFLOW (y, z);
3857    _8 = IMAGPART_EXPR <_7>;
3858    if (_8)
3859 
3860    And also recognize:
3861    z = x * y;
3862    if (x != 0)
3863      goto <bb 3>; [50.00%]
3864    else
3865      goto <bb 4>; [50.00%]
3866 
3867    <bb 3> [local count: 536870913]:
3868    _2 = z / x;
3869    _9 = _2 != y;
3870    _10 = (int) _9;
3871 
3872    <bb 4> [local count: 1073741824]:
3873    # iftmp.0_3 = PHI <_10(3), 0(2)>
3874    and replace it with
3875    _7 = .MUL_OVERFLOW (x, y);
3876    z = REALPART_EXPR <_7>;
3877    _8 = IMAGPART_EXPR <_7>;
3878    _9 = _8 != 0;
3879    iftmp.0_3 = (int) _9;  */
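
/* As a source-level illustration (added, with hypothetical variable names),
   one of the patterns above is what a programmer typically writes by hand:

     unsigned r = a + b;
     if (r < a)
       handle_overflow ();

   which this function rewrites to use .ADD_OVERFLOW, so that the overflow
   flag of a single addition can be tested directly.  */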
3880 
3881 static bool
3882 match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3883 		      enum tree_code code, bool *cfg_changed)
3884 {
3885   tree lhs = gimple_assign_lhs (stmt);
3886   tree type = TREE_TYPE (lhs);
3887   use_operand_p use_p;
3888   imm_use_iterator iter;
3889   bool use_seen = false;
3890   bool ovf_use_seen = false;
3891   gimple *use_stmt;
3892   gimple *add_stmt = NULL;
3893   bool add_first = false;
3894   gimple *cond_stmt = NULL;
3895   gimple *cast_stmt = NULL;
3896   tree cast_lhs = NULL_TREE;
3897 
3898   gcc_checking_assert (code == PLUS_EXPR
3899 		       || code == MINUS_EXPR
3900 		       || code == MULT_EXPR
3901 		       || code == BIT_NOT_EXPR);
3902   if (!INTEGRAL_TYPE_P (type)
3903       || !TYPE_UNSIGNED (type)
3904       || has_zero_uses (lhs)
3905       || (code != PLUS_EXPR
3906 	  && code != MULT_EXPR
3907 	  && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
3908 			    TYPE_MODE (type)) == CODE_FOR_nothing))
3909     return false;
3910 
3911   tree rhs1 = gimple_assign_rhs1 (stmt);
3912   tree rhs2 = gimple_assign_rhs2 (stmt);
3913   FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
3914     {
3915       use_stmt = USE_STMT (use_p);
3916       if (is_gimple_debug (use_stmt))
3917 	continue;
3918 
3919       tree other = NULL_TREE;
3920       if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, &other))
3921 	{
3922 	  if (code == BIT_NOT_EXPR)
3923 	    {
3924 	      gcc_assert (other);
3925 	      if (TREE_CODE (other) != SSA_NAME)
3926 		return false;
3927 	      if (rhs2 == NULL)
3928 		rhs2 = other;
3929 	      else
3930 		return false;
3931 	      cond_stmt = use_stmt;
3932 	    }
3933 	  ovf_use_seen = true;
3934 	}
3935       else
3936 	{
3937 	  use_seen = true;
3938 	  if (code == MULT_EXPR
3939 	      && cast_stmt == NULL
3940 	      && gimple_assign_cast_p (use_stmt))
3941 	    {
3942 	      cast_lhs = gimple_assign_lhs (use_stmt);
3943 	      if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
3944 		  && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
3945 		  && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
3946 		      == TYPE_PRECISION (TREE_TYPE (lhs))))
3947 		cast_stmt = use_stmt;
3948 	      else
3949 		cast_lhs = NULL_TREE;
3950 	    }
3951 	}
3952       if (ovf_use_seen && use_seen)
3953 	break;
3954     }
3955 
3956   if (!ovf_use_seen
3957       && code == MULT_EXPR
3958       && cast_stmt)
3959     {
3960       if (TREE_CODE (rhs1) != SSA_NAME
3961 	  || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
3962 	return false;
3963       FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
3964 	{
3965 	  use_stmt = USE_STMT (use_p);
3966 	  if (is_gimple_debug (use_stmt))
3967 	    continue;
3968 
3969 	  if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
3970 				      NULL_TREE, NULL))
3971 	    ovf_use_seen = true;
3972 	}
3973     }
3974   else
3975     {
3976       cast_stmt = NULL;
3977       cast_lhs = NULL_TREE;
3978     }
3979 
3980   tree maxval = NULL_TREE;
3981   if (!ovf_use_seen
3982       || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
3983       || (code == PLUS_EXPR
3984 	  && optab_handler (uaddv4_optab,
3985 			    TYPE_MODE (type)) == CODE_FOR_nothing)
3986       || (code == MULT_EXPR
3987 	  && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
3988 			    TYPE_MODE (type)) == CODE_FOR_nothing))
3989     {
3990       if (code != PLUS_EXPR)
3991 	return false;
3992       if (TREE_CODE (rhs1) != SSA_NAME
3993 	  || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
3994 	return false;
3995       rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
3996       tree type1 = TREE_TYPE (rhs1);
3997       if (!INTEGRAL_TYPE_P (type1)
3998 	  || !TYPE_UNSIGNED (type1)
3999 	  || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
4000 	  || (TYPE_PRECISION (type1)
4001 	      != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
4002 	return false;
4003       if (TREE_CODE (rhs2) == INTEGER_CST)
4004 	{
4005 	  if (wi::ne_p (wi::rshift (wi::to_wide (rhs2),
4006 	  			    TYPE_PRECISION (type1),
4007 				    UNSIGNED), 0))
4008 	    return false;
4009 	  rhs2 = fold_convert (type1, rhs2);
4010 	}
4011       else
4012 	{
4013 	  if (TREE_CODE (rhs2) != SSA_NAME
4014 	      || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
4015 	    return false;
4016 	  rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
4017 	  tree type2 = TREE_TYPE (rhs2);
4018 	  if (!INTEGRAL_TYPE_P (type2)
4019 	      || !TYPE_UNSIGNED (type2)
4020 	      || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
4021 	      || (TYPE_PRECISION (type2)
4022 		  != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
4023 	    return false;
4024 	}
4025       if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
4026 	type = type1;
4027       else
4028 	type = TREE_TYPE (rhs2);
4029 
4030       if (TREE_CODE (type) != INTEGER_TYPE
4031 	  || optab_handler (uaddv4_optab,
4032 			    TYPE_MODE (type)) == CODE_FOR_nothing)
4033 	return false;
4034 
4035       maxval = wide_int_to_tree (type, wi::max_value (TYPE_PRECISION (type),
4036 						      UNSIGNED));
4037       ovf_use_seen = false;
4038       use_seen = false;
4039       basic_block use_bb = NULL;
4040       FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4041 	{
4042 	  use_stmt = USE_STMT (use_p);
4043 	  if (is_gimple_debug (use_stmt))
4044 	    continue;
4045 
4046 	  if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
4047 	    {
4048 	      ovf_use_seen = true;
4049 	      use_bb = gimple_bb (use_stmt);
4050 	    }
4051 	  else
4052 	    {
4053 	      if (!gimple_assign_cast_p (use_stmt)
4054 		  || gimple_assign_rhs_code (use_stmt) == VIEW_CONVERT_EXPR)
4055 		return false;
4056 	      tree use_lhs = gimple_assign_lhs (use_stmt);
4057 	      if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
4058 		  || (TYPE_PRECISION (TREE_TYPE (use_lhs))
4059 		      > TYPE_PRECISION (type)))
4060 		return false;
4061 	      use_seen = true;
4062 	    }
4063 	}
4064       if (!ovf_use_seen)
4065 	return false;
4066       if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
4067 	{
4068 	  if (!use_seen)
4069 	    return false;
4070 	  tree new_rhs1 = make_ssa_name (type);
4071 	  gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
4072 	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
4073 	  rhs1 = new_rhs1;
4074 	}
4075       else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
4076 	{
4077 	  if (!use_seen)
4078 	    return false;
4079 	  tree new_rhs2 = make_ssa_name (type);
4080 	  gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
4081 	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
4082 	  rhs2 = new_rhs2;
4083 	}
4084       else if (!use_seen)
4085 	{
4086 	  /* If there are no uses of the wider addition, check whether
4087 	     forwprop has created a narrower addition instead.
4088 	     Require it to be in the same bb as the overflow check.  */
4089 	  FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
4090 	    {
4091 	      use_stmt = USE_STMT (use_p);
4092 	      if (is_gimple_debug (use_stmt))
4093 		continue;
4094 
4095 	      if (use_stmt == stmt)
4096 		continue;
4097 
4098 	      if (!is_gimple_assign (use_stmt)
4099 		  || gimple_bb (use_stmt) != use_bb
4100 		  || gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
4101 		continue;
4102 
4103 	      if (gimple_assign_rhs1 (use_stmt) == rhs1)
4104 		{
4105 		  if (!operand_equal_p (gimple_assign_rhs2 (use_stmt),
4106 					rhs2, 0))
4107 		    continue;
4108 		}
4109 	      else if (gimple_assign_rhs2 (use_stmt) == rhs1)
4110 		{
4111 		  if (gimple_assign_rhs1 (use_stmt) != rhs2)
4112 		    continue;
4113 		}
4114 	      else
4115 		continue;
4116 
4117 	      add_stmt = use_stmt;
4118 	      break;
4119 	    }
4120 	  if (add_stmt == NULL)
4121 	    return false;
4122 
4123 	  /* If stmt and add_stmt are in the same bb, we need to find out
4124 	     which one is earlier.  If they are in different bbs, we've
4125 	     checked add_stmt is in the same bb as one of the uses of the
4126 	     stmt lhs, so stmt needs to dominate add_stmt too.  */
4127 	  if (gimple_bb (stmt) == gimple_bb (add_stmt))
4128 	    {
4129 	      gimple_stmt_iterator gsif = *gsi;
4130 	      gimple_stmt_iterator gsib = *gsi;
4131 	      int i;
4132 	      /* Search both forward and backward from stmt and have a small
4133 		 upper bound.  */
4134 	      for (i = 0; i < 128; i++)
4135 		{
4136 		  if (!gsi_end_p (gsib))
4137 		    {
4138 		      gsi_prev_nondebug (&gsib);
4139 		      if (gsi_stmt (gsib) == add_stmt)
4140 			{
4141 			  add_first = true;
4142 			  break;
4143 			}
4144 		    }
4145 		  else if (gsi_end_p (gsif))
4146 		    break;
4147 		  if (!gsi_end_p (gsif))
4148 		    {
4149 		      gsi_next_nondebug (&gsif);
4150 		      if (gsi_stmt (gsif) == add_stmt)
4151 			break;
4152 		    }
4153 		}
4154 	      if (i == 128)
4155 		return false;
4156 	      if (add_first)
4157 		*gsi = gsi_for_stmt (add_stmt);
4158 	    }
4159 	}
4160     }
4161 
4162   if (code == BIT_NOT_EXPR)
4163     *gsi = gsi_for_stmt (cond_stmt);
4164 
4165   auto_vec<gimple *, 8> mul_stmts;
4166   if (code == MULT_EXPR && cast_stmt)
4167     {
4168       type = TREE_TYPE (cast_lhs);
4169       gimple *g = SSA_NAME_DEF_STMT (rhs1);
4170       if (gimple_assign_cast_p (g)
4171 	  && useless_type_conversion_p (type,
4172 					TREE_TYPE (gimple_assign_rhs1 (g)))
4173 	  && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4174 	rhs1 = gimple_assign_rhs1 (g);
4175       else
4176 	{
4177 	  g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
4178 	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
4179 	  rhs1 = gimple_assign_lhs (g);
4180 	  mul_stmts.quick_push (g);
4181 	}
4182       if (TREE_CODE (rhs2) == INTEGER_CST)
4183 	rhs2 = fold_convert (type, rhs2);
4184       else
4185 	{
4186 	  g = SSA_NAME_DEF_STMT (rhs2);
4187 	  if (gimple_assign_cast_p (g)
4188 	      && useless_type_conversion_p (type,
4189 					    TREE_TYPE (gimple_assign_rhs1 (g)))
4190 	      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4191 	    rhs2 = gimple_assign_rhs1 (g);
4192 	  else
4193 	    {
4194 	      g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
4195 	      gsi_insert_before (gsi, g, GSI_SAME_STMT);
4196 	      rhs2 = gimple_assign_lhs (g);
4197 	      mul_stmts.quick_push (g);
4198 	    }
4199 	}
4200     }
4201   tree ctype = build_complex_type (type);
4202   gcall *g = gimple_build_call_internal (code == MULT_EXPR
4203 					 ? IFN_MUL_OVERFLOW
4204 					 : code != MINUS_EXPR
4205 					 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
4206 					 2, rhs1, rhs2);
4207   tree ctmp = make_ssa_name (ctype);
4208   gimple_call_set_lhs (g, ctmp);
4209   gsi_insert_before (gsi, g, GSI_SAME_STMT);
4210   tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (type) : lhs;
4211   gassign *g2;
4212   if (code != BIT_NOT_EXPR)
4213     {
4214       g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
4215 				build1 (REALPART_EXPR, type, ctmp));
4216       if (maxval || cast_stmt)
4217 	{
4218 	  gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4219 	  if (add_first)
4220 	    *gsi = gsi_for_stmt (stmt);
4221 	}
4222       else
4223 	gsi_replace (gsi, g2, true);
4224       if (code == MULT_EXPR)
4225 	{
4226 	  mul_stmts.quick_push (g);
4227 	  mul_stmts.quick_push (g2);
4228 	  if (cast_stmt)
4229 	    {
4230 	      g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
4231 	      gsi_replace (gsi, g2, true);
4232 	      mul_stmts.quick_push (g2);
4233 	    }
4234 	}
4235     }
4236   tree ovf = make_ssa_name (type);
4237   g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
4238 			    build1 (IMAGPART_EXPR, type, ctmp));
4239   if (code != BIT_NOT_EXPR)
4240     gsi_insert_after (gsi, g2, GSI_NEW_STMT);
4241   else
4242     gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4243   if (code == MULT_EXPR)
4244     mul_stmts.quick_push (g2);
4245 
4246   FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
4247     {
4248       if (is_gimple_debug (use_stmt))
4249 	continue;
4250 
4251       gimple *orig_use_stmt = use_stmt;
4252       int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4253 					    maxval, NULL);
4254       if (ovf_use == 0)
4255 	{
4256 	  gcc_assert (code != BIT_NOT_EXPR);
4257 	  if (maxval)
4258 	    {
4259 	      tree use_lhs = gimple_assign_lhs (use_stmt);
4260 	      gimple_assign_set_rhs1 (use_stmt, new_lhs);
4261 	      if (useless_type_conversion_p (TREE_TYPE (use_lhs),
4262 					     TREE_TYPE (new_lhs)))
4263 		gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
4264 	      update_stmt (use_stmt);
4265 	    }
4266 	  continue;
4267 	}
4268       if (gimple_code (use_stmt) == GIMPLE_COND)
4269 	{
4270 	  gcond *cond_stmt = as_a <gcond *> (use_stmt);
4271 	  gimple_cond_set_lhs (cond_stmt, ovf);
4272 	  gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
4273 	  gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4274 	}
4275       else
4276 	{
4277 	  gcc_checking_assert (is_gimple_assign (use_stmt));
4278 	  if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
4279 	    {
4280 	      gimple_assign_set_rhs1 (use_stmt, ovf);
4281 	      gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
4282 	      gimple_assign_set_rhs_code (use_stmt,
4283 					  ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4284 	    }
4285 	  else
4286 	    {
4287 	      gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
4288 				   == COND_EXPR);
4289 	      tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4290 				  boolean_type_node, ovf,
4291 				  build_int_cst (type, 0));
4292 	      gimple_assign_set_rhs1 (use_stmt, cond);
4293 	    }
4294 	}
4295       update_stmt (use_stmt);
4296       if (code == MULT_EXPR && use_stmt != orig_use_stmt)
4297 	{
4298 	  gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
4299 	  maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
4300 					 cfg_changed);
4301 	  gsi_remove (&gsi2, true);
4302 	  release_ssa_name (gimple_assign_lhs (orig_use_stmt));
4303 	}
4304     }
4305   if (maxval)
4306     {
4307       gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
4308       gsi_remove (&gsi2, true);
4309       if (add_stmt)
4310 	{
4311 	  gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
4312 					   new_lhs);
4313 	  gsi2 = gsi_for_stmt (add_stmt);
4314 	  gsi_replace (&gsi2, g, true);
4315 	}
4316     }
4317   else if (code == BIT_NOT_EXPR)
4318     {
4319       *gsi = gsi_for_stmt (stmt);
4320       gsi_remove (gsi, true);
4321       release_ssa_name (lhs);
4322       return true;
4323     }
4324   return false;
4325 }
4326 
4327 /* Return true if target has support for divmod.  */
4328 
4329 static bool
4330 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
4331 {
4332   /* If target supports hardware divmod insn, use it for divmod.  */
4333   if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
4334     return true;
4335 
4336   /* Check if libfunc for divmod is available.  */
4337   rtx libfunc = optab_libfunc (divmod_optab, mode);
4338   if (libfunc != NULL_RTX)
4339     {
4340       /* If optab_handler exists for div_optab, perhaps in a wider mode,
4341 	 we don't want to use the libfunc even if it exists for the given mode.  */
4342       machine_mode div_mode;
4343       FOR_EACH_MODE_FROM (div_mode, mode)
4344 	if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
4345 	  return false;
4346 
4347       return targetm.expand_divmod_libfunc != NULL;
4348     }
4349 
4350   return false;
4351 }
4352 
4353 /* Check if stmt is candidate for divmod transform.  */
4354 
4355 static bool
4356 divmod_candidate_p (gassign *stmt)
4357 {
4358   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
4359   machine_mode mode = TYPE_MODE (type);
4360   optab divmod_optab, div_optab;
4361 
4362   if (TYPE_UNSIGNED (type))
4363     {
4364       divmod_optab = udivmod_optab;
4365       div_optab = udiv_optab;
4366     }
4367   else
4368     {
4369       divmod_optab = sdivmod_optab;
4370       div_optab = sdiv_optab;
4371     }
4372 
4373   tree op1 = gimple_assign_rhs1 (stmt);
4374   tree op2 = gimple_assign_rhs2 (stmt);
4375 
4376   /* Disable the transform if either is a constant, since division-by-constant
4377      may have specialized expansion.  */
4378   if (CONSTANT_CLASS_P (op1))
4379     return false;
4380 
4381   if (CONSTANT_CLASS_P (op2))
4382     {
4383       if (integer_pow2p (op2))
4384 	return false;
4385 
4386       if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
4387 	  && TYPE_PRECISION (type) <= BITS_PER_WORD)
4388 	return false;
4389 
4390       /* If the divisor is not a power of 2 and the precision is wider than
4391 	 a HWI, expand_divmod punts on that, so in that case it is better
4392 	 to use the divmod optab or libfunc.  Similarly if choose_multiplier
4393 	 might need pre/post shifts of BITS_PER_WORD or more.  */
4394     }
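
  /* E.g. (illustrative): a 128-bit division and modulo by the constant 10 on
     a 64-bit target falls into the case above; the usual multiply-by-reciprocal
     expansion is not available for such a wide precision, so a combined
     divmod call is preferred.  */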
4395 
4396   /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
4397      expand using the [su]divv optabs.  */
4398   if (TYPE_OVERFLOW_TRAPS (type))
4399     return false;
4400 
4401   if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
4402     return false;
4403 
4404   return true;
4405 }
4406 
4407 /* This function looks for:
4408    t1 = a TRUNC_DIV_EXPR b;
4409    t2 = a TRUNC_MOD_EXPR b;
4410    and transforms it to the following sequence:
4411    complex_tmp = DIVMOD (a, b);
4412    t1 = REALPART_EXPR(complex_tmp);
4413    t2 = IMAGPART_EXPR(complex_tmp);
4414    For conditions enabling the transform see divmod_candidate_p().
4415 
4416    The pass has three parts:
4417    1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
4418       other trunc_div_expr and trunc_mod_expr stmts.
4419    2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
4420       to stmts vector.
4421    3) Insert DIVMOD call just before top_stmt and update entries in
4422       stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
4423       IMAGPART_EXPR for mod).  */
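
/* For instance (illustrative, with hypothetical variable names), for

     unsigned q = a / b;
     unsigned r = a % b;

   with a non-constant divisor the pass emits a single DIVMOD internal call
   and rewrites q and r as the real and imaginary parts of its complex
   result, so one divmod instruction or libcall computes both values.  */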
4424 
4425 static bool
4426 convert_to_divmod (gassign *stmt)
4427 {
4428   if (stmt_can_throw_internal (cfun, stmt)
4429       || !divmod_candidate_p (stmt))
4430     return false;
4431 
4432   tree op1 = gimple_assign_rhs1 (stmt);
4433   tree op2 = gimple_assign_rhs2 (stmt);
4434 
4435   imm_use_iterator use_iter;
4436   gimple *use_stmt;
4437   auto_vec<gimple *> stmts;
4438 
4439   gimple *top_stmt = stmt;
4440   basic_block top_bb = gimple_bb (stmt);
4441 
4442   /* Part 1: Try to set top_stmt to the "topmost" stmt that dominates
4443      at least stmt and possibly other trunc_div/trunc_mod stmts
4444      having the same operands as stmt.  */
4445 
4446   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
4447     {
4448       if (is_gimple_assign (use_stmt)
4449 	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4450 	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4451 	  && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
4452 	  && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
4453 	{
4454 	  if (stmt_can_throw_internal (cfun, use_stmt))
4455 	    continue;
4456 
4457 	  basic_block bb = gimple_bb (use_stmt);
4458 
4459 	  if (bb == top_bb)
4460 	    {
4461 	      if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
4462 		top_stmt = use_stmt;
4463 	    }
4464 	  else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
4465 	    {
4466 	      top_bb = bb;
4467 	      top_stmt = use_stmt;
4468 	    }
4469 	}
4470     }
4471 
4472   tree top_op1 = gimple_assign_rhs1 (top_stmt);
4473   tree top_op2 = gimple_assign_rhs2 (top_stmt);
4474 
4475   stmts.safe_push (top_stmt);
4476   bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
4477 
4478   /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
4479      to the stmts vector.  The 2nd loop will always add stmt to the stmts
4480      vector, since gimple_bb (top_stmt) dominates gimple_bb (stmt), so it
4481      ends up adding at least one trunc_mod_expr stmt.  */
4482 
4483   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
4484     {
4485       if (is_gimple_assign (use_stmt)
4486 	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4487 	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4488 	  && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
4489 	  && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
4490 	{
4491 	  if (use_stmt == top_stmt
4492 	      || stmt_can_throw_internal (cfun, use_stmt)
4493 	      || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
4494 	    continue;
4495 
4496 	  stmts.safe_push (use_stmt);
4497 	  if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
4498 	    div_seen = true;
4499 	}
4500     }
4501 
4502   if (!div_seen)
4503     return false;
4504 
4505   /* Part 3: Create libcall to internal fn DIVMOD:
4506      divmod_tmp = DIVMOD (op1, op2).  */
4507 
4508   gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
4509   tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
4510 				 call_stmt, "divmod_tmp");
4511   gimple_call_set_lhs (call_stmt, res);
4512   /* We rejected throwing statements above.  */
4513   gimple_call_set_nothrow (call_stmt, true);
4514 
4515   /* Insert the call before top_stmt.  */
4516   gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
4517   gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
4518 
4519   widen_mul_stats.divmod_calls_inserted++;
4520 
4521   /* Update all statements in stmts vector:
4522      lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
4523      lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>.  */
4524 
4525   for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
4526     {
4527       tree new_rhs;
4528 
4529       switch (gimple_assign_rhs_code (use_stmt))
4530 	{
4531 	  case TRUNC_DIV_EXPR:
4532 	    new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
4533 	    break;
4534 
4535 	  case TRUNC_MOD_EXPR:
4536 	    new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
4537 	    break;
4538 
4539 	  default:
4540 	    gcc_unreachable ();
4541 	}
4542 
4543       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
4544       gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
4545       update_stmt (use_stmt);
4546     }
4547 
4548   return true;
4549 }
4550 
4551 /* Find integer multiplications where the operands are extended from
4552    smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
4553    where appropriate.  */
4554 
4555 namespace {
4556 
4557 const pass_data pass_data_optimize_widening_mul =
4558 {
4559   GIMPLE_PASS, /* type */
4560   "widening_mul", /* name */
4561   OPTGROUP_NONE, /* optinfo_flags */
4562   TV_TREE_WIDEN_MUL, /* tv_id */
4563   PROP_ssa, /* properties_required */
4564   0, /* properties_provided */
4565   0, /* properties_destroyed */
4566   0, /* todo_flags_start */
4567   TODO_update_ssa, /* todo_flags_finish */
4568 };
4569 
4570 class pass_optimize_widening_mul : public gimple_opt_pass
4571 {
4572 public:
4573   pass_optimize_widening_mul (gcc::context *ctxt)
4574     : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
4575   {}
4576 
4577   /* opt_pass methods: */
4578   virtual bool gate (function *)
4579     {
4580       return flag_expensive_optimizations && optimize;
4581     }
4582 
4583   virtual unsigned int execute (function *);
4584 
4585 }; // class pass_optimize_widening_mul
4586 
4587 /* Walker class to perform the transformation in reverse dominance order. */
4588 
4589 class math_opts_dom_walker : public dom_walker
4590 {
4591 public:
4592   /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
4593      if walking modifies the CFG.  */
4594 
4595   math_opts_dom_walker (bool *cfg_changed_p)
4596     : dom_walker (CDI_DOMINATORS), m_last_result_set (),
4597       m_cfg_changed_p (cfg_changed_p) {}
4598 
4599   /* The actual actions performed in the walk.  */
4600 
4601   virtual void after_dom_children (basic_block);
4602 
4603   /* Set of results of chains of multiply and add statement combinations that
4604      were not transformed into FMAs because of active deferring.  */
4605   hash_set<tree> m_last_result_set;
4606 
4607   /* Pointer to a flag of the user that needs to be set if CFG has been
4608      modified.  */
4609   bool *m_cfg_changed_p;
4610 };
4611 
4612 void
4613 math_opts_dom_walker::after_dom_children (basic_block bb)
4614 {
4615   gimple_stmt_iterator gsi;
4616 
4617   fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
4618 
4619   for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
4620     {
4621       gimple *stmt = gsi_stmt (gsi);
4622       enum tree_code code;
4623 
4624       if (is_gimple_assign (stmt))
4625 	{
4626 	  code = gimple_assign_rhs_code (stmt);
4627 	  switch (code)
4628 	    {
4629 	    case MULT_EXPR:
4630 	      if (!convert_mult_to_widen (stmt, &gsi)
4631 		  && !convert_expand_mult_copysign (stmt, &gsi)
4632 		  && convert_mult_to_fma (stmt,
4633 					  gimple_assign_rhs1 (stmt),
4634 					  gimple_assign_rhs2 (stmt),
4635 					  &fma_state))
4636 		{
4637 		  gsi_remove (&gsi, true);
4638 		  release_defs (stmt);
4639 		  continue;
4640 		}
4641 	      match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4642 	      break;
4643 
4644 	    case PLUS_EXPR:
4645 	    case MINUS_EXPR:
4646 	      if (!convert_plusminus_to_widen (&gsi, stmt, code))
4647 		match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4648 	      break;
4649 
4650 	    case BIT_NOT_EXPR:
4651 	      if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
4652 		continue;
4653 	      break;
4654 
4655 	    case TRUNC_MOD_EXPR:
4656 	      convert_to_divmod (as_a<gassign *> (stmt));
4657 	      break;
4658 
4659 	    default:;
4660 	    }
4661 	}
4662       else if (is_gimple_call (stmt))
4663 	{
4664 	  switch (gimple_call_combined_fn (stmt))
4665 	    {
4666 	    CASE_CFN_POW:
4667 	      if (gimple_call_lhs (stmt)
4668 		  && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
4669 		  && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
4670 				 &dconst2)
4671 		  && convert_mult_to_fma (stmt,
4672 					  gimple_call_arg (stmt, 0),
4673 					  gimple_call_arg (stmt, 0),
4674 					  &fma_state))
4675 		{
4676 		  unlink_stmt_vdef (stmt);
4677 		  if (gsi_remove (&gsi, true)
4678 		      && gimple_purge_dead_eh_edges (bb))
4679 		    *m_cfg_changed_p = true;
4680 		  release_defs (stmt);
4681 		  continue;
4682 		}
4683 	      break;
4684 
4685 	    case CFN_COND_MUL:
4686 	      if (convert_mult_to_fma (stmt,
4687 				       gimple_call_arg (stmt, 1),
4688 				       gimple_call_arg (stmt, 2),
4689 				       &fma_state,
4690 				       gimple_call_arg (stmt, 0)))
4691 
4692 		{
4693 		  gsi_remove (&gsi, true);
4694 		  release_defs (stmt);
4695 		  continue;
4696 		}
4697 	      break;
4698 
4699 	    case CFN_LAST:
4700 	      cancel_fma_deferring (&fma_state);
4701 	      break;
4702 
4703 	    default:
4704 	      break;
4705 	    }
4706 	}
4707       gsi_next (&gsi);
4708     }
4709   if (fma_state.m_deferring_p
4710       && fma_state.m_initial_phi)
4711     {
4712       gcc_checking_assert (fma_state.m_last_result);
4713       if (!last_fma_candidate_feeds_initial_phi (&fma_state,
4714 						 &m_last_result_set))
4715 	cancel_fma_deferring (&fma_state);
4716       else
4717 	m_last_result_set.add (fma_state.m_last_result);
4718     }
4719 }
4720 
4721 
4722 unsigned int
4723 pass_optimize_widening_mul::execute (function *fun)
4724 {
4725   bool cfg_changed = false;
4726 
4727   memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
4728   calculate_dominance_info (CDI_DOMINATORS);
4729   renumber_gimple_stmt_uids (cfun);
4730 
4731   math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4732 
4733   statistics_counter_event (fun, "widening multiplications inserted",
4734 			    widen_mul_stats.widen_mults_inserted);
4735   statistics_counter_event (fun, "widening maccs inserted",
4736 			    widen_mul_stats.maccs_inserted);
4737   statistics_counter_event (fun, "fused multiply-adds inserted",
4738 			    widen_mul_stats.fmas_inserted);
4739   statistics_counter_event (fun, "divmod calls inserted",
4740 			    widen_mul_stats.divmod_calls_inserted);
4741 
4742   return cfg_changed ? TODO_cleanup_cfg : 0;
4743 }
4744 
4745 } // anon namespace
4746 
4747 gimple_opt_pass *
4748 make_pass_optimize_widening_mul (gcc::context *ctxt)
4749 {
4750   return new pass_optimize_widening_mul (ctxt);
4751 }
4752